#!/usr/bin/python
#encoding=utf-8
# author: tangwy
"""Helpers for pulling documents from Elasticsearch and loading an IP-to-area table.

Settings are read from ``conf/conf.ini`` (section ``COMMON``) located in the
parent directory of the directory that contains this file.
"""
import json
import os
import re
import codecs
import csv

try:
    import ConfigParser
except ImportError:  # Python 3 renamed the module to ``configparser``
    import configparser as ConfigParser

from ipaddr import IPRange
from elasticsearch import Elasticsearch

# Resolve against the absolute path of this file: with a bare relative
# ``__file__`` (e.g. ``python script.py``) two ``dirname`` calls collapse to
# '' and the config would be looked up relative to the current directory.
conf_path = os.path.join(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'conf')
ini_path = os.path.join(conf_path, 'conf.ini')
config = ConfigParser.ConfigParser()
config.read(ini_path)

ES_HOST = config.get('COMMON', 'es_host')
# Page size must be an integer; ``config.get`` would return a string.
ES_PER_COUNT = config.getint('COMMON', 'es_per_count')
ES_INDEX_NAME = config.get('COMMON', 'es_index_name')
CSV_FILE_PATH = config.get('COMMON', 'csv_file_path')


# Query data with the scroll API
def get_es_data(start_time,end_time):
    """Return every hit whose ``timestamp`` lies in [start_time, end_time].

    Runs a scroll search against ``ES_INDEX_NAME`` on ``ES_HOST`` requesting
    only the ``cookies``, ``url``, ``sip`` and ``dip`` source fields, and
    keeps scrolling until a page comes back empty.

    Args:
        start_time: lower bound, passed as ``gte`` of the range filter.
        end_time: upper bound, passed as ``lte`` of the range filter.

    Returns:
        list: the raw hit dicts (``hits.hits`` entries) of all pages.
    """
    es = Elasticsearch(ES_HOST)
    msg = es.search(
        index=ES_INDEX_NAME,
        scroll="3m",
        size=ES_PER_COUNT,
        _source_includes=["cookies", "url", "sip", "dip"],
        query={
            "bool": {
                "filter": {
                    "range": {
                        "timestamp": {
                            "gte": start_time,
                            "lte": end_time
                        }
                    }
                }
            }
        },
    )
    scroll_id = msg['_scroll_id']
    page = msg['hits']['hits']
    result = []
    try:
        # Loop until an empty page instead of deriving a page count from
        # hits.total.value: that value is capped (10,000 by default), so a
        # count-based loop silently truncates larger result sets.
        while page:
            result.extend(page)
            msg = es.scroll(scroll_id=scroll_id, scroll='3m')
            # Always continue with the most recently returned scroll id.
            scroll_id = msg['_scroll_id']
            page = msg['hits']['hits']
    finally:
        # Release the server-side scroll context rather than waiting for
        # the 3m keep-alive to expire.
        es.clear_scroll(scroll_id=scroll_id)
    return result


# Read the CSV file and get the area each IP belongs to
def get_ip_area_relation(csv_file_path):
    """Build a mapping from individual IP address to its area.

    Each data row is read as: column 0 = first IP of a range, column 1 =
    last IP of the range, column 5 = area. Every address in the range is
    expanded into its own dict entry, so very wide ranges produce very
    large dicts.

    NOTE(review): the first row is skipped on the assumption that it is a
    header row -- confirm against the real CSV file.

    Args:
        csv_file_path: path of a UTF-8 encoded CSV file.

    Returns:
        dict: address (as yielded by iterating ``IPRange``) -> area string.
    """
    iprange_map = {}
    with codecs.open(csv_file_path, mode='r', encoding='utf-8') as csv_file:
        csv_reader = csv.reader(csv_file)
        # Skip the header once. Calling next() inside the loop consumed a
        # second row on every iteration, dropping every other row and
        # leaking StopIteration when the row count was odd.
        next(csv_reader, None)
        for row in csv_reader:
            # Blank or truncated lines cannot supply columns 0, 1 and 5.
            if len(row) < 6:
                continue
            ip_range = IPRange(row[0], row[1])
            ip_area = row[5]
            print (ip_area)
            for ip in ip_range:
                iprange_map[ip] = ip_area
    return iprange_map


if __name__ == "__main__":
    # Manual smoke run; guarded so importing this module does not read
    # a hard-coded file as a side effect.
    get_ip_area_relation("/tmp/data/ip_area_relation.csv")