You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
hbyd_ueba/business/es_query.py

65 lines
1.9 KiB

#!/usr/bin/python
#encoding=utf-8
# author: tangwy
import json
import os,re
import codecs
import csv
import ConfigParser
from ipaddr import IPRange
from elasticsearch import Elasticsearch
# Locate conf/conf.ini under the parent of this module's directory and load it.
conf_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'conf')
ini_path = os.path.join(conf_path, 'conf.ini')
config = ConfigParser.ConfigParser()
config.read(ini_path)

# Settings taken from the [COMMON] section of conf.ini.
ES_HOST = config.get('COMMON', 'es_host')
# This value is a count (it is passed as the search `size`), so parse it as
# an integer instead of keeping the raw option string.
ES_PER_COUNT = config.getint('COMMON', 'es_per_count')
ES_INDEX_NAME = config.get('COMMON', 'es_index_name')
CSV_FILE_PATH = config.get('COMMON', 'csv_file_path')
# Fetch documents with the scroll API
def get_es_data(start_time, end_time):
    """Return every hit whose ``timestamp`` lies between the two bounds.

    Runs an initial search against ``ES_INDEX_NAME`` with a 3-minute scroll
    context, then keeps requesting scroll pages until one comes back empty.

    Args:
        start_time: Lower bound for ``timestamp`` (inclusive, ``gte``).
        end_time: Upper bound for ``timestamp`` (inclusive, ``lte``).

    Returns:
        list: The raw hit dicts from all pages; ``_source`` is restricted to
        the ``cookies``, ``url``, ``sip`` and ``dip`` fields.
    """
    es = Elasticsearch(ES_HOST)
    msg = es.search(
        index=ES_INDEX_NAME,
        scroll="3m",
        # The configured value may be an option string; the page size is a count.
        size=int(ES_PER_COUNT),
        _source_includes=["cookies", "url", "sip", "dip"],
        query={
            "bool": {
                "filter": {
                    "range": {
                        "timestamp": {
                            "gte": start_time,
                            "lte": end_time
                        }
                    }
                }
            }
        })
    page = msg['hits']['hits']
    result = list(page)
    scroll_id = msg['_scroll_id']
    # Scroll until a page is empty instead of deriving a page count from
    # hits.total: Elasticsearch may report that total only as a lower bound
    # unless track_total_hits is enabled, which would truncate large results.
    while page:
        response = es.scroll(scroll_id=scroll_id, scroll='3m')
        # Always continue with the most recently returned scroll id.
        scroll_id = response['_scroll_id']
        page = response['hits']['hits']
        result.extend(page)
    return result
# Read the CSV file and build the IP -> area mapping
def get_ip_area_relation(csv_file_path, has_header=True):
    """Map every address of each CSV range to that range's area.

    Each data row supplies the range start in column 0, the range end in
    column 1 and the area in column 5. Every data row is processed; blank
    rows are ignored.

    Args:
        csv_file_path: Path of the CSV file, opened as UTF-8 text.
        has_header: When true (the default), the first row is discarded as a
            header line instead of being parsed as a range.

    Returns:
        dict: One entry per item yielded by iterating
        ``IPRange(start, end)``, valued with the row's area.
    """
    # NOTE(review): IPRange is imported from `ipaddr` at file level; confirm
    # that module really exposes it and what iterating it yields.
    # NOTE(review): on Python 2 the csv module may not accept the unicode
    # lines produced by codecs.open when they contain non-ASCII text - verify.
    iprange_map = {}
    with codecs.open(csv_file_path, mode='r', encoding='utf-8') as csv_file:
        csv_reader = csv.reader(csv_file)
        if has_header:
            # Skip the header once; the default of None tolerates an empty file.
            next(csv_reader, None)
        for row in csv_reader:
            if not row:
                continue
            ip_range = IPRange(row[0], row[1])
            ip_area = row[5]
            print(ip_area)
            for ip in ip_range:
                iprange_map[ip] = ip_area
    return iprange_map
# NOTE(review): this call runs whenever the module is executed or imported,
# uses a hard-coded path rather than CSV_FILE_PATH, and discards the returned
# mapping - presumably a leftover manual check; confirm before relying on it.
get_ip_area_relation("/tmp/data/ip_area_relation.csv")