#!/usr/bin/python
# encoding=utf-8
# author: tangwy

import re
import os
import json
import codecs
from db2json import DBUtils
from datetime import datetime, timedelta
from ext_logging import logger_cron, get_clean_file_path

TRACE_PATH = "/home/master/ISOP/apps/uebaMetricsAnalysis/logs/trace.cfg"


# Write a large string to disk in chunks (default 20MB); despite the name,
# data_list is expected to be a single string (e.g. json.dumps output) so that
# slicing yields writable text chunks.
def write_large_file(filename, data_list, chunk_size=1024*1024*20):
    with codecs.open(filename, 'w', encoding='utf-8') as f:
        for i in range(0, len(data_list), chunk_size):
            chunk = data_list[i:i + chunk_size]
            f.write(chunk)
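
# write_large_file usage sketch (hypothetical caller, not part of the original module):
#   write_large_file('/tmp/report.json', json.dumps(report))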


# Return True if the file at file_path is larger than 500MB.
def is_file_larger_than_500mb(file_path):
    file_size = os.path.getsize(file_path)
    file_size_in_mb = file_size / (1024.0 * 1024)
    if file_size_in_mb > 500:
        return True
    else:
        return False
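
# Note: os.path.getsize() raises OSError when file_path does not exist, so callers
# are expected to check os.path.exists() before calling is_file_larger_than_500mb().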


# Read a large JSON file in chunks and return the parsed object.
def read_large_json_file(filename, chunk_size=1024*1024*10):  # read 10MB per iteration
    json_object = ''
    with codecs.open(filename, 'r', encoding='utf-8') as f:
        while True:
            chunk = f.read(chunk_size)
            if not chunk:
                break
            json_object += chunk

    data = json.loads(json_object)
    return data
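
# Note: the chunked read in read_large_json_file only bounds the size of each
# read() call; the whole file is still accumulated in memory before json.loads(),
# so peak memory use is roughly the file size plus the parsed object.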


# Delete the file at file_path if it exists.
def delete_frile(file_path):
    if os.path.exists(file_path):
        os.remove(file_path)


# Merge the per-file metric dicts and aggregate duplicate entries by summing their counts.
def merge_data(datasets):
    # initialize an empty dict to hold the merged (concatenated) data
    merged_data = {
        "ip": [],
        "account": [],
        "interface": [],
        "menu": []
    }

    # iterate over all datasets and append each category's items
    # to the corresponding category of the merged data
    for dataset in datasets:
        for category, items in dataset.items():
            merged_data[category].extend(items)

    # dict to store the aggregated data
    aggregated_data = {
        "ip": [],
        "account": [],
        "interface": [],
        "menu": []
    }

    # iterate over all categories
    for category in aggregated_data:
        # per-category aggregation: tuple of non-count field values -> summed count
        category_data = {}

        if category in merged_data:
            for item in merged_data[category]:
                # determine the non-count fields
                keys_to_use = [k for k in item if k != 'count']
                # use a tuple of all non-count field values as the grouping key
                key_tuple = tuple(item[k] for k in keys_to_use)

                if key_tuple not in category_data:
                    category_data[key_tuple] = item['count']
                else:
                    category_data[key_tuple] += item['count']

        # convert the aggregated counts back to the original list-of-dicts format
        # (assumes every item in a category shares the same set of fields)
        aggregated_data[category] = [
            dict(zip(keys_to_use, key_tuple) + [('count', count)])
            for key_tuple, count in category_data.items()
        ]

    return aggregated_data
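
# merge_data illustrative example (assumed input shape, not taken from the original code):
#   merge_data([
#       {"ip": [{"ip": "10.0.0.1", "count": 2}], "account": [], "interface": [], "menu": []},
#       {"ip": [{"ip": "10.0.0.1", "count": 3}], "account": [], "interface": [], "menu": []},
#   ])
#   returns {"ip": [{"ip": "10.0.0.1", "count": 5}], "account": [], "interface": [], "menu": []}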


# Merge a list of flat {key: count-as-string} dicts by summing the values per key.
def merge_data_new(datasets):
    # new empty dict to store the result
    result = {}

    # iterate over every dict in the list
    for d in datasets:
        for key, value in d.iteritems():  # Python 2 dict iteration
            if key in result:
                # key already present: add the counts (stored as strings)
                result[key] = str(int(result[key]) + int(value))
            else:
                # otherwise, add the key/value pair directly
                result[key] = value

    return result
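
# merge_data_new illustrative example (assumed input shape):
#   merge_data_new([{"login": "1"}, {"login": "2", "logout": "1"}])
#   returns {"login": "3", "logout": "1"}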


# Return the raw content of trace.cfg without json.loads; returns '' if the file is missing.
def get_file_content():
    json_object = ''
    if os.path.exists(TRACE_PATH):
        with codecs.open(TRACE_PATH, 'r', encoding='utf-8') as f:
            while True:
                chunk = f.read(1024*1024*1)  # read 1MB per iteration
                if not chunk:
                    break
                json_object += chunk

    return json_object