hbyd_ueba/utils/file_helper.py

#!/usr/bin/python
#encoding=utf-8
# author: tangwy
import re, os, json
import codecs
from db2json import DBUtils
from datetime import datetime, timedelta
from ext_logging import logger_cron,get_clean_file_path
# Write a large file in 5 MB chunks.
def write_large_file(filename, data_list, chunk_size=1024*1024*5):
    # data_list is expected to be an already-serialized string (e.g. a JSON
    # document); it is sliced and written piece by piece to bound memory use.
    with codecs.open(filename, 'w', encoding='utf-8') as f:
        for i in range(0, len(data_list), chunk_size):
            chunk = data_list[i:i + chunk_size]
            f.write(chunk)
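
# Illustrative usage sketch (the path and payload below are hypothetical, not
# part of the original module): the caller serializes first, then writes.
#
#   payload = json.dumps({"ip": [], "account": [], "interface": [], "menu": []})
#   write_large_file('/tmp/ueba_example.json', payload)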
# Read a large file in 5 MB chunks, then parse it as one JSON document.
def read_large_json_file(filename, chunk_size=1024*1024*5):
    json_object = ''
    with codecs.open(filename, 'r', encoding='utf-8') as f:
        while True:
            chunk = f.read(chunk_size)
            if not chunk:
                break
            json_object += chunk
    # The whole document must be assembled before json.loads can parse it.
    data = json.loads(json_object)
    return data
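
# Illustrative sketch (hypothetical path): reading back what write_large_file
# produced returns the parsed object, since all chunks are reassembled before
# json.loads runs.
#
#   data = read_large_json_file('/tmp/ueba_example.json')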
# Delete a file if it exists.
def delete_file(file_path):
    if os.path.exists(file_path):
        os.remove(file_path)
# Merge multiple result sets and aggregate their counts per category.
def merge_data(datasets):
    # Collect every record from every dataset under its category.
    merged_data = {
        "ip": [],
        "account": [],
        "interface": [],
        "menu": []
    }
    for dataset in datasets:
        for category, items in dataset.items():
            merged_data[category].extend(items)

    # Records that share the same non-count fields are collapsed into one
    # record whose counts are summed.
    aggregated_data = {
        "ip": [],
        "account": [],
        "interface": [],
        "menu": []
    }
    for category in aggregated_data:
        category_data = {}
        key_fields = None
        for item in merged_data.get(category, []):
            # The non-count fields identify a unique record; sort them so the
            # key tuple is stable regardless of dict ordering.
            keys_to_use = sorted(k for k in item if k != 'count')
            if key_fields is None:
                key_fields = keys_to_use
            key_tuple = tuple(item[k] for k in keys_to_use)
            category_data[key_tuple] = category_data.get(key_tuple, 0) + item['count']
        # Convert the aggregated counts back to the original record format.
        aggregated_data[category] = [
            dict(list(zip(key_fields, key_tuple)) + [('count', count)])
            for key_tuple, count in category_data.items()
        ]
    return aggregated_data
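
# Illustrative sketch (hypothetical records, field name "addr" assumed): two
# datasets whose "ip" records share the same non-count fields merge into one
# record with the counts summed.
#
#   a = {"ip": [{"addr": "10.0.0.1", "count": 2}], "account": [], "interface": [], "menu": []}
#   b = {"ip": [{"addr": "10.0.0.1", "count": 3}], "account": [], "interface": [], "menu": []}
#   merge_data([a, b])["ip"]  # -> [{"addr": "10.0.0.1", "count": 5}]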