You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
85 lines
2.6 KiB
85 lines
2.6 KiB
4 months ago
|
#!/usr/bin/python
|
||
|
#encoding=utf-8
|
||
|
# author: tangwy
|
||
|
import re,os,json
|
||
|
import codecs
|
||
|
from db2json import DBUtils
|
||
|
from datetime import datetime, timedelta
|
||
|
from ext_logging import logger_cron,get_clean_file_path
|
||
|
|
||
|
#写入大文件5M
|
||
|
def write_large_file(filename, data_list, chunk_size=1024*1024*5):
    """Write ``data_list`` to ``filename`` as UTF-8, one slice at a time.

    The payload is cut into consecutive slices of at most ``chunk_size``
    elements (5 * 1024 * 1024 by default) and every slice is passed to the
    file's ``write`` in order.  The file is opened in ``'w'`` mode, so any
    existing content is replaced; an empty payload leaves an empty file.

    NOTE(review): despite the parameter name, each slice goes straight to
    ``write``, so the payload is presumably a text string -- confirm
    against the callers.
    """
    with codecs.open(filename, 'w', encoding='utf-8') as sink:
        # Offsets are computed after the file is opened, as before, so the
        # target is truncated even when the slicing arguments are invalid.
        offsets = range(0, len(data_list), chunk_size)
        slices = (data_list[start:start + chunk_size] for start in offsets)
        for piece in slices:
            sink.write(piece)
|
||
|
|
||
|
#读取大文件
|
||
|
def read_large_json_file(filename, chunk_size=1024*1024*5):  # default read size: 5 * 1024 * 1024
    """Read a UTF-8 encoded JSON file and return the parsed object.

    The file is consumed through repeated ``read(chunk_size)`` calls until
    an empty result signals end of file.  The pieces are collected in a
    list and joined once, which avoids the repeated string copying that
    ``+=`` in a loop can cause on large files.

    Args:
        filename: Path of the JSON file to load.
        chunk_size: Size argument passed to each ``read`` call.

    Returns:
        The Python object produced by ``json.loads`` for the file content.

    Raises:
        ValueError: If the content (including an empty file) is not valid
            JSON.
    """
    pieces = []
    with codecs.open(filename, 'r', encoding='utf-8') as f:
        while True:
            chunk = f.read(chunk_size)
            if not chunk:
                break
            pieces.append(chunk)

    # The whole document is needed before parsing, so join exactly once.
    return json.loads(''.join(pieces))
|
||
|
|
||
|
#删除文件
|
||
|
def delete_frile(file_path):
    """Remove ``file_path`` if it exists; do nothing when it does not.

    The (misspelled) function name is kept as-is because callers refer to
    it by this name.
    """
    if not os.path.exists(file_path):
        return
    os.remove(file_path)
|
||
|
|
||
|
#文件内容合并
|
||
|
def _aggregate_counts(items):
    """Sum the 'count' of records whose other fields are all identical.

    Records are grouped by the full set of (field, value) pairs other than
    'count', so the grouping does not depend on dict iteration order and
    records with different field names are never mixed up.  The result
    keeps the order in which each distinct group was first seen.
    """
    records = []  # aggregated output records, in first-seen order
    seen = {}     # frozenset of (field, value) pairs -> its output record
    for item in items:
        fields = [(name, value) for name, value in item.items()
                  if name != 'count']
        group_key = frozenset(fields)
        record = seen.get(group_key)
        if record is None:
            # First record of this group: copy its fields, 'count' last.
            record = dict(fields)
            record['count'] = item['count']
            seen[group_key] = record
            records.append(record)
        else:
            record['count'] += item['count']
    return records


def merge_data(datasets):
    """Merge several datasets and aggregate duplicate records per category.

    Each dataset is a dict that maps a category name ("ip", "account",
    "interface" or "menu") to a list of record dicts.  Every record carries
    a 'count' entry plus any number of other fields.  Records of the same
    category whose non-count fields are all equal are collapsed into one
    record whose 'count' is the sum of the individual counts.

    Args:
        datasets: Iterable of dataset dicts as described above.  A dataset
            may omit categories it has no records for.

    Returns:
        A dict with exactly the four categories as keys, each mapped to the
        list of aggregated record dicts (an empty list when the category
        had no records).

    Raises:
        KeyError: If a dataset contains a category other than the four
            known ones, or a record has no 'count' entry.
        TypeError: If a non-count field value is unhashable.
    """
    categories = ("ip", "account", "interface", "menu")

    # Concatenate the records of all datasets, category by category.
    merged = {category: [] for category in categories}
    for dataset in datasets:
        for category, items in dataset.items():
            merged[category].extend(items)

    return {category: _aggregate_counts(merged[category])
            for category in categories}
|
||
|
|