#!/usr/bin/python
# encoding=utf-8
# author: tangwy
import re, os, json
import codecs
from db2json import DBUtils
from datetime import datetime, timedelta
from ext_logging import logger_cron, get_clean_file_path

# Write a large file in 5MB chunks (data_list is expected to be a single string).
def write_large_file(filename, data_list, chunk_size=1024*1024*5):
    with codecs.open(filename, 'w', encoding='utf-8') as f:
        for i in range(0, len(data_list), chunk_size):
            chunk = data_list[i:i + chunk_size]
            f.write(chunk)

# Read a large JSON file in 5MB chunks and parse the full content.
def read_large_json_file(filename, chunk_size=1024*1024*5):
    json_object = ''
    with codecs.open(filename, 'r', encoding='utf-8') as f:
        while True:
            chunk = f.read(chunk_size)
            if not chunk:
                break
            json_object += chunk
    data = json.loads(json_object)
    return data

# Delete a file if it exists.
def delete_frile(file_path):
    if os.path.exists(file_path):
        os.remove(file_path)

# Merge several datasets and sum the counts of identical entries.
def merge_data(datasets):
    # Empty dict that will hold the concatenated data.
    merged_data = {
        "ip": [],
        "account": [],
        "interface": [],
        "menu": []
    }
    # Append each dataset's items to the matching category.
    for dataset in datasets:
        for category, items in dataset.items():
            merged_data[category].extend(items)

    # Dict that will hold the aggregated result.
    aggregated_data = {
        "ip": [],
        "account": [],
        "interface": [],
        "menu": []
    }
    for category in aggregated_data:
        # Maps a tuple of the non-count fields to the summed count.
        category_data = {}
        if category in merged_data:
            for item in merged_data[category]:
                # Collect the non-count fields of this item.
                keys_to_use = [k for k in item if k != 'count']
                # Key on (field, value) pairs so the original fields can be
                # rebuilt per item after aggregation.
                key_tuple = tuple((k, item[k]) for k in keys_to_use)
                if key_tuple not in category_data:
                    category_data[key_tuple] = item['count']
                else:
                    category_data[key_tuple] += item['count']
        # Convert the aggregated data back to the original list-of-dicts format.
        aggregated_data[category] = [
            dict(list(key_tuple) + [('count', count)])
            for key_tuple, count in category_data.items()
        ]
    return aggregated_data
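
# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative only). It shows how merge_data and the
# chunked file helpers above might be combined; the sample records and the
# /tmp/merged_report.json path are hypothetical and not part of the module.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    sample_a = {"ip": [{"ip": "10.0.0.1", "count": 2}], "account": [], "interface": [], "menu": []}
    sample_b = {"ip": [{"ip": "10.0.0.1", "count": 3}], "account": [], "interface": [], "menu": []}
    merged = merge_data([sample_a, sample_b])
    print(merged["ip"])  # expected: [{'ip': '10.0.0.1', 'count': 5}]
    # Round-trip the merged result through the chunked file helpers.
    write_large_file('/tmp/merged_report.json', json.dumps(merged))
    print(read_large_json_file('/tmp/merged_report.json') == merged)
    delete_frile('/tmp/merged_report.json')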