#!/usr/bin/python
#encoding=utf-8
# author: tangwy
import re,os,json
import codecs
from db2json import DBUtils
from datetime import datetime, timedelta
from ext_logging import logger_cron,get_clean_file_path

TRACE_PATH = "/home/master/ISOP/apps/uebaMetricsAnalysis/logs/trace.cfg"

# Write a large file in chunks (default 20MB per write); data_list is the full string to write.
def write_large_file(filename, data_list, chunk_size=1024*1024*20):
    with codecs.open(filename, 'w', encoding='utf-8') as f:
        for i in range(0, len(data_list), chunk_size):
            chunk = data_list[i:i + chunk_size]
            f.write(chunk)

# Return True if the file is larger than 500MB.
def is_file_larger_than_500mb(file_path):
    file_size = os.path.getsize(file_path)
    file_size_in_mb = file_size / (1024.0 * 1024)
    return file_size_in_mb > 500

# Read a large JSON file in 10MB chunks and parse it.
def read_large_json_file(filename, chunk_size=1024*1024*10):
    json_object = ''
    with codecs.open(filename, 'r', encoding='utf-8') as f:
        while True:
            chunk = f.read(chunk_size)
            if not chunk:
                break
            json_object += chunk
    data = json.loads(json_object)
    return data

# Delete the file if it exists.
def delete_frile(file_path):
    if os.path.exists(file_path):
        os.remove(file_path)

# Merge several per-day datasets and sum the counts of identical records.
def merge_data(datasets):
    # Collect every record from every dataset under its category.
    merged_data = {"ip": [], "account": [], "interface": [], "menu": []}
    for dataset in datasets:
        for category, items in dataset.items():
            merged_data[category].extend(items)

    # Aggregate records that share the same non-count fields by summing their counts.
    aggregated_data = {"ip": [], "account": [], "interface": [], "menu": []}
    for category in aggregated_data:
        category_data = {}
        for item in merged_data[category]:
            # Group by every field except 'count'; sort the keys so the grouping key is stable.
            keys_to_use = tuple(sorted(k for k in item if k != 'count'))
            key_tuple = tuple(item[k] for k in keys_to_use)
            group_key = (keys_to_use, key_tuple)
            category_data[group_key] = category_data.get(group_key, 0) + item['count']
        # Rebuild the original record layout with the summed counts.
        aggregated_data[category] = [
            dict(zip(keys, values), count=count)
            for (keys, values), count in category_data.items()
        ]
    return aggregated_data

# Merge a list of flat {key: numeric-string} dicts by summing the values per key.
def merge_data_new(datasets):
    result = {}
    for d in datasets:
        for key, value in d.items():
            if key in result:
                # Key already present: add the values (stored as strings).
                result[key] = str(int(result[key]) + int(value))
            else:
                result[key] = value
    return result

# Return the raw content of the trace file without running json.loads on it.
def get_file_content():
    json_object = ''
    if os.path.exists(TRACE_PATH):
        with codecs.open(TRACE_PATH, 'r', encoding='utf-8') as f:
            while True:
                chunk = f.read(1024*1024*1)
                if not chunk:
                    break
                json_object += chunk
    return json_object
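
# Minimal usage sketch (an assumption: run manually for a quick check, not by the
# cron scheduler). The sample payloads below are made-up examples of the per-day
# JSON structure this module merges; real data comes from read_large_json_file().
if __name__ == "__main__":
    day1 = {"ip": [{"ip": "10.0.0.1", "count": 3}], "account": [], "interface": [], "menu": []}
    day2 = {"ip": [{"ip": "10.0.0.1", "count": 2}], "account": [], "interface": [], "menu": []}
    # The two records share the same non-count fields, so merge_data collapses
    # them into a single entry with count == 5.
    print(merge_data([day1, day2]))
    # merge_data_new works on flat {key: numeric-string} dicts instead.
    print(merge_data_new([{"total": "3"}, {"total": "2"}]))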