diff --git a/cron/tmp_log_alarm copy.py b/cron/tmp_log_alarm copy.py new file mode 100644 index 0000000..9d15ce3 --- /dev/null +++ b/cron/tmp_log_alarm copy.py @@ -0,0 +1,176 @@ +# coding:utf-8 + +import sys +import uuid +import json +import time +import random + +# path = str(sys.path[0]) +# home_path = path.split("isop_uebaapiData")[0] +# sys.path.append(home_path) +from util import send_logs + +def alarm(cookies, api): + """2、HTTP日志""" + inputstr = '''[{"msgtype":1,"hash":"8DE9-BDAB-F622-2FA8","dev_ip":"10.67.5.17","product":"uts"},{"sid":"6004744450036c44f815500016d00a5f5151105430a3ed","timestamp":1567673939,"sip":"10.67.0.52","sport":5624,"dip":"10.67.0.53","dport":80,"protocol":6,"app":3087428650795009,"app_proto":8,"direct":4,"app.detail":{"method":"GET","http_protocol":"1.1","ret_code":200,"host":"10.67.1.1","uri":"/webtest/uploadFile.php","referer":"http://[2222::65]/webtest/","content_type":" multipart/form-data; boundary=----WebKitFormBoundary2zcCUl4lQf1h7A7S","content_type_server":" text/html","server":"Apache/2.4.4 (Win32) OpenSSL/0.9.8y PHP/5.4.19","user_agent":"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36","link":"","cookies":"loginmainacctid=wangshiguang;operatorId=d2601586;com.huawei.boss.CURRENT_MENUID=BLAR_ChargeCrm3_WEB;","content_encoding":"","location":"","content_length":70080,"content_length_server":200,"set_cookie":"","range":"","connection":"keep-alive","connection_server":"Keep-Alive","x_forwarded_for":"","post_data":"LS0tLS0tV2ViS2l0Rm9ybUJvdW5kYXJ5MnpjQ1VsNGxRZjFoN0E3Uw0KQ29udGVudC1EaXNwb3NpdGlvbjogZm9ybS1kYXRhOyBuYW1lPSJmaWxlIjsgZmlsZW5hbWU9IjAwMDFhZDQ0MWRjYjM4NjIxOGE3Njk5MmFjZjhiNzA1Ig0=","response_body":"VXBsb2FkOiAwMDAxYWQ0NDFkY2IzODYyMThhNzY5OTJhY2Y4YjcwNTxiciAvPlR5cGU6IGFwcGxpY2F0aW9uL29jdGV0LXN0cmVhbTxiciAvPlNpemU6IDY4LjEyNzkyOTY4NzUgS2I8YnIgLz5UZW1wIGZpbGU6IEQ6XHhhbXBwXHRtcFxwaHA2ODI1LnRtcDxiciAvPjAwMDFhZDQ0MWRjYjM4NjIxOGE3Njk5MmFjZjhiNzA1IGFscmVhZHkgZXhpc3RzLiA="}}]''' + inputarr = json.loads(inputstr, strict=False) + # 随机生成timestamp + inputarr[1]["timestamp"] = int(time.time()) + inputarr[1]["sid"] = str(uuid.uuid1()) + # inputarr[1]["sip"] = "10.67.4.33" + inputarr[1]["sip"] = generate_random_ip() + inputarr[1]["dip"] = "10.67.1.1" + inputarr[1]["dport"] = "8180" + inputarr[1]["app.detail"]["uri"] = "/alarmtest.action?BMECID=352432757&BMETimestamp=1692788489260&queryNumber=158713459" + inputarr[1]["app.detail"]["host"] = api + inputarr[1]["app.detail"]["cookies"] = cookies + inputarr[1]["account"] = get_random_person() + inputarr[1]["trojan_type"] = get_random_jobnum() + inputarr[1]["worm_family"] = get_random_menu() + inputarr[1]["interface"] = get_random_inteface() + + return json.dumps(inputarr) + +def generate_random_ip(): + # 固定前缀 "192.168." + prefix = "192.168." + # 生成随机的第三和第四段IP地址 + third_octet = 19 + fourth_octet = random.randint(0, 50) + # 拼接IP地址 + ip = "{}{}.{}".format(prefix, third_octet, fourth_octet) + return ip + +def AbIDVisitAPINums510404(): + datalist = {"TCP_5011": list()} + ID2Area = { + "武汉": ["1101820", "1101821", "1101822", "1101823", "1101825"], + "荆州": ["2001800", "2001801", "2001808"], + "江汉": ["1801820", "1801810"], + "省公司市场部": ["1002011", "1002012", "1002013"] + } + api_list = ["test.alarm.com/webtest", "alarm.com/testalarm", "business.system..alarmcom", "hhh.alarm.com", + "good.alarm.com"] + info_list = [ + ["u-locale=zh_CN;loginmainacctid=zhang3;operatorId=" + ID2Area["武汉"][ + 0] + ";com.huawei.boss.CURRENT_MENUID=BLAR_ChargeCrm3_WEB;", 60], + ["u-locale=zh_CN;loginmainacctid=zhang3;operatorId=" + ID2Area["荆州"][ + 2] + ";com.huawei.boss.CURRENT_MENUID=BLAR_ChargeCrm3_WEB;", 120] + ] + for i in range(len(info_list)): + cookies = info_list[i][0] + count = info_list[i][1] + for j in range(count): + api = random.choice(api_list) + datalist["TCP_5011"].append(alarm(cookies, api)) + for key in datalist.keys(): + send_logs(datalist[key]) + return "510405场景的告警数据已生成" + +def get_random_jobnum(): + # 定义包含不同前缀的字符串数组 + prefix_strings = [ + ['10243', '10895', '10134', '10781', '10962'], # 10打头的字符串示例 + ['11089', '11057', '11023', '11016', '11030'], # 110打头的字符串示例 + ['14076', '14049', '14098', '14032', '14061'], # 140打头的字符串示例 + ['26054', '26013', '26087', '26029', '26061'], # 260打头的字符串示例 + ['20083', '20015', '20072', '20096', '20048'], # 200打头的字符串示例 + ['19035', '19017', '19049', '19082', '19096'], # 190打头的字符串示例 + ['180237', '180276', '180204', '180295', '180219'] # 1802打头的字符串示例 + ] + + # 随机选择一个前缀数组 + selected_prefix_array = random.choice(prefix_strings) + # 随机选择一个具体的字符串 + selected_string = random.choice(selected_prefix_array) + return selected_string + +def get_random_person(): + people_list = [ + "Alice", "Bob", "Charlie", "David", "Emma", "Frank", "Grace2","Alice2", "Bob2", "Charlie2", "David2", "Emma2", "Frank2", "Grace2" + ] + + random_person = random.choice(people_list) + return random_person + +def get_random_menu(): + # 定义系统菜单列表 + system_menu = [ + "开发", "测试", "部署", "配置", "设置", "安装", "卸载", "升级", "更新", + "修复", "修正", "修补", "更新", "安全", "保护", "防护", "防御", "防止", + "检查", "扫描", "监控", "跟踪", "追踪", "审计", "审查", "测试", "测量" + ] + + # 随机选择一个菜单项 + random_menu_item = random.choice(system_menu) + return random_menu_item + +def get_random_inteface(): + # 定义系统菜单列表 + system_menu = [ + "http://bai1.doc.com/api", "http://bai2.doc.com/api", "http://bai3.doc.com/api", "http://bai4.doc.com/api", "http://bai5.doc.com/api", "http://bai12.doc.com/api","http://bai13.doc.com/api", "http://bai19.doc.com/api", + "http://bai6.doc.com/api", "http://bai7.doc.com/api", "http://bai8.doc.com/api", "http://bai9.doc.com/api", "http://bai11.doc.com/api" + ] + + # 随机选择一个菜单项 + random_menu_item = random.choice(system_menu) + return random_menu_item + +if __name__ == '__main__': + datalist = {"TCP_5011": list()} + ID2Area = { + "武汉": ["1101820", "1101821", "1101822", "1101823", "1101825"], + "荆州": ["2001800", "2001801", "2001808"], + "江汉": ["1801820", "1801810"], + "省公司市场部": ["1002011", "1002012", "1002013"] + } + api_list = ["test.alarm.com/webtest", "alarm.com/testalarm", "business.system..alarmcom", "hhh.alarm.com", "good.alarm.com","baidu.com","sohu.com","xinlang.com","erpx.com"] + info_list = [ + ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], + ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 400], + ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 300], + ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 200], + ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], + ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 400], + ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 300], + ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 200], + ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], + ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 400], + ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 300], + ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 200], + ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], + ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 400], + ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 300], + ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 200], + ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], + ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 400], + ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 300], + ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 200], + ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], + ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 400], + ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 300], + ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 200], + ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], + ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 400], + ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 300], + ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 200], + ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], + ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 400], + ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 300], + ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 200], + ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], + ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 400], + ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 300], + ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 200] + ] + + for i in range(len(info_list)): + cookies = info_list[i][0] + count = info_list[i][1] + for j in range(count): + api = random.choice(api_list) + datalist["TCP_5011"].append(alarm(cookies, api)) + for key in datalist.keys(): + send_logs(datalist[key]) + print "510405场景的告警数据已生成" \ No newline at end of file diff --git a/cron/tmp_log_alarm.py b/cron/tmp_log_alarm.py index 08d2200..d97bb09 100644 --- a/cron/tmp_log_alarm.py +++ b/cron/tmp_log_alarm.py @@ -113,76 +113,7 @@ if __name__ == '__main__': api_list = ["test.alarm.com/webtest", "alarm.com/testalarm", "business.system..alarmcom", "hhh.alarm.com", "good.alarm.com","baidu.com","sohu.com","xinlang.com","erpx.com"] info_list = [ ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 3000], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 3000], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 3000], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 5000], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 111], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 111], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 111], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 111], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 111], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 111], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 111], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100], - ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100] + ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 3000] ] diff --git a/uebaMetricsAnalysis.tar b/uebaMetricsAnalysis.tar index a5d52f8..d541b4e 100644 Binary files a/uebaMetricsAnalysis.tar and b/uebaMetricsAnalysis.tar differ diff --git a/utils/base_dataclean_pg.py b/utils/base_dataclean_pg.py index b2553b5..b6d8afe 100644 --- a/utils/base_dataclean_pg.py +++ b/utils/base_dataclean_pg.py @@ -8,6 +8,7 @@ import calendar import codecs from esUtil import EsUtil from file_helper import write_large_file,merge_data +from file_helper import read_large_json_file,json_to_csv_data,write_csv from uebaMetricsAnalysis.utils.ext_logging import logger,logger_cron,get_clean_file_path from collections import defaultdict @@ -273,23 +274,23 @@ def group_and_write_to_file(data_ip, data_account, data_interface, data_menu, st logger_cron.info("JOB: "+jobid+",写入文件base路径"+base_path) date_time = convert_utc_to_local_time(start) #临时文件 临时文件格式:20240720-1630_tmp.json - tmp_file_name = time.strftime("%Y%m%d-%H%M_tmp.json", date_time) + tmp_file_name = time.strftime("%Y%m%d-%H%M_tmp.csv", date_time) tmp_file_path = os.path.join(base_path,tmp_file_name) #正式文件 正式文件格式:20240720-1630.json - file_name = time.strftime("%Y%m%d-%H%M.json", date_time) + file_name = time.strftime("%Y%m%d-%H%M.csv", date_time) file_path = os.path.join(base_path,file_name) logger_cron.info("JOB:"+jobid+", tmpfilepath"+tmp_file_path) all_data = [data] merged_data = merge_data(all_data) - json_data = json.dumps(merged_data) - + + csv_data = json_to_csv_data(merged_data,"") #写入文件 logger_cron.info("JOB: "+jobid+",准备写入文件") - write_large_file(tmp_file_path,json_data) + write_csv(csv_data, tmp_file_path) + #重命名文件 os.rename(tmp_file_path, file_path) - logger_cron.info("JOB: "+jobid+",写入文件完成") def group_and_sum(data, by_fields="ip,jobnum"): diff --git a/utils/data_test.py b/utils/data_test.py new file mode 100644 index 0000000..447cea2 --- /dev/null +++ b/utils/data_test.py @@ -0,0 +1,178 @@ +#!/usr/bin/python +#encoding=utf-8 +# author: tangwy +import re,os,json +import codecs,csv +from db2json import DBUtils +from datetime import datetime, timedelta +from ext_logging import logger_cron,get_clean_file_path +from file_helper import read_large_json_file +from file_merge import entry as merge_entry +from appsUtils.confutil import ConfUtil +from dataInterface.functions import CFunction +from dataInterface.db.params import CPgSqlParam + +date_pattern = re.compile(r'^\d{4}-\d{2}-\d{2}.json$') + +LOG_TABLE_NAME = "ueba_analysis_schema.logs2" + +DATA_TYPE = { + "IP": 1, + "ACCOUNT": 2, + "INTERFACE": 3, + "MENU": 4, +} + +# 获取当前日期并格式化为"年-月" +def get_current_year_month(): + now = datetime.now() + return now.strftime("%Y_%m") + +# 获取当前月份的第一天并格式化为"年-月-日" +def get_first_day_of_current_month(): + now = datetime.now() + first_day = now.replace(day=1) + return first_day.strftime("%Y-%m-%d") + +# 获取当前日期,然后计算下个月的第一天 +def get_first_day_of_next_month(): + now = datetime.now() + if now.month == 12: + next_month = now.replace(year=now.year+1, month=1, day=1) + else: + next_month = now.replace(month=now.month+1, day=1) + return next_month.strftime("%Y-%m-%d") + +#获取表名 +def get_table_name(): + year_month = get_current_year_month() + return LOG_TABLE_NAME+'_'+ year_month + +#获取表区间 +def get_table_data_range(): + start= get_first_day_of_current_month() + end = get_first_day_of_next_month() + return start,end + + +def get_all_files(path): + # 列出所有包含匹配模式的文件名 + files = [] + for filename in os.listdir(path): + if date_pattern.search(filename): + #由于定时任务是凌晨3点执行 所以只处理昨天的数据,今天的不处理 + if datetime.now().strftime("%Y-%m-%d")+".json" != filename: + files.append({"filename": filename, "path": os.path.join(path,filename)}) + return files + +def json_to_csvFile(json_data, csv_file): + # 提取字段名 + fields = json_data[0].keys() # 假设第一个元素包含所有可能的键 + with open(csv_file, 'wb') as csvfile: # 注意这里使用 'wb' 模式 + writer = csv.DictWriter(csvfile, fieldnames=fields) + writer.writeheader() + for row in json_data: + row = {k: v.encode('utf-8') if isinstance(v, unicode) else v for k, v in row.items()} + writer.writerow(row) +def csv_to_pg(sql): + logger_cron.info("INSERT: 准备数据入库") + confutil = ConfUtil() + cur_pg_conf = confutil.getPostgresqlConf() + cmd = """psql {} -U {} -w -c \"{}\"""".format(cur_pg_conf["database"],cur_pg_conf["username"],sql) + logger_cron.info("INSERT: "+ cmd) + rtn = os.popen(cmd) + cmd_rtn = rtn.readlines() + logger_cron.info("INSERT: "+ json.dumps(cmd_rtn)) + logger_cron.info("INSERT: 数据入库完成") + +#数据入库 +def insert_data(files): + for itemFile in files: + if os.path.exists(itemFile.get("path",'')): + data =read_large_json_file(itemFile.get("path",'')) + logger_cron.info("INSERT: 准备读取聚合文件:"+itemFile.get('path','')) + logger_cron.info("INSERT: 读取聚合文件完成") + ip_list = data.get('ip', []) + account_list = data.get('account', []) + interface_list = data.get('interface', []) + menu_list = data.get('menu', []) + + logger_cron.info("INSERT: IP维度 " +str(len(ip_list))) + logger_cron.info("INSERT: ACCOUNT维度 " +str(len(account_list))) + logger_cron.info("INSERT: INTERFACE维度 " +str(len(interface_list))) + logger_cron.info("INSERT: MENU维度 " +str(len(menu_list))) + + basename, extension = os.path.splitext(itemFile.get('filename', '')) + log_date = basename + # print ("filename:"+log_date) + records = [] + for item in ip_list: + menu = item.get('menu', '') + ip = item.get('ip', '0.0.0.0') + account = item.get('account', '') + jobnum = item.get('jobnum', '') + count = item.get('count', 0) + logdate = log_date + datatype = DATA_TYPE.get("IP",1) + interface = item.get('interface', '') + keys= json.dumps([ip,jobnum]) + records.append({"menu":menu, "ip":ip, "account":account, "jobnum":jobnum, "count":count, "logdate":logdate,"data_type":datatype,"interface":interface,"keys":keys}) + for item in account_list: + menu = item.get('menu', '') + ip = item.get('ip', '0.0.0.0') + account = item.get('account', '') + jobnum = item.get('jobnum', '') + count = item.get('count', 0) + logdate = log_date + datatype = DATA_TYPE.get("ACCOUNT",2) + interface = item.get('interface', '') + keys= json.dumps([account,jobnum]) + records.append({"menu":menu, "ip":ip, "account":account, "jobnum":jobnum, "count":count, "logdate":logdate,"data_type":datatype,"interface":interface,"keys":keys}) + for item in interface_list: + menu = item.get('menu', '') + ip = item.get('ip', '0.0.0.0') + account = item.get('account', '') + jobnum = item.get('jobnum', '') + count = item.get('count', 0) + logdate = log_date + datatype = DATA_TYPE.get("INTERFACE",3) + interface = item.get('interface', '') + keys= json.dumps([interface,ip,account,jobnum]) + records.append({"menu":menu, "ip":ip, "account":account, "jobnum":jobnum, "count":count, "logdate":logdate,"data_type":datatype,"interface":interface,"keys":keys}) + for item in menu_list: + menu = item.get('menu', '') + ip = item.get('ip', '0.0.0.0') + account = item.get('account', '') + jobnum = item.get('jobnum', '') + count = item.get('count', 0) + logdate = log_date + datatype = DATA_TYPE.get("MENU",4) + interface = item.get('interface', '') + keys= json.dumps([menu,ip,account,jobnum]) + records.append({"menu":menu, "ip":ip, "account":account, "jobnum":jobnum, "count":count, "logdate":logdate,"data_type":datatype,"interface":interface,"keys":keys}) + + csv_file = get_clean_file_path()+"/"+log_date+".csv" + logger_cron.info("INSERT: 开始写csv文件") + json_to_csvFile(records,csv_file) + sql = "\copy ueba_analysis_schema.logs2(count,account,logdate,data_type,ip,interface,menu,jobnum,keys) from '{}' with csv header DELIMITER ',';".format(csv_file) + csv_to_pg(sql) + + #重命名文件 + logger_cron.info(itemFile.get('path','')) + logger_cron.info("done_"+itemFile.get('filename', '')) + os.rename(itemFile.get('path',''),get_clean_file_path()+"/done_"+itemFile.get('filename', '')) + logger_cron.info("INSERT: 重命名文件完成,"+itemFile.get('filename', '')) + + logger_cron.info("done_"+itemFile.get('filename', '')) + os.rename(csv_file,get_clean_file_path()+"/done_"+log_date+".csv") + logger_cron.info("INSERT: csv重命名文件完成") + +def entry(): + # 合并文件 + base_path = get_clean_file_path() + files = get_all_files(base_path) + logger_cron.info("INSERT:获取文件数量"+str(len(files))) + #数据入库 + insert_data(files) + +entry() \ No newline at end of file diff --git a/utils/file_helper.py b/utils/file_helper.py index 3d4303a..0f4efb7 100644 --- a/utils/file_helper.py +++ b/utils/file_helper.py @@ -2,11 +2,18 @@ #encoding=utf-8 # author: tangwy import re,os,json -import codecs +import codecs,csv from db2json import DBUtils from datetime import datetime, timedelta from ext_logging import logger_cron,get_clean_file_path +DATA_TYPE = { + "IP": 1, + "ACCOUNT": 2, + "INTERFACE": 3, + "MENU": 4, +} + #写入大文件5M def write_large_file(filename, data_list, chunk_size=1024*1024*5): with codecs.open(filename, 'w', encoding='utf-8') as f: @@ -15,17 +22,13 @@ def write_large_file(filename, data_list, chunk_size=1024*1024*5): f.write(chunk) #读取大文件 -def read_large_json_file(filename, chunk_size=1024*1024*5): # 每次读取5MB的数据 - json_object = '' - with codecs.open(filename, 'r', encoding='utf-8') as f: - while True: - chunk = f.read(chunk_size) - if not chunk: - break - json_object += chunk - - data = json.loads(json_object) - return data +def read_large_json_file(filename): + rows = [] + with open(filename, 'rb') as csvfile: + reader = csv.DictReader(csvfile) + for row in reader: + rows.append(dict(row)) + return json.dumps(rows) #删除文件 def delete_frile(file_path): @@ -82,4 +85,63 @@ def merge_data(datasets): ] return aggregated_data - \ No newline at end of file + +# json 转 csv +def json_to_csv_data(data,log_date): + ip_list = data.get('ip', []) + account_list = data.get('account', []) + interface_list = data.get('interface', []) + menu_list = data.get('menu', []) + + records = [] + for item in ip_list: + menu = item.get('menu', '') + ip = item.get('ip', '0.0.0.0') + account = item.get('account', '') + jobnum = item.get('jobnum', '') + count = item.get('count', 0) + logdate = log_date + datatype = DATA_TYPE.get("IP",1) + interface = item.get('interface', '') + records.append({"menu":menu, "ip":ip, "account":account, "jobnum":jobnum, "count":count, "logdate":logdate,"data_type":datatype,"interface":interface}) + for item in account_list: + menu = item.get('menu', '') + ip = item.get('ip', '0.0.0.0') + account = item.get('account', '') + jobnum = item.get('jobnum', '') + count = item.get('count', 0) + logdate = log_date + datatype = DATA_TYPE.get("ACCOUNT",2) + interface = item.get('interface', '') + records.append({"menu":menu, "ip":ip, "account":account, "jobnum":jobnum, "count":count, "logdate":logdate,"data_type":datatype,"interface":interface}) + for item in interface_list: + menu = item.get('menu', '') + ip = item.get('ip', '0.0.0.0') + account = item.get('account', '') + jobnum = item.get('jobnum', '') + count = item.get('count', 0) + logdate = log_date + datatype = DATA_TYPE.get("INTERFACE",3) + interface = item.get('interface', '') + records.append({"menu":menu, "ip":ip, "account":account, "jobnum":jobnum, "count":count, "logdate":logdate,"data_type":datatype,"interface":interface}) + for item in menu_list: + menu = item.get('menu', '') + ip = item.get('ip', '0.0.0.0') + account = item.get('account', '') + jobnum = item.get('jobnum', '') + count = item.get('count', 0) + logdate = log_date + datatype = DATA_TYPE.get("MENU",4) + interface = item.get('interface', '') + records.append({"menu":menu, "ip":ip, "account":account, "jobnum":jobnum, "count":count, "logdate":logdate,"data_type":datatype,"interface":interface}) + return records + +def write_csv(json_data, csv_file): + # 提取字段名 + fields = json_data[0].keys() # 假设第一个元素包含所有可能的键 + with open(csv_file, 'wb') as csvfile: # 注意这里使用 'wb' 模式 + writer = csv.DictWriter(csvfile, fieldnames=fields) + writer.writeheader() + for row in json_data: + row = {k: v.encode('utf-8') if isinstance(v, unicode) else v for k, v in row.items()} + writer.writerow(row) diff --git a/utils/file_merge.py b/utils/file_merge.py index a732ed0..30909e7 100644 --- a/utils/file_merge.py +++ b/utils/file_merge.py @@ -6,10 +6,10 @@ import codecs from db2json import DBUtils from datetime import datetime, timedelta from ext_logging import logger_cron,get_clean_file_path -from file_helper import read_large_json_file,write_large_file,merge_data,delete_frile +from file_helper import read_large_json_file,write_large_file,merge_data,delete_frile,json_to_csv_data,write_csv from collections import defaultdict -date_pattern = re.compile(r'\d{8}-\d{4}\.json') +date_pattern = re.compile(r'\d{8}-\d{4}\.csv') def get_all_files(path): # 列出所有包含匹配模式的文件名 @@ -35,27 +35,42 @@ def merge_all_files(file_dict,base_path): # 遍历字典中的每一个键值对 for date_str, files in file_dict.items(): #20240721 - root_file_path = "{}-{}-{}.json".format(date_str[:4], date_str[4:6], date_str[6:]) + root_file_path = "{}-{}-{}.csv".format(date_str[:4], date_str[4:6], date_str[6:]) full_root_file_path = os.path.join(base_path,root_file_path) if len(files)>0: - file_objs=[] - if os.path.exists(full_root_file_path): - root_data = read_large_json_file(full_root_file_path) - file_objs.append(root_data) - + file_full_path = [] + aggregated_data = {} for filename in files: + file_objs=[] #20240721-0170.json full_path = os.path.join(base_path,filename) file_full_path.append(full_path) logger_cron.info("FILE_MERGE: 准备读取文件"+full_path) + tmp_data =read_large_json_file(full_path) file_objs.append(tmp_data) - + if aggregated_data: + file_objs.append(aggregated_data) + + aggregated_data = merge_data(file_objs) + + # 最后合并基础文件 + result_data_array = [] + if os.path.exists(full_root_file_path): + root_data = read_large_json_file(full_root_file_path) + result_data_array.append(root_data) + + if aggregated_data: + result_data_array.append(aggregated_data) + logger_cron.info("FILE_MERGE: 准备合并文件") - data = merge_data(file_objs) + data = merge_data(result_data_array) + logger_cron.info("FILE_MERGE: 准备写入合并的文件") - write_large_file(full_root_file_path,json.dumps(data)) + + csv_data = json_to_csv_data(data,""), + write_csv(csv_data,full_root_file_path) logger_cron.info("FILE_MERGE: 写入合并文件完成") #准备删除合并文件 for del_file in file_full_path: diff --git a/utils/file_to_pg.py b/utils/file_to_pg.py index 4193feb..f08e244 100644 --- a/utils/file_to_pg.py +++ b/utils/file_to_pg.py @@ -6,13 +6,13 @@ import codecs,csv from db2json import DBUtils from datetime import datetime, timedelta from ext_logging import logger_cron,get_clean_file_path -from file_helper import read_large_json_file +from file_helper import read_large_json_file,json_to_csv_data,write_csv from file_merge import entry as merge_entry from appsUtils.confutil import ConfUtil from dataInterface.functions import CFunction from dataInterface.db.params import CPgSqlParam -date_pattern = re.compile(r'^\d{4}-\d{2}-\d{2}.json$') +date_pattern = re.compile(r'^\d{4}-\d{2}-\d{2}.csv$') LOG_TABLE_NAME = "ueba_analysis_schema.logs" @@ -84,20 +84,10 @@ def get_all_files(path): for filename in os.listdir(path): if date_pattern.search(filename): #由于定时任务是凌晨3点执行 所以只处理昨天的数据,今天的不处理 - if datetime.now().strftime("%Y-%m-%d")+".json" != filename: + if datetime.now().strftime("%Y-%m-%d")+".csv" != filename: files.append({"filename": filename, "path": os.path.join(path,filename)}) return files -def json_to_csvFile(json_data, csv_file): - # 提取字段名 - fields = json_data[0].keys() # 假设第一个元素包含所有可能的键 - with open(csv_file, 'wb') as csvfile: # 注意这里使用 'wb' 模式 - writer = csv.DictWriter(csvfile, fieldnames=fields) - writer.writeheader() - for row in json_data: - row = {k: v.encode('utf-8') if isinstance(v, unicode) else v for k, v in row.items()} - writer.writerow(row) - def csv_to_pg(sql): logger_cron.info("INSERT: 准备数据入库") confutil = ConfUtil() @@ -128,52 +118,12 @@ def insert_data(files): basename, extension = os.path.splitext(itemFile.get('filename', '')) log_date = basename - # print ("filename:"+log_date) - records = [] - for item in ip_list: - menu = item.get('menu', '') - ip = item.get('ip', '0.0.0.0') - account = item.get('account', '') - jobnum = item.get('jobnum', '') - count = item.get('count', 0) - logdate = log_date - datatype = DATA_TYPE.get("IP",1) - interface = item.get('interface', '') - records.append({"menu":menu, "ip":ip, "account":account, "jobnum":jobnum, "count":count, "logdate":logdate,"data_type":datatype,"interface":interface}) - for item in account_list: - menu = item.get('menu', '') - ip = item.get('ip', '0.0.0.0') - account = item.get('account', '') - jobnum = item.get('jobnum', '') - count = item.get('count', 0) - logdate = log_date - datatype = DATA_TYPE.get("ACCOUNT",2) - interface = item.get('interface', '') - records.append({"menu":menu, "ip":ip, "account":account, "jobnum":jobnum, "count":count, "logdate":logdate,"data_type":datatype,"interface":interface}) - for item in interface_list: - menu = item.get('menu', '') - ip = item.get('ip', '0.0.0.0') - account = item.get('account', '') - jobnum = item.get('jobnum', '') - count = item.get('count', 0) - logdate = log_date - datatype = DATA_TYPE.get("INTERFACE",3) - interface = item.get('interface', '') - records.append({"menu":menu, "ip":ip, "account":account, "jobnum":jobnum, "count":count, "logdate":logdate,"data_type":datatype,"interface":interface}) - for item in menu_list: - menu = item.get('menu', '') - ip = item.get('ip', '0.0.0.0') - account = item.get('account', '') - jobnum = item.get('jobnum', '') - count = item.get('count', 0) - logdate = log_date - datatype = DATA_TYPE.get("MENU",4) - interface = item.get('interface', '') - records.append({"menu":menu, "ip":ip, "account":account, "jobnum":jobnum, "count":count, "logdate":logdate,"data_type":datatype,"interface":interface}) - + csv_file = get_clean_file_path()+"/"+log_date+".csv" + records = json_to_csv_data(data,log_date) + logger_cron.info("INSERT: 开始写csv文件") - json_to_csvFile(records,csv_file) + write_csv(records,csv_file) sql = "\copy ueba_analysis_schema.logs(count,account,logdate,data_type,ip,interface,menu,jobnum) from '{}' with csv header DELIMITER ',';".format(csv_file) csv_to_pg(sql)