代码提交

dev
TANGWY 4 months ago
parent 189d1c8cdd
commit 1d0236c5d6
  1. 176
      cron/tmp_log_alarm copy.py
  2. 71
      cron/tmp_log_alarm.py
  3. BIN
      uebaMetricsAnalysis.tar
  4. 13
      utils/base_dataclean_pg.py
  5. 178
      utils/data_test.py
  6. 88
      utils/file_helper.py
  7. 37
      utils/file_merge.py
  8. 64
      utils/file_to_pg.py

@ -0,0 +1,176 @@
# coding:utf-8
import sys
import uuid
import json
import time
import random
# path = str(sys.path[0])
# home_path = path.split("isop_uebaapiData")[0]
# sys.path.append(home_path)
from util import send_logs
def alarm(cookies, api):
"""2、HTTP日志"""
inputstr = '''[{"msgtype":1,"hash":"8DE9-BDAB-F622-2FA8","dev_ip":"10.67.5.17","product":"uts"},{"sid":"6004744450036c44f815500016d00a5f5151105430a3ed","timestamp":1567673939,"sip":"10.67.0.52","sport":5624,"dip":"10.67.0.53","dport":80,"protocol":6,"app":3087428650795009,"app_proto":8,"direct":4,"app.detail":{"method":"GET","http_protocol":"1.1","ret_code":200,"host":"10.67.1.1","uri":"/webtest/uploadFile.php","referer":"http://[2222::65]/webtest/","content_type":" multipart/form-data; boundary=----WebKitFormBoundary2zcCUl4lQf1h7A7S","content_type_server":" text/html","server":"Apache/2.4.4 (Win32) OpenSSL/0.9.8y PHP/5.4.19","user_agent":"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36","link":"","cookies":"loginmainacctid=wangshiguang;operatorId=d2601586;com.huawei.boss.CURRENT_MENUID=BLAR_ChargeCrm3_WEB;","content_encoding":"","location":"","content_length":70080,"content_length_server":200,"set_cookie":"","range":"","connection":"keep-alive","connection_server":"Keep-Alive","x_forwarded_for":"","post_data":"LS0tLS0tV2ViS2l0Rm9ybUJvdW5kYXJ5MnpjQ1VsNGxRZjFoN0E3Uw0KQ29udGVudC1EaXNwb3NpdGlvbjogZm9ybS1kYXRhOyBuYW1lPSJmaWxlIjsgZmlsZW5hbWU9IjAwMDFhZDQ0MWRjYjM4NjIxOGE3Njk5MmFjZjhiNzA1Ig0=","response_body":"VXBsb2FkOiAwMDAxYWQ0NDFkY2IzODYyMThhNzY5OTJhY2Y4YjcwNTxiciAvPlR5cGU6IGFwcGxpY2F0aW9uL29jdGV0LXN0cmVhbTxiciAvPlNpemU6IDY4LjEyNzkyOTY4NzUgS2I8YnIgLz5UZW1wIGZpbGU6IEQ6XHhhbXBwXHRtcFxwaHA2ODI1LnRtcDxiciAvPjAwMDFhZDQ0MWRjYjM4NjIxOGE3Njk5MmFjZjhiNzA1IGFscmVhZHkgZXhpc3RzLiA="}}]'''
inputarr = json.loads(inputstr, strict=False)
# 随机生成timestamp
inputarr[1]["timestamp"] = int(time.time())
inputarr[1]["sid"] = str(uuid.uuid1())
# inputarr[1]["sip"] = "10.67.4.33"
inputarr[1]["sip"] = generate_random_ip()
inputarr[1]["dip"] = "10.67.1.1"
inputarr[1]["dport"] = "8180"
inputarr[1]["app.detail"]["uri"] = "/alarmtest.action?BMECID=352432757&BMETimestamp=1692788489260&queryNumber=158713459"
inputarr[1]["app.detail"]["host"] = api
inputarr[1]["app.detail"]["cookies"] = cookies
inputarr[1]["account"] = get_random_person()
inputarr[1]["trojan_type"] = get_random_jobnum()
inputarr[1]["worm_family"] = get_random_menu()
inputarr[1]["interface"] = get_random_inteface()
return json.dumps(inputarr)
def generate_random_ip():
# 固定前缀 "192.168."
prefix = "192.168."
# 生成随机的第三和第四段IP地址
third_octet = 19
fourth_octet = random.randint(0, 50)
# 拼接IP地址
ip = "{}{}.{}".format(prefix, third_octet, fourth_octet)
return ip
def AbIDVisitAPINums510404():
datalist = {"TCP_5011": list()}
ID2Area = {
"武汉": ["1101820", "1101821", "1101822", "1101823", "1101825"],
"荆州": ["2001800", "2001801", "2001808"],
"江汉": ["1801820", "1801810"],
"省公司市场部": ["1002011", "1002012", "1002013"]
}
api_list = ["test.alarm.com/webtest", "alarm.com/testalarm", "business.system..alarmcom", "hhh.alarm.com",
"good.alarm.com"]
info_list = [
["u-locale=zh_CN;loginmainacctid=zhang3;operatorId=" + ID2Area["武汉"][
0] + ";com.huawei.boss.CURRENT_MENUID=BLAR_ChargeCrm3_WEB;", 60],
["u-locale=zh_CN;loginmainacctid=zhang3;operatorId=" + ID2Area["荆州"][
2] + ";com.huawei.boss.CURRENT_MENUID=BLAR_ChargeCrm3_WEB;", 120]
]
for i in range(len(info_list)):
cookies = info_list[i][0]
count = info_list[i][1]
for j in range(count):
api = random.choice(api_list)
datalist["TCP_5011"].append(alarm(cookies, api))
for key in datalist.keys():
send_logs(datalist[key])
return "510405场景的告警数据已生成"
def get_random_jobnum():
# 定义包含不同前缀的字符串数组
prefix_strings = [
['10243', '10895', '10134', '10781', '10962'], # 10打头的字符串示例
['11089', '11057', '11023', '11016', '11030'], # 110打头的字符串示例
['14076', '14049', '14098', '14032', '14061'], # 140打头的字符串示例
['26054', '26013', '26087', '26029', '26061'], # 260打头的字符串示例
['20083', '20015', '20072', '20096', '20048'], # 200打头的字符串示例
['19035', '19017', '19049', '19082', '19096'], # 190打头的字符串示例
['180237', '180276', '180204', '180295', '180219'] # 1802打头的字符串示例
]
# 随机选择一个前缀数组
selected_prefix_array = random.choice(prefix_strings)
# 随机选择一个具体的字符串
selected_string = random.choice(selected_prefix_array)
return selected_string
def get_random_person():
people_list = [
"Alice", "Bob", "Charlie", "David", "Emma", "Frank", "Grace2","Alice2", "Bob2", "Charlie2", "David2", "Emma2", "Frank2", "Grace2"
]
random_person = random.choice(people_list)
return random_person
def get_random_menu():
# 定义系统菜单列表
system_menu = [
"开发", "测试", "部署", "配置", "设置", "安装", "卸载", "升级", "更新",
"修复", "修正", "修补", "更新", "安全", "保护", "防护", "防御", "防止",
"检查", "扫描", "监控", "跟踪", "追踪", "审计", "审查", "测试", "测量"
]
# 随机选择一个菜单项
random_menu_item = random.choice(system_menu)
return random_menu_item
def get_random_inteface():
# 定义系统菜单列表
system_menu = [
"http://bai1.doc.com/api", "http://bai2.doc.com/api", "http://bai3.doc.com/api", "http://bai4.doc.com/api", "http://bai5.doc.com/api", "http://bai12.doc.com/api","http://bai13.doc.com/api", "http://bai19.doc.com/api",
"http://bai6.doc.com/api", "http://bai7.doc.com/api", "http://bai8.doc.com/api", "http://bai9.doc.com/api", "http://bai11.doc.com/api"
]
# 随机选择一个菜单项
random_menu_item = random.choice(system_menu)
return random_menu_item
if __name__ == '__main__':
datalist = {"TCP_5011": list()}
ID2Area = {
"武汉": ["1101820", "1101821", "1101822", "1101823", "1101825"],
"荆州": ["2001800", "2001801", "2001808"],
"江汉": ["1801820", "1801810"],
"省公司市场部": ["1002011", "1002012", "1002013"]
}
api_list = ["test.alarm.com/webtest", "alarm.com/testalarm", "business.system..alarmcom", "hhh.alarm.com", "good.alarm.com","baidu.com","sohu.com","xinlang.com","erpx.com"]
info_list = [
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 400],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 300],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 200],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 400],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 300],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 200],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 400],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 300],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 200],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 400],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 300],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 200],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 400],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 300],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 200],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 400],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 300],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 200],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 400],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 300],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 200],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 400],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 300],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 200],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 400],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 300],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 200]
]
for i in range(len(info_list)):
cookies = info_list[i][0]
count = info_list[i][1]
for j in range(count):
api = random.choice(api_list)
datalist["TCP_5011"].append(alarm(cookies, api))
for key in datalist.keys():
send_logs(datalist[key])
print "510405场景的告警数据已生成"

@ -113,76 +113,7 @@ if __name__ == '__main__':
api_list = ["test.alarm.com/webtest", "alarm.com/testalarm", "business.system..alarmcom", "hhh.alarm.com", "good.alarm.com","baidu.com","sohu.com","xinlang.com","erpx.com"] api_list = ["test.alarm.com/webtest", "alarm.com/testalarm", "business.system..alarmcom", "hhh.alarm.com", "good.alarm.com","baidu.com","sohu.com","xinlang.com","erpx.com"]
info_list = [ info_list = [
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 3000], ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 3000],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 3000], ["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 3000]
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 3000],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 5000],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 111],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 111],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 111],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 111],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 111],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 111],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 111],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100],
["u-locale=zh_CN; loginmainacctid="+get_random_person()+"; userticket=209@9889@23223@10.0.1.183@lis8; operatorId=" + get_random_jobnum() + "; com.huawei.boss.CURRENT_MENUID="+get_random_menu()+";", 100]
] ]

Binary file not shown.

@ -8,6 +8,7 @@ import calendar
import codecs import codecs
from esUtil import EsUtil from esUtil import EsUtil
from file_helper import write_large_file,merge_data from file_helper import write_large_file,merge_data
from file_helper import read_large_json_file,json_to_csv_data,write_csv
from uebaMetricsAnalysis.utils.ext_logging import logger,logger_cron,get_clean_file_path from uebaMetricsAnalysis.utils.ext_logging import logger,logger_cron,get_clean_file_path
from collections import defaultdict from collections import defaultdict
@ -273,23 +274,23 @@ def group_and_write_to_file(data_ip, data_account, data_interface, data_menu, st
logger_cron.info("JOB: "+jobid+",写入文件base路径"+base_path) logger_cron.info("JOB: "+jobid+",写入文件base路径"+base_path)
date_time = convert_utc_to_local_time(start) date_time = convert_utc_to_local_time(start)
#临时文件 临时文件格式:20240720-1630_tmp.json #临时文件 临时文件格式:20240720-1630_tmp.json
tmp_file_name = time.strftime("%Y%m%d-%H%M_tmp.json", date_time) tmp_file_name = time.strftime("%Y%m%d-%H%M_tmp.csv", date_time)
tmp_file_path = os.path.join(base_path,tmp_file_name) tmp_file_path = os.path.join(base_path,tmp_file_name)
#正式文件 正式文件格式:20240720-1630.json #正式文件 正式文件格式:20240720-1630.json
file_name = time.strftime("%Y%m%d-%H%M.json", date_time) file_name = time.strftime("%Y%m%d-%H%M.csv", date_time)
file_path = os.path.join(base_path,file_name) file_path = os.path.join(base_path,file_name)
logger_cron.info("JOB:"+jobid+", tmpfilepath"+tmp_file_path) logger_cron.info("JOB:"+jobid+", tmpfilepath"+tmp_file_path)
all_data = [data] all_data = [data]
merged_data = merge_data(all_data) merged_data = merge_data(all_data)
json_data = json.dumps(merged_data)
csv_data = json_to_csv_data(merged_data,"")
#写入文件 #写入文件
logger_cron.info("JOB: "+jobid+",准备写入文件") logger_cron.info("JOB: "+jobid+",准备写入文件")
write_large_file(tmp_file_path,json_data) write_csv(csv_data, tmp_file_path)
#重命名文件 #重命名文件
os.rename(tmp_file_path, file_path) os.rename(tmp_file_path, file_path)
logger_cron.info("JOB: "+jobid+",写入文件完成") logger_cron.info("JOB: "+jobid+",写入文件完成")
def group_and_sum(data, by_fields="ip,jobnum"): def group_and_sum(data, by_fields="ip,jobnum"):

@ -0,0 +1,178 @@
#!/usr/bin/python
#encoding=utf-8
# author: tangwy
import re,os,json
import codecs,csv
from db2json import DBUtils
from datetime import datetime, timedelta
from ext_logging import logger_cron,get_clean_file_path
from file_helper import read_large_json_file
from file_merge import entry as merge_entry
from appsUtils.confutil import ConfUtil
from dataInterface.functions import CFunction
from dataInterface.db.params import CPgSqlParam
date_pattern = re.compile(r'^\d{4}-\d{2}-\d{2}.json$')
LOG_TABLE_NAME = "ueba_analysis_schema.logs2"
DATA_TYPE = {
"IP": 1,
"ACCOUNT": 2,
"INTERFACE": 3,
"MENU": 4,
}
# 获取当前日期并格式化为"年-月"
def get_current_year_month():
now = datetime.now()
return now.strftime("%Y_%m")
# 获取当前月份的第一天并格式化为"年-月-日"
def get_first_day_of_current_month():
now = datetime.now()
first_day = now.replace(day=1)
return first_day.strftime("%Y-%m-%d")
# 获取当前日期,然后计算下个月的第一天
def get_first_day_of_next_month():
now = datetime.now()
if now.month == 12:
next_month = now.replace(year=now.year+1, month=1, day=1)
else:
next_month = now.replace(month=now.month+1, day=1)
return next_month.strftime("%Y-%m-%d")
#获取表名
def get_table_name():
year_month = get_current_year_month()
return LOG_TABLE_NAME+'_'+ year_month
#获取表区间
def get_table_data_range():
start= get_first_day_of_current_month()
end = get_first_day_of_next_month()
return start,end
def get_all_files(path):
# 列出所有包含匹配模式的文件名
files = []
for filename in os.listdir(path):
if date_pattern.search(filename):
#由于定时任务是凌晨3点执行 所以只处理昨天的数据,今天的不处理
if datetime.now().strftime("%Y-%m-%d")+".json" != filename:
files.append({"filename": filename, "path": os.path.join(path,filename)})
return files
def json_to_csvFile(json_data, csv_file):
# 提取字段名
fields = json_data[0].keys() # 假设第一个元素包含所有可能的键
with open(csv_file, 'wb') as csvfile: # 注意这里使用 'wb' 模式
writer = csv.DictWriter(csvfile, fieldnames=fields)
writer.writeheader()
for row in json_data:
row = {k: v.encode('utf-8') if isinstance(v, unicode) else v for k, v in row.items()}
writer.writerow(row)
def csv_to_pg(sql):
logger_cron.info("INSERT: 准备数据入库")
confutil = ConfUtil()
cur_pg_conf = confutil.getPostgresqlConf()
cmd = """psql {} -U {} -w -c \"{}\"""".format(cur_pg_conf["database"],cur_pg_conf["username"],sql)
logger_cron.info("INSERT: "+ cmd)
rtn = os.popen(cmd)
cmd_rtn = rtn.readlines()
logger_cron.info("INSERT: "+ json.dumps(cmd_rtn))
logger_cron.info("INSERT: 数据入库完成")
#数据入库
def insert_data(files):
for itemFile in files:
if os.path.exists(itemFile.get("path",'')):
data =read_large_json_file(itemFile.get("path",''))
logger_cron.info("INSERT: 准备读取聚合文件:"+itemFile.get('path',''))
logger_cron.info("INSERT: 读取聚合文件完成")
ip_list = data.get('ip', [])
account_list = data.get('account', [])
interface_list = data.get('interface', [])
menu_list = data.get('menu', [])
logger_cron.info("INSERT: IP维度 " +str(len(ip_list)))
logger_cron.info("INSERT: ACCOUNT维度 " +str(len(account_list)))
logger_cron.info("INSERT: INTERFACE维度 " +str(len(interface_list)))
logger_cron.info("INSERT: MENU维度 " +str(len(menu_list)))
basename, extension = os.path.splitext(itemFile.get('filename', ''))
log_date = basename
# print ("filename:"+log_date)
records = []
for item in ip_list:
menu = item.get('menu', '')
ip = item.get('ip', '0.0.0.0')
account = item.get('account', '')
jobnum = item.get('jobnum', '')
count = item.get('count', 0)
logdate = log_date
datatype = DATA_TYPE.get("IP",1)
interface = item.get('interface', '')
keys= json.dumps([ip,jobnum])
records.append({"menu":menu, "ip":ip, "account":account, "jobnum":jobnum, "count":count, "logdate":logdate,"data_type":datatype,"interface":interface,"keys":keys})
for item in account_list:
menu = item.get('menu', '')
ip = item.get('ip', '0.0.0.0')
account = item.get('account', '')
jobnum = item.get('jobnum', '')
count = item.get('count', 0)
logdate = log_date
datatype = DATA_TYPE.get("ACCOUNT",2)
interface = item.get('interface', '')
keys= json.dumps([account,jobnum])
records.append({"menu":menu, "ip":ip, "account":account, "jobnum":jobnum, "count":count, "logdate":logdate,"data_type":datatype,"interface":interface,"keys":keys})
for item in interface_list:
menu = item.get('menu', '')
ip = item.get('ip', '0.0.0.0')
account = item.get('account', '')
jobnum = item.get('jobnum', '')
count = item.get('count', 0)
logdate = log_date
datatype = DATA_TYPE.get("INTERFACE",3)
interface = item.get('interface', '')
keys= json.dumps([interface,ip,account,jobnum])
records.append({"menu":menu, "ip":ip, "account":account, "jobnum":jobnum, "count":count, "logdate":logdate,"data_type":datatype,"interface":interface,"keys":keys})
for item in menu_list:
menu = item.get('menu', '')
ip = item.get('ip', '0.0.0.0')
account = item.get('account', '')
jobnum = item.get('jobnum', '')
count = item.get('count', 0)
logdate = log_date
datatype = DATA_TYPE.get("MENU",4)
interface = item.get('interface', '')
keys= json.dumps([menu,ip,account,jobnum])
records.append({"menu":menu, "ip":ip, "account":account, "jobnum":jobnum, "count":count, "logdate":logdate,"data_type":datatype,"interface":interface,"keys":keys})
csv_file = get_clean_file_path()+"/"+log_date+".csv"
logger_cron.info("INSERT: 开始写csv文件")
json_to_csvFile(records,csv_file)
sql = "\copy ueba_analysis_schema.logs2(count,account,logdate,data_type,ip,interface,menu,jobnum,keys) from '{}' with csv header DELIMITER ',';".format(csv_file)
csv_to_pg(sql)
#重命名文件
logger_cron.info(itemFile.get('path',''))
logger_cron.info("done_"+itemFile.get('filename', ''))
os.rename(itemFile.get('path',''),get_clean_file_path()+"/done_"+itemFile.get('filename', ''))
logger_cron.info("INSERT: 重命名文件完成,"+itemFile.get('filename', ''))
logger_cron.info("done_"+itemFile.get('filename', ''))
os.rename(csv_file,get_clean_file_path()+"/done_"+log_date+".csv")
logger_cron.info("INSERT: csv重命名文件完成")
def entry():
# 合并文件
base_path = get_clean_file_path()
files = get_all_files(base_path)
logger_cron.info("INSERT:获取文件数量"+str(len(files)))
#数据入库
insert_data(files)
entry()

@ -2,11 +2,18 @@
#encoding=utf-8 #encoding=utf-8
# author: tangwy # author: tangwy
import re,os,json import re,os,json
import codecs import codecs,csv
from db2json import DBUtils from db2json import DBUtils
from datetime import datetime, timedelta from datetime import datetime, timedelta
from ext_logging import logger_cron,get_clean_file_path from ext_logging import logger_cron,get_clean_file_path
DATA_TYPE = {
"IP": 1,
"ACCOUNT": 2,
"INTERFACE": 3,
"MENU": 4,
}
#写入大文件5M #写入大文件5M
def write_large_file(filename, data_list, chunk_size=1024*1024*5): def write_large_file(filename, data_list, chunk_size=1024*1024*5):
with codecs.open(filename, 'w', encoding='utf-8') as f: with codecs.open(filename, 'w', encoding='utf-8') as f:
@ -15,17 +22,13 @@ def write_large_file(filename, data_list, chunk_size=1024*1024*5):
f.write(chunk) f.write(chunk)
#读取大文件 #读取大文件
def read_large_json_file(filename, chunk_size=1024*1024*5): # 每次读取5MB的数据 def read_large_json_file(filename):
json_object = '' rows = []
with codecs.open(filename, 'r', encoding='utf-8') as f: with open(filename, 'rb') as csvfile:
while True: reader = csv.DictReader(csvfile)
chunk = f.read(chunk_size) for row in reader:
if not chunk: rows.append(dict(row))
break return json.dumps(rows)
json_object += chunk
data = json.loads(json_object)
return data
#删除文件 #删除文件
def delete_frile(file_path): def delete_frile(file_path):
@ -82,4 +85,63 @@ def merge_data(datasets):
] ]
return aggregated_data return aggregated_data
# json 转 csv
def json_to_csv_data(data,log_date):
ip_list = data.get('ip', [])
account_list = data.get('account', [])
interface_list = data.get('interface', [])
menu_list = data.get('menu', [])
records = []
for item in ip_list:
menu = item.get('menu', '')
ip = item.get('ip', '0.0.0.0')
account = item.get('account', '')
jobnum = item.get('jobnum', '')
count = item.get('count', 0)
logdate = log_date
datatype = DATA_TYPE.get("IP",1)
interface = item.get('interface', '')
records.append({"menu":menu, "ip":ip, "account":account, "jobnum":jobnum, "count":count, "logdate":logdate,"data_type":datatype,"interface":interface})
for item in account_list:
menu = item.get('menu', '')
ip = item.get('ip', '0.0.0.0')
account = item.get('account', '')
jobnum = item.get('jobnum', '')
count = item.get('count', 0)
logdate = log_date
datatype = DATA_TYPE.get("ACCOUNT",2)
interface = item.get('interface', '')
records.append({"menu":menu, "ip":ip, "account":account, "jobnum":jobnum, "count":count, "logdate":logdate,"data_type":datatype,"interface":interface})
for item in interface_list:
menu = item.get('menu', '')
ip = item.get('ip', '0.0.0.0')
account = item.get('account', '')
jobnum = item.get('jobnum', '')
count = item.get('count', 0)
logdate = log_date
datatype = DATA_TYPE.get("INTERFACE",3)
interface = item.get('interface', '')
records.append({"menu":menu, "ip":ip, "account":account, "jobnum":jobnum, "count":count, "logdate":logdate,"data_type":datatype,"interface":interface})
for item in menu_list:
menu = item.get('menu', '')
ip = item.get('ip', '0.0.0.0')
account = item.get('account', '')
jobnum = item.get('jobnum', '')
count = item.get('count', 0)
logdate = log_date
datatype = DATA_TYPE.get("MENU",4)
interface = item.get('interface', '')
records.append({"menu":menu, "ip":ip, "account":account, "jobnum":jobnum, "count":count, "logdate":logdate,"data_type":datatype,"interface":interface})
return records
def write_csv(json_data, csv_file):
# 提取字段名
fields = json_data[0].keys() # 假设第一个元素包含所有可能的键
with open(csv_file, 'wb') as csvfile: # 注意这里使用 'wb' 模式
writer = csv.DictWriter(csvfile, fieldnames=fields)
writer.writeheader()
for row in json_data:
row = {k: v.encode('utf-8') if isinstance(v, unicode) else v for k, v in row.items()}
writer.writerow(row)

@ -6,10 +6,10 @@ import codecs
from db2json import DBUtils from db2json import DBUtils
from datetime import datetime, timedelta from datetime import datetime, timedelta
from ext_logging import logger_cron,get_clean_file_path from ext_logging import logger_cron,get_clean_file_path
from file_helper import read_large_json_file,write_large_file,merge_data,delete_frile from file_helper import read_large_json_file,write_large_file,merge_data,delete_frile,json_to_csv_data,write_csv
from collections import defaultdict from collections import defaultdict
date_pattern = re.compile(r'\d{8}-\d{4}\.json') date_pattern = re.compile(r'\d{8}-\d{4}\.csv')
def get_all_files(path): def get_all_files(path):
# 列出所有包含匹配模式的文件名 # 列出所有包含匹配模式的文件名
@ -35,27 +35,42 @@ def merge_all_files(file_dict,base_path):
# 遍历字典中的每一个键值对 # 遍历字典中的每一个键值对
for date_str, files in file_dict.items(): for date_str, files in file_dict.items():
#20240721 #20240721
root_file_path = "{}-{}-{}.json".format(date_str[:4], date_str[4:6], date_str[6:]) root_file_path = "{}-{}-{}.csv".format(date_str[:4], date_str[4:6], date_str[6:])
full_root_file_path = os.path.join(base_path,root_file_path) full_root_file_path = os.path.join(base_path,root_file_path)
if len(files)>0: if len(files)>0:
file_objs=[]
if os.path.exists(full_root_file_path):
root_data = read_large_json_file(full_root_file_path)
file_objs.append(root_data)
file_full_path = [] file_full_path = []
aggregated_data = {}
for filename in files: for filename in files:
file_objs=[]
#20240721-0170.json #20240721-0170.json
full_path = os.path.join(base_path,filename) full_path = os.path.join(base_path,filename)
file_full_path.append(full_path) file_full_path.append(full_path)
logger_cron.info("FILE_MERGE: 准备读取文件"+full_path) logger_cron.info("FILE_MERGE: 准备读取文件"+full_path)
tmp_data =read_large_json_file(full_path) tmp_data =read_large_json_file(full_path)
file_objs.append(tmp_data) file_objs.append(tmp_data)
if aggregated_data:
file_objs.append(aggregated_data)
aggregated_data = merge_data(file_objs)
# 最后合并基础文件
result_data_array = []
if os.path.exists(full_root_file_path):
root_data = read_large_json_file(full_root_file_path)
result_data_array.append(root_data)
if aggregated_data:
result_data_array.append(aggregated_data)
logger_cron.info("FILE_MERGE: 准备合并文件") logger_cron.info("FILE_MERGE: 准备合并文件")
data = merge_data(file_objs) data = merge_data(result_data_array)
logger_cron.info("FILE_MERGE: 准备写入合并的文件") logger_cron.info("FILE_MERGE: 准备写入合并的文件")
write_large_file(full_root_file_path,json.dumps(data))
csv_data = json_to_csv_data(data,""),
write_csv(csv_data,full_root_file_path)
logger_cron.info("FILE_MERGE: 写入合并文件完成") logger_cron.info("FILE_MERGE: 写入合并文件完成")
#准备删除合并文件 #准备删除合并文件
for del_file in file_full_path: for del_file in file_full_path:

@ -6,13 +6,13 @@ import codecs,csv
from db2json import DBUtils from db2json import DBUtils
from datetime import datetime, timedelta from datetime import datetime, timedelta
from ext_logging import logger_cron,get_clean_file_path from ext_logging import logger_cron,get_clean_file_path
from file_helper import read_large_json_file from file_helper import read_large_json_file,json_to_csv_data,write_csv
from file_merge import entry as merge_entry from file_merge import entry as merge_entry
from appsUtils.confutil import ConfUtil from appsUtils.confutil import ConfUtil
from dataInterface.functions import CFunction from dataInterface.functions import CFunction
from dataInterface.db.params import CPgSqlParam from dataInterface.db.params import CPgSqlParam
date_pattern = re.compile(r'^\d{4}-\d{2}-\d{2}.json$') date_pattern = re.compile(r'^\d{4}-\d{2}-\d{2}.csv$')
LOG_TABLE_NAME = "ueba_analysis_schema.logs" LOG_TABLE_NAME = "ueba_analysis_schema.logs"
@ -84,20 +84,10 @@ def get_all_files(path):
for filename in os.listdir(path): for filename in os.listdir(path):
if date_pattern.search(filename): if date_pattern.search(filename):
#由于定时任务是凌晨3点执行 所以只处理昨天的数据,今天的不处理 #由于定时任务是凌晨3点执行 所以只处理昨天的数据,今天的不处理
if datetime.now().strftime("%Y-%m-%d")+".json" != filename: if datetime.now().strftime("%Y-%m-%d")+".csv" != filename:
files.append({"filename": filename, "path": os.path.join(path,filename)}) files.append({"filename": filename, "path": os.path.join(path,filename)})
return files return files
def json_to_csvFile(json_data, csv_file):
# 提取字段名
fields = json_data[0].keys() # 假设第一个元素包含所有可能的键
with open(csv_file, 'wb') as csvfile: # 注意这里使用 'wb' 模式
writer = csv.DictWriter(csvfile, fieldnames=fields)
writer.writeheader()
for row in json_data:
row = {k: v.encode('utf-8') if isinstance(v, unicode) else v for k, v in row.items()}
writer.writerow(row)
def csv_to_pg(sql): def csv_to_pg(sql):
logger_cron.info("INSERT: 准备数据入库") logger_cron.info("INSERT: 准备数据入库")
confutil = ConfUtil() confutil = ConfUtil()
@ -128,52 +118,12 @@ def insert_data(files):
basename, extension = os.path.splitext(itemFile.get('filename', '')) basename, extension = os.path.splitext(itemFile.get('filename', ''))
log_date = basename log_date = basename
# print ("filename:"+log_date)
records = []
for item in ip_list:
menu = item.get('menu', '')
ip = item.get('ip', '0.0.0.0')
account = item.get('account', '')
jobnum = item.get('jobnum', '')
count = item.get('count', 0)
logdate = log_date
datatype = DATA_TYPE.get("IP",1)
interface = item.get('interface', '')
records.append({"menu":menu, "ip":ip, "account":account, "jobnum":jobnum, "count":count, "logdate":logdate,"data_type":datatype,"interface":interface})
for item in account_list:
menu = item.get('menu', '')
ip = item.get('ip', '0.0.0.0')
account = item.get('account', '')
jobnum = item.get('jobnum', '')
count = item.get('count', 0)
logdate = log_date
datatype = DATA_TYPE.get("ACCOUNT",2)
interface = item.get('interface', '')
records.append({"menu":menu, "ip":ip, "account":account, "jobnum":jobnum, "count":count, "logdate":logdate,"data_type":datatype,"interface":interface})
for item in interface_list:
menu = item.get('menu', '')
ip = item.get('ip', '0.0.0.0')
account = item.get('account', '')
jobnum = item.get('jobnum', '')
count = item.get('count', 0)
logdate = log_date
datatype = DATA_TYPE.get("INTERFACE",3)
interface = item.get('interface', '')
records.append({"menu":menu, "ip":ip, "account":account, "jobnum":jobnum, "count":count, "logdate":logdate,"data_type":datatype,"interface":interface})
for item in menu_list:
menu = item.get('menu', '')
ip = item.get('ip', '0.0.0.0')
account = item.get('account', '')
jobnum = item.get('jobnum', '')
count = item.get('count', 0)
logdate = log_date
datatype = DATA_TYPE.get("MENU",4)
interface = item.get('interface', '')
records.append({"menu":menu, "ip":ip, "account":account, "jobnum":jobnum, "count":count, "logdate":logdate,"data_type":datatype,"interface":interface})
csv_file = get_clean_file_path()+"/"+log_date+".csv" csv_file = get_clean_file_path()+"/"+log_date+".csv"
records = json_to_csv_data(data,log_date)
logger_cron.info("INSERT: 开始写csv文件") logger_cron.info("INSERT: 开始写csv文件")
json_to_csvFile(records,csv_file) write_csv(records,csv_file)
sql = "\copy ueba_analysis_schema.logs(count,account,logdate,data_type,ip,interface,menu,jobnum) from '{}' with csv header DELIMITER ',';".format(csv_file) sql = "\copy ueba_analysis_schema.logs(count,account,logdate,data_type,ip,interface,menu,jobnum) from '{}' with csv header DELIMITER ',';".format(csv_file)
csv_to_pg(sql) csv_to_pg(sql)

Loading…
Cancel
Save