@@ -6,13 +6,13 @@ import codecs,csv
 from db2json import DBUtils
 from datetime import datetime, timedelta
 from ext_logging import logger_cron, get_clean_file_path
-from file_helper import read_large_json_file, json_to_csv_data, write_csv
+from file_helper import read_large_json_file
 from file_merge import entry as merge_entry
 from appsUtils.confutil import ConfUtil
 from dataInterface.functions import CFunction
 from dataInterface.db.params import CPgSqlParam

-date_pattern = re.compile(r'^\d{4}-\d{2}-\d{2}.csv$')
+date_pattern = re.compile(r'^\d{4}-\d{2}-\d{2}\.json$')

 LOG_TABLE_NAME = "ueba_analysis_schema.logs"
@@ -84,10 +84,20 @@ def get_all_files(path):
     for filename in os.listdir(path):
         if date_pattern.search(filename):
             # The cron job runs at 3 a.m., so only yesterday's data is processed; today's file is skipped.
-            if datetime.now().strftime("%Y-%m-%d") + ".csv" != filename:
+            if datetime.now().strftime("%Y-%m-%d") + ".json" != filename:
                 files.append({"filename": filename, "path": os.path.join(path, filename)})
     return files
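+
+# Local CSV writer; replaces the write_csv helper dropped from the file_helper import above.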
+def json_to_csvFile(json_data, csv_file):
+    # Field order must match the \copy column list in insert_data(); Python 2
+    # dict key order is arbitrary, so don't rely on json_data[0].keys() here.
+    fields = ["count", "account", "logdate", "data_type", "ip", "interface", "menu", "jobnum"]
+    with open(csv_file, 'wb') as csvfile:  # 'wb': the Python 2 csv module writes bytes
+        writer = csv.DictWriter(csvfile, fieldnames=fields)
+        writer.writeheader()
+        for row in json_data:
+            # Encode unicode values as UTF-8 so the csv module can serialize them.
+            row = {k: v.encode('utf-8') if isinstance(v, unicode) else v for k, v in row.items()}
+            writer.writerow(row)

 def csv_to_pg(sql):
     logger_cron.info("INSERT: preparing to load data into the database")
     confutil = ConfUtil()
@@ -118,12 +128,52 @@ def insert_data(files):
         basename, extension = os.path.splitext(itemFile.get('filename', ''))
         log_date = basename
-        # print("filename:" + log_date)
-        csv_file = get_clean_file_path() + "/" + log_date + ".csv"
-        records = json_to_csv_data(data, log_date)
+        records = []
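+        # The four blocks below differ only in the source list and the DATA_TYPE
+        # tag; each aggregated item becomes one row for ueba_analysis_schema.logs.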
+        for item in ip_list:
+            menu = item.get('menu', '')
+            ip = item.get('ip', '0.0.0.0')
+            account = item.get('account', '')
+            jobnum = item.get('jobnum', '')
+            count = item.get('count', 0)
+            logdate = log_date
+            datatype = DATA_TYPE.get("IP", 1)
+            interface = item.get('interface', '')
+            records.append({"menu": menu, "ip": ip, "account": account, "jobnum": jobnum, "count": count, "logdate": logdate, "data_type": datatype, "interface": interface})
+        for item in account_list:
+            menu = item.get('menu', '')
+            ip = item.get('ip', '0.0.0.0')
+            account = item.get('account', '')
+            jobnum = item.get('jobnum', '')
+            count = item.get('count', 0)
+            logdate = log_date
+            datatype = DATA_TYPE.get("ACCOUNT", 2)
+            interface = item.get('interface', '')
+            records.append({"menu": menu, "ip": ip, "account": account, "jobnum": jobnum, "count": count, "logdate": logdate, "data_type": datatype, "interface": interface})
+        for item in interface_list:
+            menu = item.get('menu', '')
+            ip = item.get('ip', '0.0.0.0')
+            account = item.get('account', '')
+            jobnum = item.get('jobnum', '')
+            count = item.get('count', 0)
+            logdate = log_date
+            datatype = DATA_TYPE.get("INTERFACE", 3)
+            interface = item.get('interface', '')
+            records.append({"menu": menu, "ip": ip, "account": account, "jobnum": jobnum, "count": count, "logdate": logdate, "data_type": datatype, "interface": interface})
+        for item in menu_list:
+            menu = item.get('menu', '')
+            ip = item.get('ip', '0.0.0.0')
+            account = item.get('account', '')
+            jobnum = item.get('jobnum', '')
+            count = item.get('count', 0)
+            logdate = log_date
+            datatype = DATA_TYPE.get("MENU", 4)
+            interface = item.get('interface', '')
+            records.append({"menu": menu, "ip": ip, "account": account, "jobnum": jobnum, "count": count, "logdate": logdate, "data_type": datatype, "interface": interface})
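+        # Stage the day's rows as <log_date>.csv for the \copy bulk load below.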
+        csv_file = get_clean_file_path() + "/" + log_date + ".csv"
         logger_cron.info("INSERT: start writing the csv file")
-        write_csv(records, csv_file)
+        json_to_csvFile(records, csv_file)
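+        # Note: "with csv header" only makes \copy skip the first line; it does not
+        # match columns by name, hence the fixed field order in json_to_csvFile().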
sql = " \ copy ueba_analysis_schema.logs(count,account,logdate,data_type,ip,interface,menu,jobnum) from ' {} ' with csv header DELIMITER ' , ' ; " . format ( csv_file )
sql = " \ copy ueba_analysis_schema.logs(count,account,logdate,data_type,ip,interface,menu,jobnum) from ' {} ' with csv header DELIMITER ' , ' ; " . format ( csv_file )
csv_to_pg ( sql )
csv_to_pg ( sql )