# coding=utf-8
"""
@Author      : fu-zhe
@FileName    : db2json.py
@DateTime    : 2024/5/15 10:19
@Description : Rows returned by the database query tool are nested lists;
               this module converts them into JSON-friendly data that the
               front end can consume easily.
"""
import json
import traceback
import random , string
import traceback , json
from datetime import datetime , timedelta , time
from dataInterface . functions import CFunction
from dataInterface . db . params import CPgSqlParam
from uebaMetricsAnalysis . utils . ext_logging import logger , logger_cron
class DBType(object):
    """Names of the result shapes accepted by ``DBUtils.transition``."""

    # Single record rendered as one mapping.
    DICT = " dict "
    # Every record rendered as a mapping, collected in a list.
    LIST = " list "
# Schema-qualified name of the job table; it is substituted into the SQL
# statements built by the job helpers of DBUtils.
JOB_TABLE_NAME = " ueba_analysis_schema.jobs "
class DBUtils(object):
    """Query helpers and job bookkeeping on top of the PostgreSQL interface.

    The query helpers run SQL through ``CFunction.execute`` and reshape the
    returned rows into camelCase-keyed structures. The job helpers read and
    write the table named by ``JOB_TABLE_NAME`` and compute the time window
    the next job run should cover.
    """

    # Job status codes as used by the job table helpers below.
    _STATUS_RUNNING = 1
    _STATUS_SUCCESS = 2
    _STATUS_FAILED = 3

    # A failed job is handed out again while run_count is at most this value.
    _MAX_RUN_COUNT = 3

    # strptime/strftime pattern matching the to_char() mask in get_job_period.
    _TIME_FORMAT = '%Y-%m-%d %H:%M:%S'

    @classmethod
    def transition(cls, filed, sql, data_type):
        """Run ``sql`` and convert the rows into front-end friendly data.

        :param filed: list of column names in select order, e.g. for
            ``select id, name from table`` pass ``['id', 'name']``
        :param sql: SQL statement to execute
        :param data_type: ``DBType.LIST`` or ``DBType.DICT``
        :return: ``[{field1: value1, ...}, ...]`` for ``DBType.LIST``,
            ``{field1: value1, ...}`` (first row only) for ``DBType.DICT``,
            an empty list/dict when the query yields no usable rows, and
            ``None`` for any other ``data_type``
        :raises Exception: when the query fails or a row's length differs
            from ``len(filed)``
        """
        data = cls.execute(sql=sql)
        if cls.is_list_of_empty_lists(data):
            # Build the empty container explicitly instead of eval()-ing the
            # caller-supplied type name.
            if data_type == DBType.DICT:
                return {}
            if data_type == DBType.LIST:
                return []
            return None
        if data_type == DBType.DICT:
            res = cls._row_to_dict(filed, data[0])
            logger.info(" res = {} ".format(res))
            return res
        if data_type == DBType.LIST:
            return [cls._row_to_dict(filed, row) for row in data]
        return None

    @classmethod
    def _row_to_dict(cls, filed, row):
        """Pair the column names with one row's values, camelCasing the names."""
        if len(filed) != len(row):
            raise Exception(" {} 与数据库查询结果长度不一致 ".format(filed))
        return {cls.snake2camel(name): value for name, value in zip(filed, row)}

    @classmethod
    def snake2camel(cls, snake_filed):
        """Convert a snake_case name to lower camelCase.

        :param snake_filed: snake_case name, e.g. ``user_name``
        :return: camelCase name (``user_name`` -> ``userName``); empty or
            non-``str`` input is returned unchanged
        """
        if not snake_filed or not isinstance(snake_filed, str):
            return snake_filed
        parts = snake_filed.split('_')
        # The first word is kept as is; every following word is capitalized.
        return parts[0] + ''.join(word.capitalize() for word in parts[1:])

    @classmethod
    def execute(cls, sql):
        """Execute ``sql`` and return the JSON-decoded result.

        :param sql: SQL statement
        :return: the value produced by ``json.loads`` on the raw result
        :raises Exception: on any failure; the SQL and the traceback are
            logged before the generic exception is raised
        """
        try:
            data = CFunction.execute(CPgSqlParam(sql))
            logger.info(" execute result : {} ".format(data))
            return json.loads(data)
        except Exception:
            logger.error(" execute sql error sql: \n {} \n tracback: {} \n ".format(sql, traceback.format_exc()))
            raise Exception(" 查询失败 ")

    @classmethod
    def is_list_of_empty_lists(cls, target_list):
        """Return True when every element of ``target_list`` is empty/falsy.

        An empty ``target_list`` also yields True.
        """
        return all(not sublist for sublist in target_list)

    @classmethod
    def list_snake2camel(cls, snake_list):
        """Rename the keys of every dict in ``snake_list`` to camelCase.

        :param snake_list: list of dicts with snake_case keys, e.g.
            ``[{'user_name': '', 'user_age': ''}]``
        :return: new list of new dicts, e.g.
            ``[{'userName': '', 'userAge': ''}]``
        """
        return [
            {cls.snake2camel(key): value for key, value in snake_dict.items()}
            for snake_dict in snake_list
        ]

    @classmethod
    def write_job_status(cls, job_id, status, err, run_count):
        """Persist the outcome of a job run.

        :param job_id: id of the job row to update
        :param status: 2 (success) stores the completion time, 3 (failed)
            stores ``err``; any other value writes nothing
        :param err: error text stored for a failed run
        :param run_count: run counter stored with either outcome
        """
        if status == cls._STATUS_SUCCESS:
            sql = """ update {JOB_TABLE_NAME} set status= %s ,complate_time = %s ,run_count = %s
                where job_id = %s """.format(JOB_TABLE_NAME=JOB_TABLE_NAME)
            CFunction.execute(CPgSqlParam(sql, params=(status, datetime.now(), run_count, job_id)))
        if status == cls._STATUS_FAILED:
            sql = """ update {JOB_TABLE_NAME} set status= %s , err= %s ,run_count = %s
                where job_id = %s """.format(JOB_TABLE_NAME=JOB_TABLE_NAME)
            CFunction.execute(CPgSqlParam(sql, params=(status, err, run_count, job_id)))

    @classmethod
    def insert_job_record(cls, job_id, start_time, end_time, status):
        """Insert a new job row with ``run_count`` initialised to 1."""
        sql = """ insert into {JOB_TABLE_NAME} (job_id,start_time,end_time,status,run_count) values( %s , %s , %s , %s , %s ) """.format(JOB_TABLE_NAME=JOB_TABLE_NAME)
        CFunction.execute(CPgSqlParam(sql, params=(job_id, start_time, end_time, status, 1)))

    @classmethod
    def get_job_period(cls):
        """Work out the time window the next job run should process.

        The decision is based on the job row with the latest ``end_time``.

        :return: tuple ``(start_time, end_time, status, run_count, job_id)``:

            * no job rows yet: the window from 20 to 15 minutes ago, with
              status 2, run_count 0 and a blank job id
            * latest job still running (status 1):
              ``(None, None, 1, run_count, job_id)``
            * latest job failed (status 3) with ``run_count <= 3``: that
              job's own window, run_count and job_id, so it can be retried
            * no complete window available yet: ``(None, None, 2, 999, " ")``
            * otherwise: the next window, starting at the latest job's
              ``end_time``, with that job's status, run_count 0 and a blank
              job id
        """
        sql = """ select job_id, to_char(end_time,'YYYY-MM-DD HH24:MI:SS') as end_time,status,run_count,to_char(start_time,'YYYY-MM-DD HH24:MI:SS') as start_time from {JOB_TABLE_NAME} order by end_time desc limit 1 """.format(JOB_TABLE_NAME=JOB_TABLE_NAME)
        res = json.loads(CFunction.execute(CPgSqlParam(sql, params=())))

        # One clock snapshot so every bound below refers to the same instant.
        now = datetime.now()
        # Data travelling from Kafka to ES lags by roughly 5 minutes, so
        # nothing newer than 15 minutes ago is processed.
        latest_end = now - timedelta(minutes=15)

        if not res:
            return now - timedelta(minutes=20), latest_end, 2, 0, " "

        job_id, last_end, status, run_count, last_start = res[0][:5]

        # Still running: nothing to schedule.
        if status == cls._STATUS_RUNNING:
            return None, None, 1, run_count, job_id

        # Failed: retry the same window while the retry budget lasts.
        if status == cls._STATUS_FAILED and run_count <= cls._MAX_RUN_COUNT:
            start_time = datetime.strptime(last_start, cls._TIME_FORMAT)
            end_time = datetime.strptime(last_end, cls._TIME_FORMAT)
            return start_time, end_time, 3, run_count, job_id

        start_time = datetime.strptime(last_end, cls._TIME_FORMAT)
        # A window cut at 23:59:59 continues at 00:00:00 of the next day.
        if start_time.hour == 23 and start_time.minute == 59 and start_time.second == 59:
            start_time = start_time + timedelta(days=1)
            start_time = start_time.replace(hour=0, minute=0, second=0)

        # Cap the window at 20 minutes.
        start_time, end_time = cls.reset_start_end_time(start_time, latest_end)

        # Not ready yet: the window would reach past the safe upper bound, or
        # it is empty/inverted (e.g. right after the midnight rollover above,
        # when the new start is still in the future relative to latest_end).
        if end_time > latest_end or end_time <= start_time:
            logger_cron.info(" end_time: " + end_time.strftime(cls._TIME_FORMAT) + " ,datetime.now: " + now.strftime(cls._TIME_FORMAT))
            return None, None, 2, 999, " "

        start_time, end_time = cls.adjust_end_time_if_cross_day(start_time, end_time)
        # The job id has to be generated at the entry point, so a blank
        # placeholder is returned here.
        return start_time, end_time, status, 0, " "

    @classmethod
    def reset_start_end_time(cls, start_time, end_time):
        """Limit the window to at most 20 minutes.

        :return: ``(start_time, start_time + 20 minutes)`` when the two
            times are more than 20 minutes apart, otherwise the inputs
        """
        threshold = timedelta(minutes=20)
        if abs(end_time - start_time) > threshold:
            return start_time, start_time + threshold
        return start_time, end_time

    @classmethod
    def adjust_end_time_if_cross_day(cls, start_time, end_time):
        """Keep the window inside one calendar day.

        :return: the inputs unchanged when both fall on the same date,
            otherwise ``end_time`` is replaced by 23:59:59 on the date of
            ``start_time``
        """
        if start_time.date() != end_time.date():
            end_time = datetime.combine(start_time.date(), time(23, 59, 59))
        return start_time, end_time
# if __name__ == '__main__':
# start,end = DBUtils.get_job_period()
# print ( "job:运行参数:{},{}".format(start,end))