You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
hbyd_ueba/utils/db2json.py

187 lines
8.0 KiB

# coding=utf-8
"""
@Author: fu-zhe
@FileName: db2json.py
@DateTime: 2024/5/15 10:19
@Description: 数据库工具查询之后的数据为 list嵌套 需要转换成 前端方便识别的json数据
"""
import json
import traceback
import random,string
import traceback,json
from datetime import datetime,timedelta,time
from dataInterface.functions import CFunction
from dataInterface.db.params import CPgSqlParam
from uebaMetricsAnalysis.utils.ext_logging import logger,logger_cron
class DBType(object):
    """Names of the result shapes accepted by ``DBUtils.transition``."""

    # Only the first row of the query result is converted and returned as one dict.
    DICT = "dict"
    # Every row of the query result is converted to a dict; a list of dicts is returned.
    LIST = "list"
# Table name interpolated into the job insert/update/select SQL built in this module.
JOB_TABLE_NAME = "ueba_analysis_schema.jobs"
class DBUtils(object):
    """Helpers for running SQL, reshaping query results and tracking job runs.

    Query results arrive as a list of rows (each row a list of column values);
    the conversion helpers turn them into camelCase-keyed dicts.  The job
    helpers read and write the table named by ``JOB_TABLE_NAME``.

    Job status codes used below: 1 = running, 2 = success, 3 = failed.
    """

    @classmethod
    def transition(cls, filed, sql, data_type):
        """Run ``sql`` and convert the rows into camelCase-keyed dicts.

        :param filed: list of column names in select order, e.g. for
            ``select id, name from table`` pass ``['id', 'name']``
        :param sql: SQL statement to execute
        :param data_type: ``DBType.LIST`` or ``DBType.DICT``
        :return: ``[{field1: value1, ...}, ...]`` for ``DBType.LIST``, or
            ``{field1: value1, ...}`` built from the first row for
            ``DBType.DICT``; an empty list/dict when the query yields no data
        :raises ValueError: if ``data_type`` is not a supported value
        :raises Exception: if the query fails, or a row's length differs from
            ``len(filed)``
        """
        # Validate explicitly instead of eval()-ing the caller-supplied string.
        if data_type not in (DBType.LIST, DBType.DICT):
            raise ValueError("unsupported data_type: {!r}".format(data_type))

        data = cls.execute(sql=sql)
        if not data or cls.is_list_of_empty_lists(data):
            return {} if data_type == DBType.DICT else []

        if data_type == DBType.DICT:
            res = cls._row_to_dict(filed, data[0])
            logger.info("res = {}".format(res))
            return res

        return [cls._row_to_dict(filed, row) for row in data]

    @classmethod
    def _row_to_dict(cls, filed, row):
        """Pair one result row with ``filed`` and return a camelCase-keyed dict."""
        if len(filed) != len(row):
            raise Exception("{}与数据库查询结果长度不一致".format(filed))
        return {cls.snake2camel(name): value for name, value in zip(filed, row)}

    @classmethod
    def snake2camel(cls, snake_filed):
        """Convert a snake_case name to lower camelCase.

        :param snake_filed: snake_case name, e.g. ``user_name``
        :return: camelCase name (``user_name`` -> ``userName``); falsy or
            non-``str`` input is returned unchanged
        """
        if not snake_filed or not isinstance(snake_filed, str):
            return snake_filed
        parts = snake_filed.split('_')
        # The first word is kept as is; each following word is capitalized.
        return parts[0] + ''.join(word.capitalize() for word in parts[1:])

    @classmethod
    def execute(cls, sql):
        """Execute ``sql`` and return the JSON-decoded result.

        :param sql: SQL statement
        :return: decoded query result
        :raises Exception: on any failure; the SQL and traceback are logged
            before the generic error is raised
        """
        try:
            data = CFunction.execute(CPgSqlParam(sql))
            logger.info("execute result : {}".format(data))
            return json.loads(data)
        except Exception:
            logger.error("execute sql error sql: \n {}\n tracback: {}\n".format(sql, traceback.format_exc()))
            raise Exception("查询失败")

    @classmethod
    def is_list_of_empty_lists(cls, target_list):
        """Return True when every element of ``target_list`` is empty/falsy.

        An empty ``target_list`` also yields True.
        """
        return all(not sublist for sublist in target_list)

    @classmethod
    def list_snake2camel(cls, snake_list):
        """Convert the keys of every dict in a list from snake_case to camelCase.

        :param snake_list: list of dicts with snake_case keys,
            e.g. ``[{'user_name': '', 'user_age': ''}]``
        :return: new list of dicts with camelCase keys,
            e.g. ``[{'userName': '', 'userAge': ''}]``
        """
        return [
            {cls.snake2camel(key): value for key, value in snake_dict.items()}
            for snake_dict in snake_list
        ]

    @classmethod
    def write_job_status(cls, job_id, status, err, run_count):
        """Update the stored status of a job.

        :param job_id: id of the job row to update
        :param status: 2 (success) or 3 (failed); other values write nothing
        :param err: error text stored when ``status`` is 3
        :param run_count: run counter to store
        """
        if status == 2:
            # Success: record the completion time.
            sql = (
                "update {JOB_TABLE_NAME} set status=%s ,complate_time = %s,run_count =%s\n"
                "where job_id=%s "
            ).format(JOB_TABLE_NAME=JOB_TABLE_NAME)
            CFunction.execute(CPgSqlParam(sql, params=(status, datetime.now(), run_count, job_id)))
        elif status == 3:
            # Failure: record the error text.
            sql = (
                "update {JOB_TABLE_NAME} set status=%s, err=%s ,run_count = %s\n"
                "where job_id=%s "
            ).format(JOB_TABLE_NAME=JOB_TABLE_NAME)
            CFunction.execute(CPgSqlParam(sql, params=(status, err, run_count, job_id)))

    @classmethod
    def insert_job_record(cls, job_id, start_time, end_time, status):
        """Insert a new job row with ``run_count`` set to 1."""
        sql = (
            "insert into {JOB_TABLE_NAME}(job_id,start_time,end_time,status,run_count) "
            "values(%s,%s,%s,%s,%s)"
        ).format(JOB_TABLE_NAME=JOB_TABLE_NAME)
        CFunction.execute(CPgSqlParam(sql, params=(job_id, start_time, end_time, status, 1)))

    @classmethod
    def get_job_period(cls):
        """Work out the time window the next job run should cover.

        The decision is based on the job row with the latest ``end_time``.

        :return: tuple ``(start_time, end_time, status, run_count, job_id)``:

            * no job rows: a window from 20 to 15 minutes before now,
              status 2, run_count 0, empty job_id
            * latest job running (status 1): ``(None, None, 1, run_count, job_id)``
            * latest job failed (status 3) with ``run_count <= 3``: the same
              window again, with its run_count and job_id
            * computed window would end later than 15 minutes before now:
              ``(None, None, 2, 999, "")``
            * otherwise: the window following the latest job's ``end_time``,
              with that job's status, run_count 0 and empty job_id (the job
              id is generated by the caller)
        """
        sql = (
            "select job_id, to_char(end_time,'YYYY-MM-DD HH24:MI:SS') as end_time,"
            "status,run_count,to_char(start_time,'YYYY-MM-DD HH24:MI:SS') as start_time "
            "from {JOB_TABLE_NAME} order by end_time desc limit 1"
        ).format(JOB_TABLE_NAME=JOB_TABLE_NAME)
        rows = json.loads(CFunction.execute(CPgSqlParam(sql, params=())))

        # Take "now" once so every bound below is derived from the same instant.
        now = datetime.now()
        # Data flowing from Kafka to ES lags by about 5 minutes, so never read
        # closer to the present than 15 minutes.
        latest_allowed_end = now - timedelta(minutes=15)

        if not rows:
            return now - timedelta(minutes=20), latest_allowed_end, 2, 0, ""

        job_id, last_end, status, run_count, last_start = rows[0][:5]
        time_format = '%Y-%m-%d %H:%M:%S'

        # Still running: no new window.
        if status == 1:
            return None, None, 1, run_count, job_id

        # Failed and still within the retry limit: hand back the same window.
        if status == 3 and run_count <= 3:
            return (
                datetime.strptime(last_start, time_format),
                datetime.strptime(last_end, time_format),
                3,
                run_count,
                job_id,
            )

        start_time = datetime.strptime(last_end, time_format)
        # A previous window that ended at 23:59:59 (end of day) resumes at
        # 00:00:00 of the following day.
        if start_time.hour == 23 and start_time.minute == 59 and start_time.second == 59:
            start_time = (start_time + timedelta(days=1)).replace(hour=0, minute=0, second=0)

        # Cap the window at 20 minutes when the gap is larger.
        start_time, end_time = cls.reset_start_end_time(start_time, latest_allowed_end)

        if end_time > latest_allowed_end:
            logger_cron.info(
                "end_time:" + end_time.strftime("%Y-%m-%d %H:%M:%S")
                + ",datetime.now:" + now.strftime("%Y-%m-%d %H:%M:%S")
            )
            return None, None, 2, 999, ""

        start_time, end_time = cls.adjust_end_time_if_cross_day(start_time, end_time)
        # The job id is generated at the entry point, so return it empty here.
        return start_time, end_time, status, 0, ""

    @classmethod
    def reset_start_end_time(cls, start_time, end_time):
        """Limit the window to 20 minutes.

        :return: ``(start_time, start_time + 20 minutes)`` when the absolute
            gap between the two times exceeds 20 minutes, otherwise the
            inputs unchanged
        """
        threshold = timedelta(minutes=20)
        if abs(end_time - start_time) > threshold:
            return start_time, start_time + threshold
        return start_time, end_time

    @classmethod
    def adjust_end_time_if_cross_day(cls, start_time, end_time):
        """Clip ``end_time`` to 23:59:59 of ``start_time``'s date when the two
        fall on different dates; otherwise return both unchanged."""
        if start_time.date() != end_time.date():
            end_time = datetime.combine(start_time.date(), time(23, 59, 59))
        return start_time, end_time
# if __name__ == '__main__':
# start,end = DBUtils.get_job_period()
# print ( "job:运行参数:{},{}".format(start,end))