hbyd_ueba/utils/db2json.py

# coding=utf-8
"""
 @Author: fu-zhe
 @FileName: db2json.py
 @DateTime: 2024/5/15 10:19
 @Description: 数据库工具查询之后的数据为 list嵌套  需要转换成 前端方便识别的json数据
"""
import json
import traceback
import random,string
import traceback,json
from datetime import datetime,timedelta,time
from dataInterface.functions import CFunction
from dataInterface.db.params import CPgSqlParam
from uebaMetricsAnalysis.utils.ext_logging import logger,logger_cron


class DBType(object):
    """Names of the result shapes accepted by ``DBUtils.transition``.

    ``LIST`` asks for a list holding one dict per row; ``DICT`` asks for a
    single dict built from the first row.
    """

    LIST, DICT = 'list', 'dict'

# Schema-qualified name of the jobs table; DBUtils formats it into the SQL
# statements that insert, update and read job records.
JOB_TABLE_NAME = "ueba_analysis_schema.jobs"

class DBUtils(object):
    """Database helpers: query-result reshaping and analysis-job bookkeeping.

    Every method is a classmethod; the class keeps no instance state.
    Job status codes as used in this class: 1 = running, 2 = success,
    3 = failed.
    """

    # strptime pattern matching the to_char() format selected in get_job_period.
    _TIME_FORMAT = '%Y-%m-%d %H:%M:%S'
    # Longest time span a single job window may cover.
    _MAX_WINDOW = timedelta(minutes=20)
    # Data travelling from Kafka to ES lags by roughly 5 minutes, so job
    # windows are kept 15 minutes behind the current time.
    _DATA_DELAY = timedelta(minutes=15)

    @classmethod
    def transition(cls, filed, sql, data_type):
        """
        Run a query and reshape its rows into front-end friendly JSON data.

        :param filed: list of column names in select order, e.g. for
            ``select id, name from table`` pass ``['id', 'name']``
        :param sql: SQL statement to execute
        :param data_type: ``DBType.LIST`` or ``DBType.DICT``
        :return: ``[{field1: value1, ...}, ...]`` for LIST, or
            ``{field1: value1, ...}`` (first row only) for DICT; keys are
            converted with ``snake2camel``. An empty result yields ``[]``
            or ``{}`` respectively.
        :raises ValueError: if ``data_type`` is not a supported type
        :raises Exception: if the query fails, or a row's length differs
            from ``len(filed)``
        """
        # Reject unknown types up front instead of eval()-ing the string,
        # which would execute whatever expression the caller passed in.
        if data_type not in (DBType.LIST, DBType.DICT):
            raise ValueError("unsupported data_type: {!r}".format(data_type))

        rows = cls.execute(sql=sql)
        if cls.is_list_of_empty_lists(rows):
            return {} if data_type == DBType.DICT else []

        if data_type == DBType.DICT:
            res = cls._row_to_dict(filed, rows[0])
            logger.info("res = {}".format(res))
            return res
        return [cls._row_to_dict(filed, row) for row in rows]

    @classmethod
    def _row_to_dict(cls, filed, row):
        """Pair column names with one row's values, camel-casing the keys."""
        if len(filed) != len(row):
            raise Exception("{}与数据库查询结果长度不一致".format(filed))
        return {cls.snake2camel(name): value for name, value in zip(filed, row)}

    @classmethod
    def snake2camel(cls, snake_filed):
        """
        Convert a snake_case name to camelCase.

        :param snake_filed: snake_case name, e.g. ``user_name``
        :return: camelCase name, e.g. ``user_name`` -> ``userName``;
            empty or non-``str`` input is returned unchanged
        """
        if not snake_filed or not isinstance(snake_filed, str):
            return snake_filed
        parts = snake_filed.split('_')
        # The first word is kept exactly as given; every later word is
        # capitalized (first letter upper-cased, the rest lower-cased).
        return parts[0] + ''.join(word.capitalize() for word in parts[1:])

    @classmethod
    def execute(cls, sql):
        """
        Execute a SQL statement and decode its JSON result.

        :param sql: SQL statement
        :return: the value produced by ``json.loads`` on the raw result
        :raises Exception: a generic exception on any failure; the original
            traceback is written to the error log before it is raised
        """
        try:
            raw = CFunction.execute(CPgSqlParam(sql))
            logger.info("execute result : {}".format(raw))
            return json.loads(raw)
        except Exception:
            logger.error("execute sql error sql: \n {}\n tracback: {}\n".format(sql, traceback.format_exc()))
            raise Exception("查询失败")

    @classmethod
    def is_list_of_empty_lists(cls, target_list):
        """
        Tell whether a query result carries no data.

        :param target_list: decoded query result (list of rows) or ``None``
        :return: ``True`` when the result is ``None``/empty or every row in
            it is empty, otherwise ``False``
        """
        if not target_list:
            return True
        return all(not sublist for sublist in target_list)

    @classmethod
    def list_snake2camel(cls, snake_list):
        """
        Convert the keys of every dict in a list from snake_case to camelCase.

        :param snake_list: list of dicts with snake_case keys, e.g.
            ``[{'user_name': '', 'user_age': ''}]``
        :return: new list of new dicts, e.g.
            ``[{'userName': '', 'userAge': ''}]``
        """
        return [
            {cls.snake2camel(key): value for key, value in snake_dict.items()}
            for snake_dict in snake_list
        ]

    @classmethod
    def write_job_status(cls, job_id, status, err, run_count):
        """
        Record the outcome of a job run.

        :param job_id: id of the job row to update
        :param status: 2 (success) stores the completion time; 3 (failed)
            stores ``err``; any other value performs no update
        :param err: error text, only written when ``status`` is 3
        :param run_count: run counter to store with the status
        """
        if status == 2:
            # success
            sql = """update {JOB_TABLE_NAME} set status=%s ,complate_time = %s,run_count =%s
                where job_id=%s """.format(JOB_TABLE_NAME=JOB_TABLE_NAME)
            CFunction.execute(CPgSqlParam(sql, params=(status, datetime.now(), run_count, job_id)))
        elif status == 3:
            # failed
            sql = """update {JOB_TABLE_NAME} set status=%s, err=%s ,run_count = %s
                where job_id=%s """.format(JOB_TABLE_NAME=JOB_TABLE_NAME)
            CFunction.execute(CPgSqlParam(sql, params=(status, err, run_count, job_id)))

    @classmethod
    def insert_job_record(cls, job_id, start_time, end_time, status):
        """
        Insert a new job row with ``run_count`` set to 1.

        :param job_id: id of the new job
        :param start_time: start of the job's time window
        :param end_time: end of the job's time window
        :param status: initial status code
        """
        sql = """insert into {JOB_TABLE_NAME}(job_id,start_time,end_time,status,run_count) values(%s,%s,%s,%s,%s)""".format(JOB_TABLE_NAME=JOB_TABLE_NAME)
        CFunction.execute(CPgSqlParam(sql, params=(job_id, start_time, end_time, status, 1)))

    @classmethod
    def get_job_period(cls):
        """
        Work out the time window for the next job run.

        Reads the job row with the latest ``end_time`` and returns a tuple
        ``(start_time, end_time, status, run_count, job_id)``:

        * no job recorded yet: the 5 minutes ending 15 minutes ago,
          status 2, run_count 0, empty job_id;
        * latest job running (status 1): ``(None, None, 1, run_count, job_id)``;
        * latest job failed (status 3) with ``run_count <= 3``: that job's
          own window, status 3 and its run_count/job_id, so it can be retried;
        * otherwise: a new window starting at the latest job's end time,
          at most 20 minutes long, never ending later than 15 minutes ago
          and never crossing midnight; run_count 0 and an empty job_id
          (the id is generated by the caller);
        * no valid window available yet: ``(None, None, 2, 999, "")``.

        :return: 5-tuple as described above
        """
        sql = """select job_id, to_char(end_time,'YYYY-MM-DD HH24:MI:SS') as end_time,status,run_count,to_char(start_time,'YYYY-MM-DD HH24:MI:SS') as start_time from {JOB_TABLE_NAME} order by end_time desc limit 1""".format(JOB_TABLE_NAME=JOB_TABLE_NAME)
        rows = json.loads(CFunction.execute(CPgSqlParam(sql, params=())))

        # Capture the clock once so every comparison below uses the same instant.
        now = datetime.now()
        latest_end = now - cls._DATA_DELAY

        if not rows:
            return now - timedelta(minutes=20), latest_end, 2, 0, ""

        job_id, end_text, status, run_count, start_text = rows[0][:5]

        # Still running: nothing to schedule.
        if status == 1:
            return None, None, 1, run_count, job_id

        # Failed with retries left: hand back the same window for a retry.
        if status == 3 and run_count <= 3:
            start_time = datetime.strptime(start_text, cls._TIME_FORMAT)
            end_time = datetime.strptime(end_text, cls._TIME_FORMAT)
            return start_time, end_time, 3, run_count, job_id

        # NOTE(review): a failed job whose retries are exhausted also reaches
        # this point, so its status (3) is returned with the *next* window and
        # the failed window is not reprocessed - confirm this is intended.
        start_time = datetime.strptime(end_text, cls._TIME_FORMAT)
        if (start_time.hour, start_time.minute, start_time.second) == (23, 59, 59):
            # The previous window was clipped to 23:59:59 (see
            # adjust_end_time_if_cross_day); resume at 00:00:00 of the next day.
            start_time += timedelta(seconds=1)

        # Slice the span down to the maximum window length when it is longer.
        start_time, end_time = cls.reset_start_end_time(start_time, latest_end)

        # Skip this round when the window would run past the delay cut-off, or
        # when the recorded end time lies ahead of the cut-off (e.g. after a
        # clock step-back), which would otherwise yield start_time > end_time.
        if start_time > end_time or end_time > latest_end:
            logger_cron.info("end_time:"+end_time.strftime("%Y-%m-%d %H:%M:%S")+",datetime.now:"+now.strftime("%Y-%m-%d %H:%M:%S"))
            return None, None, 2, 999, ""

        start_time, end_time = cls.adjust_end_time_if_cross_day(start_time, end_time)
        # The job id is generated at the entry point, so an empty one is returned.
        return start_time, end_time, status, 0, ""

    @classmethod
    def reset_start_end_time(cls, start_time, end_time):
        """
        Limit a time window to at most 20 minutes.

        :param start_time: window start (``datetime``)
        :param end_time: window end (``datetime``)
        :return: ``(start_time, start_time + 20 minutes)`` when the absolute
            distance between the two exceeds 20 minutes, otherwise the
            inputs unchanged
        """
        if abs(end_time - start_time) > cls._MAX_WINDOW:
            return start_time, start_time + cls._MAX_WINDOW
        return start_time, end_time

    @classmethod
    def adjust_end_time_if_cross_day(cls, start_time, end_time):
        """
        Keep a time window inside a single calendar day.

        :param start_time: window start (``datetime``)
        :param end_time: window end (``datetime``)
        :return: ``(start_time, end_time)``; when the two fall on different
            dates, ``end_time`` is replaced by 23:59:59 on the start date
        """
        if start_time.date() != end_time.date():
            end_time = datetime.combine(start_time.date(), time(23, 59, 59))
        return start_time, end_time

# if __name__ == '__main__':
#     start,end = DBUtils.get_job_period()
#     print ( "job：运行参数:{},{}".format(start,end))
'代码提交' 5 months ago			`# coding=utf-8`
			`"""`
			`@Author: fu-zhe`
			`@FileName: db2json.py`
			`@DateTime: 2024/5/15 10:19`
			`@Description: 数据库工具查询之后的数据为 list嵌套需要转换成前端方便识别的json数据`
			`"""`
			`import json`
			`import traceback`
'edit' 4 months ago			`import random,string`
			`import traceback,json`
'代码提交' 4 months ago			`from datetime import datetime,timedelta,time`
'代码提交' 5 months ago			`from dataInterface.functions import CFunction`
			`from dataInterface.db.params import CPgSqlParam`
'代码提交' 4 months ago			`from uebaMetricsAnalysis.utils.ext_logging import logger,logger_cron`
'代码提交' 5 months ago

			`class DBType(object):`
			`LIST = 'list'`
			`DICT = 'dict'`

'代码提交' 4 months ago			`JOB_TABLE_NAME = "ueba_analysis_schema.jobs"`
'代码提交' 5 months ago
			`class DBUtils(object):`
			`@classmethod`
			`def transition(cls, filed, sql, data_type):`
			`"""`
			`数据转换将json格式的data转换成前端方便使用的 json`
			`:param data_type: 转换数据类型目前有 list dict两种`
			`:param filed: list 格式数据库字段例如 select id, name from table; filed= ['id','name']`
			:param sql: sql语句 `select * from table`
			`:return: [{filed1: value1, filed2: value2,···}, ····] 或 {filed1: value1, filed2: value2,···}`
			`"""`
			`data = cls.execute(sql=sql)`
			`if cls.is_list_of_empty_lists(data):`
			`return eval(data_type)()`
			`if data_type == DBType.DICT:`
			`data = data[0]`
			`if len(filed) != len(data):`
			`raise Exception("{}与数据库查询结果长度不一致".format(filed))`
			`res = {cls.snake2camel(filed[i]): data[i] for i in range(len(filed))}`
			`logger.info("res = {}".format(res))`
			`return res`
			`if data_type == DBType.LIST:`
			`res = []`
			`if not data:`
			`return res`
			`for item in data:`
			`if len(item) != len(filed):`
			`raise Exception("{}与数据库查询结果长度不一致".format(filed))`
			`res.append({cls.snake2camel(filed[i]): item[i] for i in range(len(filed))})`
			`return res`

			`@classmethod`
			`def snake2camel(cls, snake_filed):`
			`"""`
			`蛇形命名转换小驼峰命名`
			`:param snake_filed: 蛇形命名例如 user_name`
			`:return: 驼峰命名 user_name ---> userName`
			`"""`
			`if not snake_filed or not isinstance(snake_filed, str):`
			`return snake_filed`
			`parts = snake_filed.split('_')`
			`# 转换第一个单词为小写，其余单词首字母大写`
			`camel = parts[0] + ''.join(word.capitalize() for word in parts[1:])`
			`return camel`

			`@classmethod`
			`def execute(cls, sql):`
			`"""`
			`执行sql语句`
			`:param sql: sql语句`
			`:return:`
			`"""`
			`try:`
			`sql_list = CPgSqlParam(sql)`
'代码提交' 4 months ago			`#logger.info("execute sql:"+sql)`
'代码提交' 5 months ago			`data = CFunction.execute(sql_list)`
			`logger.info("execute result : {}".format(data))`
			`return json.loads(data)`
			`except Exception as e:`
			`logger.error("execute sql error sql: \n {}\n tracback: {}\n".format(sql, traceback.format_exc()))`
			`raise Exception("查询失败")`

			`@classmethod`
			`def is_list_of_empty_lists(cls, target_list):`
			`# 更清晰地检查每个子列表是否为空`
			`return all(not sublist or len(sublist) == 0 for sublist in target_list)`

			`@classmethod`
			`def list_snake2camel(cls, snake_list):`
			`"""`
			`将列表中字典的snake命名变成camel命名格式`
			:param snake_list: list内部都是蛇形命名的dict `[{'user_name':'', 'user_age': ''}]`
			:return: `[{'user_name':'', 'user_age': ''}]` ----> `[{'userName':'', 'userAge':''}]`
			`"""`
			`camel_list = []`
			`for snake_dict in snake_list:`
			`camel_list.append({cls.snake2camel(snake): value for snake, value in snake_dict.items()})`
			`return camel_list`

			`@classmethod`
'提交' 4 months ago			`def write_job_status(self,job_id,status,err,run_count):`
'edit' 4 months ago			`#success`
			`if status == 2:`
'代码提交' 4 months ago			`sql = """update {JOB_TABLE_NAME} set status=%s ,complate_time = %s,run_count =%s`
'代码提交' 5 months ago			`where job_id=%s """.format(JOB_TABLE_NAME=JOB_TABLE_NAME)`
'代码提交' 4 months ago			`CFunction.execute(CPgSqlParam(sql, params=(status,datetime.now(), run_count,job_id)))`
'edit' 4 months ago			`#failed`
			`if status == 3:`
'提交' 4 months ago			`sql = """update {JOB_TABLE_NAME} set status=%s, err=%s ,run_count = %s`
'edit' 4 months ago			`where job_id=%s """.format(JOB_TABLE_NAME=JOB_TABLE_NAME)`
'代码提交' 4 months ago			`CFunction.execute(CPgSqlParam(sql, params=(status, err, run_count,job_id)))`
'edit' 4 months ago
'代码提交' 5 months ago			`@classmethod`
			`def insert_job_record(self,job_id,start_time,end_time,status):`
'提交' 4 months ago			`sql = """insert into {JOB_TABLE_NAME}(job_id,start_time,end_time,status,run_count) values(%s,%s,%s,%s,%s)""".format(JOB_TABLE_NAME=JOB_TABLE_NAME)`
			`CFunction.execute(CPgSqlParam(sql, params=(job_id,start_time, end_time,status,1)))`
'edit' 4 months ago
			`#获取 job的执行时间开始时间-结束时间`
			`@classmethod`
			`def get_job_period(self):`
'提交' 4 months ago			`sql = """select job_id, to_char(end_time,'YYYY-MM-DD HH24:MI:SS') as end_time,status,run_count,to_char(start_time,'YYYY-MM-DD HH24:MI:SS') as start_time from {JOB_TABLE_NAME} order by end_time desc limit 1""".format(JOB_TABLE_NAME=JOB_TABLE_NAME)`
'edit' 4 months ago			`res = json.loads(CFunction.execute(CPgSqlParam(sql, params=())))`
'代码提交' 4 months ago			`# print json.dumps(res)`
'edit' 4 months ago			`data = {}`
			`if res:`
			`data["job_id"]=res[0][0]`
			`data["end_time"]=res[0][1]`
'提交' 4 months ago			`data["status"]=res[0][2]`
			`data["run_count"]=res[0][3]`
			`data["start_time"]=res[0][4]`
'edit' 4 months ago			`if len(data)==0:`
'提交' 4 months ago			`start_time = datetime.now() - timedelta(minutes=20)`
			`end_time = datetime.now()- timedelta(minutes=15)`
			`return start_time,end_time,2,0,""`
'edit' 4 months ago			`if len(data)>0:`
'提交' 4 months ago			`#运行中`
			`if data["status"] ==1:`
			`return None,None, 1,data["run_count"],data["job_id"]`
			`#运行失败重试不超过3次`
			`if data["status"] ==3 and data["run_count"]<=3:`
'代码提交' 4 months ago			`start_time = datetime.strptime(data["start_time"], '%Y-%m-%d %H:%M:%S')`
			`end_time = datetime.strptime(data["end_time"], '%Y-%m-%d %H:%M:%S')`
'提交' 4 months ago			`return start_time,end_time,3,data["run_count"],data["job_id"]`

'代码提交' 4 months ago			`start_time = datetime.strptime(data["end_time"], '%Y-%m-%d %H:%M:%S')`
'代码提交' 4 months ago			`# 检查时间是否为23:59:59`
			`if start_time.hour == 23 and start_time.minute == 59 and start_time.second == 59:`
			`# 是的话，增加一天并设置时间为00:00:00`
			`start_time = start_time + timedelta(days=1)`
			`start_time = start_time.replace(hour=0, minute=0, second=0)`
'edit' 4 months ago
'代码提交' 4 months ago			`end_time = (datetime.now()- timedelta(minutes=15))`
			`#判断间隔是否大于20分钟。大于的话再将时间切片`
			`start_time,end_time=self.reset_start_end_time(start_time, end_time)`

			`#kafka数据到es会存在5分钟左右的数据延迟，这里设置15分钟`
'提交' 4 months ago			`if end_time > (datetime.now()-timedelta(minutes=15)):`
'代码提交' 4 months ago			`logger_cron.info("end_time:"+end_time.strftime("%Y-%m-%d %H:%M:%S")+",datetime.now:"+datetime.now().strftime("%Y-%m-%d %H:%M:%S"))`
'提交' 4 months ago			`return None,None,2,999,""`
'edit' 4 months ago			`start_time ,end_time =self.adjust_end_time_if_cross_day(start_time,end_time)`
'提交' 4 months ago			`#需要在入口生成jobid 所以给空`
'代码提交' 4 months ago			`return start_time,end_time,data["status"],0,""`

			`@classmethod`
			`def reset_start_end_time(self,start_time, end_time):`
			`threshold = timedelta(minutes=20)`
			`delta = abs(end_time - start_time)`
			`#大于20分钟`
			`if delta > timedelta(minutes=20):`
			`time2_adjusted = start_time + threshold`
			`return start_time,time2_adjusted`
			`else:`
			`return start_time,end_time`
'edit' 4 months ago
			`@classmethod`
			`#处理跨天的场景`
			`def adjust_end_time_if_cross_day(self,start_time, end_time):`
			`if start_time.date() != end_time.date():`
'代码提交' 4 months ago			`end_time = datetime.combine(start_time.date(), time(23, 59, 59))`
'edit' 4 months ago
			`return start_time, end_time`
'提交' 4 months ago
'代码提交' 4 months ago			`# if __name__ == '__main__':`
			`# start,end = DBUtils.get_job_period()`
			`# print ( "job：运行参数:{},{}".format(start,end))`