# hbyd_ueba/utils/dashboard_data_conversion.py
# coding=utf-8
from __future__ import division
import json
from datetime import timedelta,datetime
from collections import defaultdict
# Maps the leading digits of a job number to the company (branch) name.
# Keys are 2, 3 or 4 characters long; the 4-character keys all start with "18".
jobnum_region_dict = {
    "10": "省公司",
    "110": "武汉分公司",
    "170": "襄阳分公司",
    "130": "鄂州分公司",
    "260": "孝感分公司",
    "250": "黄冈分公司",
    "120": "黄石分公司",
    "190": "咸宁分公司",
    "200": "荆州分公司",
    "140": "宜昌分公司",
    "150": "恩施分公司",
    "160": "十堰分公司",
    "240": "随州分公司",
    "230": "荆门分公司",
    "1801": "江汉分公司",
    "1802": "潜江分公司",
    "1803": "天门分公司",
}
def keep_digits_filter(code):
    """
    Return only the digit characters of ``code``, in their original order.

    A job number may contain letters; ``code`` is converted with ``str()``
    and every non-digit character is dropped.
    """
    text = str(code)
    digits = [ch for ch in text if ch.isdigit()]
    return ''.join(digits)
# Safe division
def safe_divide(numerator, denominator):
    """
    Divide ``numerator`` by ``denominator``.

    Returns ``None`` instead of raising when ``denominator`` equals 0.
    """
    if denominator != 0:
        return numerator / denominator
    return None
def find_region_by_code(code, region_dict):
    """
    Look up the company a job number belongs to.

    Only the digits of ``code`` take part in the match. A job number is
    valid only when it has 7 or 8 digits.

    :param code: job number; converted with ``str()`` before use.
    :param region_dict: mapping from job-number prefix to company name.
    :return: the mapped company name, or u"其他" when the job number is
        invalid or no prefix matches.
    """
    # Job numbers may contain letters; keep the digits only.
    code_str = ''.join(ch for ch in str(code) if ch.isdigit())
    # Only 7- or 8-digit job numbers are valid.
    if len(code_str) not in (7, 8):
        return u"其他"
    # Codes starting with "18" are keyed by their first 4 digits.
    if code_str.startswith("18"):
        return region_dict.get(code_str[:4], u"其他")
    # Otherwise prefer the 3-digit prefix and fall back to the 2-digit one.
    # Matching on 2 digits alone can never reach 3-character keys such as
    # "110", which would push those job numbers into u"其他".
    for prefix_len in (3, 2):
        prefix = code_str[:prefix_len]
        if prefix in region_dict:
            return region_dict[prefix]
    return u"其他"
def ip_summary_data_format(ip_summary_data):
    """
    Convert IP-dimension rows into dashboard summary and drill-down data.

    :param ip_summary_data: iterable of dicts with the keys "ip", "jobnum"
        and "count".
    :return: ``{"summary": {"ip": [...]}, "detail": {"ip": {...}}}``.
        "summary" holds one entry per company, sorted by request count in
        descending order. "detail" maps each company to its (ip, jobnum)
        rows, sorted by request count in descending order and capped at
        500 rows. Both are empty when the total request count is 0.
    """
    result = {"summary": {"ip": []}, "detail": {"ip": {}}}
    grouped_data = defaultdict(lambda: {"reqs": 0, "ips": set()})
    ip_detail_dict = defaultdict(lambda: defaultdict(lambda: {"req_frequency": 0}))
    # Distinct IPs across all companies.
    ips_total = set()
    for ip_data in ip_summary_data:
        company = find_region_by_code(ip_data["jobnum"], jobnum_region_dict)
        count = ip_data["count"]
        ip = ip_data["ip"]
        jobnum = ip_data["jobnum"]
        # Update the per-company statistics.
        grouped_data[company]["reqs"] += count
        grouped_data[company]["ips"].add(ip)
        ips_total.add(ip)
        # Build the drill-down row. A tuple key keeps distinct (ip, jobnum)
        # pairs apart; plain concatenation could merge different pairs
        # that happen to produce the same string.
        detail_row = ip_detail_dict[company][(ip, jobnum)]
        detail_row["req_ip"] = ip
        detail_row["req_jobnum"] = jobnum
        detail_row["req_frequency"] += count
    # Total number of requests over all companies.
    reqs_total = sum(data["reqs"] for data in grouped_data.values())
    # Nothing to report when there are no requests.
    if reqs_total == 0:
        return result
    # Build the summary part.
    ip_data_list = [
        {
            "company": company,
            "req_frequency": data["reqs"],
            # Requests of this company / requests of all companies.
            "frequency_rate": round(data["reqs"] / reqs_total, 4),
            "ip_count": len(data["ips"]),
            # Distinct IPs of this company / distinct IPs overall.
            "ip_rate": round(len(data["ips"]) / len(ips_total), 4),
            # Requests of this company / distinct IPs of this company.
            "ip_avg": safe_divide(data["reqs"], len(data["ips"])),
        }
        for company, data in grouped_data.items()
    ]
    result["summary"]["ip"] = sorted(ip_data_list, key=lambda x: x["req_frequency"], reverse=True)
    # Build the detail part: top 500 rows per company.
    result["detail"]["ip"] = {
        company: sorted(rows.values(), key=lambda x: x["req_frequency"], reverse=True)[:500]
        for company, rows in ip_detail_dict.items()
    }
    return result
def account_summary_data_format(account_summary_data):
    """
    Convert account-dimension rows into dashboard summary and drill-down data.

    :param account_summary_data: iterable of dicts with the keys "account",
        "jobnum" and "count".
    :return: ``{"summary": {"account": [...]}, "detail": {"account": {...}}}``.
        "summary" holds one entry per company, sorted by request count in
        descending order. "detail" maps each company to its
        (account, jobnum) rows, sorted by request count in descending
        order and capped at 500 rows. Both are empty when the total
        request count is 0.
    """
    result = {"summary": {"account": []}, "detail": {"account": {}}}
    grouped_data = defaultdict(lambda: {"reqs": 0, "accounts": set()})
    account_detail_dict = defaultdict(lambda: defaultdict(lambda: {"req_frequency": 0}))
    # Distinct accounts across all companies.
    accounts_total = set()
    for account_data in account_summary_data:
        company = find_region_by_code(account_data["jobnum"], jobnum_region_dict)
        count = account_data["count"]
        account = account_data["account"]
        jobnum = account_data["jobnum"]
        # Update the per-company statistics.
        grouped_data[company]["reqs"] += count
        grouped_data[company]["accounts"].add(account)
        accounts_total.add(account)
        # Build the drill-down row. A tuple key keeps distinct
        # (account, jobnum) pairs apart; plain concatenation could merge
        # different pairs that happen to produce the same string.
        detail_row = account_detail_dict[company][(account, jobnum)]
        detail_row["req_account"] = account
        detail_row["req_jobnum"] = jobnum
        detail_row["req_frequency"] += count
    # Total number of requests over all companies.
    reqs_total = sum(data["reqs"] for data in grouped_data.values())
    # Nothing to report when there are no requests.
    if reqs_total == 0:
        return result
    # Build the summary part.
    account_data_list = [
        {
            "company": company,
            "req_frequency": data["reqs"],
            # Requests of this company / requests of all companies.
            "frequency_rate": round(data["reqs"] / reqs_total, 4),
            "account_count": len(data["accounts"]),
            # Distinct accounts of this company / distinct accounts overall.
            "account_rate": round(len(data["accounts"]) / len(accounts_total), 4),
            # Requests of this company / distinct accounts of this company.
            "account_avg": safe_divide(data["reqs"], len(data["accounts"])),
        }
        for company, data in grouped_data.items()
    ]
    result["summary"]["account"] = sorted(account_data_list, key=lambda x: x["req_frequency"], reverse=True)
    # Build the detail part: top 500 rows per company.
    result["detail"]["account"] = {
        company: sorted(rows.values(), key=lambda x: x["req_frequency"], reverse=True)[:500]
        for company, rows in account_detail_dict.items()
    }
    return result
def interface_summary_data_format(interface_summary_data):
    """
    Convert interface-dimension rows into dashboard summary and drill-down data.

    :param interface_summary_data: iterable of dicts with the keys
        "interface", "ip", "account", "jobnum" and "count".
    :return: ``{"summary": {"interface": [...]}, "detail": {"interface": {...}}}``.
        "summary" holds the 20 interfaces with the most requests, sorted by
        request count in descending order. "detail" maps each interface to
        its (ip, account, jobnum) rows, sorted by request count in
        descending order and capped at 500 rows. Both are empty when the
        total request count is 0.
    """
    top_interfaces = 20
    max_detail_rows = 500
    result = {"summary": {"interface": []}, "detail": {"interface": {}}}
    grouped_data = defaultdict(lambda: {"reqs": 0})
    interface_detail_dict = defaultdict(lambda: defaultdict(lambda: {"req_frequency": 0}))
    for interface_data in interface_summary_data:
        count = interface_data["count"]
        interface = interface_data["interface"]
        jobnum = interface_data["jobnum"]
        account = interface_data["account"]
        ip = interface_data["ip"]
        # Update the per-interface statistics.
        grouped_data[interface]["reqs"] += count
        # Build the drill-down row. A tuple key keeps distinct
        # (ip, account, jobnum) triples apart; plain concatenation could
        # merge different triples that happen to produce the same string.
        detail_row = interface_detail_dict[interface][(ip, account, jobnum)]
        detail_row["interface_addr"] = interface
        detail_row["req_ip"] = ip
        detail_row["req_account"] = account
        detail_row["req_jobnum"] = jobnum
        detail_row["req_frequency"] += count
    # Total number of requests over all interfaces.
    reqs_total = sum(data["reqs"] for data in grouped_data.values())
    # Nothing to report when there are no requests.
    if reqs_total == 0:
        return result
    # Build the summary part.
    interface_data_list = [
        {
            "interface_addr": interface,
            "req_frequency": data["reqs"],
            # Requests of this interface / requests of all interfaces.
            "frequency_rate": round(data["reqs"] / reqs_total, 4),
            # Requests of this interface / 20 (the top-20 size). The divisor
            # is a non-zero constant, so no zero guard is needed.
            "frequency_avg": data["reqs"] / top_interfaces,
        }
        for interface, data in grouped_data.items()
    ]
    result["summary"]["interface"] = sorted(
        interface_data_list, key=lambda x: x["req_frequency"], reverse=True
    )[:top_interfaces]
    # Build the detail part: top 500 rows per interface. The cap is applied
    # to the sorted rows; slicing ``dict.items()`` fails on Python 3 and
    # would only drop arbitrary interfaces rather than limit the rows.
    result["detail"]["interface"] = {
        interface: sorted(rows.values(), key=lambda x: x["req_frequency"], reverse=True)[:max_detail_rows]
        for interface, rows in interface_detail_dict.items()
    }
    return result
def menu_summary_data_format(menu_summary_data):
    """
    Convert menu-dimension rows into dashboard summary and drill-down data.

    :param menu_summary_data: iterable of dicts with the keys "menu", "ip",
        "account", "jobnum" and "count".
    :return: ``{"summary": {"menu": [...]}, "detail": {"menu": {...}}}``.
        "summary" holds one entry per menu, sorted by request count in
        descending order. "detail" maps each menu to its
        (ip, account, jobnum) rows, sorted by request count in descending
        order and capped at 500 rows. Both are empty when the total
        request count is 0.
    """
    result = {"summary": {"menu": []}, "detail": {"menu": {}}}
    grouped_data = defaultdict(lambda: {"reqs": 0})
    menu_detail_dict = defaultdict(lambda: defaultdict(lambda: {"req_frequency": 0}))
    # Distinct menus seen in the input.
    menu_total = set()
    for menu_data in menu_summary_data:
        count = menu_data["count"]
        menu = menu_data["menu"]
        jobnum = menu_data["jobnum"]
        account = menu_data["account"]
        ip = menu_data["ip"]
        # Update the per-menu statistics and the set of distinct menus.
        grouped_data[menu]["reqs"] += count
        menu_total.add(menu)
        # Build the drill-down row. A tuple key keeps distinct
        # (ip, account, jobnum) triples apart; plain concatenation could
        # merge different triples that happen to produce the same string.
        detail_row = menu_detail_dict[menu][(ip, account, jobnum)]
        detail_row["menu_name"] = menu
        detail_row["req_ip"] = ip
        detail_row["req_account"] = account
        detail_row["req_jobnum"] = jobnum
        detail_row["req_frequency"] += count
    # Total number of requests over all menus.
    reqs_total = sum(data["reqs"] for data in grouped_data.values())
    # Nothing to report without requests or menus. ``menu_total`` is a set,
    # so it is tested for emptiness (comparing it with 0 is always False).
    if reqs_total == 0 or not menu_total:
        return result
    menu_count = len(menu_total)
    # Build the summary part.
    menu_data_list = [
        {
            "menu_name": menu,
            "req_frequency": data["reqs"],
            # Requests of this menu / requests of all menus.
            "frequency_rate": round(data["reqs"] / reqs_total, 4),
            # Requests of this menu / number of distinct menus
            # (non-zero here thanks to the guard above).
            "frequency_avg": data["reqs"] / menu_count,
        }
        for menu, data in grouped_data.items()
    ]
    result["summary"]["menu"] = sorted(menu_data_list, key=lambda x: x["req_frequency"], reverse=True)
    # Build the detail part: top 500 rows per menu.
    result["detail"]["menu"] = {
        menu: sorted(rows.values(), key=lambda x: x["req_frequency"], reverse=True)[:500]
        for menu, rows in menu_detail_dict.items()
    }
    return result
def adjust_times(start_time, end_time):
    """
    Return the period of equal length that immediately precedes a date range.

    For an inclusive range of N days both dates are shifted back by N days,
    so a single day maps to the day before it.

    :param start_time: range start as a "%Y-%m-%d" string.
    :param end_time: range end as a "%Y-%m-%d" string.
    :return: tuple ``(previous_start, previous_end)`` of "%Y-%m-%d" strings.
        When ``end_time`` is earlier than ``start_time`` the input dates
        are returned unshifted, also as strings, so every path yields the
        same type.
    :raises ValueError: if either argument does not match "%Y-%m-%d".
    """
    date_format = "%Y-%m-%d"
    start_date = datetime.strptime(start_time, date_format)
    end_date = datetime.strptime(end_time, date_format)
    delta_days = (end_date - start_date).days
    if delta_days < 0:
        # Reversed range: there is no meaningful previous period.
        return start_date.strftime(date_format), end_date.strftime(date_format)
    # The range covers delta_days + 1 days inclusive; shift by that many.
    shift = timedelta(days=delta_days + 1)
    previous_start = start_date - shift
    previous_end = end_date - shift
    return previous_start.strftime(date_format), previous_end.strftime(date_format)