You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
hbyd_ueba/utils/esUtil.py

415 lines
14 KiB

# -*- coding: utf-8 -*-
# @Time : 2019/6/6 9:48
# @Author : panda
import datetime
import json
import time
import traceback
from elasticsearch import Elasticsearch
from elasticsearch import helpers
from appsUtils.confutil import ConfUtil
conf_util = ConfUtil()
es_dict = conf_util.getElasticsearchConf()
es_host_list = list()
for ip in es_dict.get('ip').split(','):
es_host_list.append({'host': ip, 'port': int(es_dict.get('webport'))})
# es_host_list = [{'host': '10.67.1.180', 'port': 9200}]
# 默认type
TYPE = '_doc'
# 默认分片数量
NUMBER_OF_SHARDS = 5
# 默认副本数量
NUMBER_OF_REPLICAS = 1
def to_dict(data):
"""
将sql 获取的list结果集转化为[{xx=xxx,x=xxx},{}]
:param data: sql查询的数据
:return:
"""
res = list()
columns = data.get('columns')
for row in data.get('rows'):
tmp = dict()
for i in range(0, len(row)):
tmp[columns[i]['name']] = row[i]
res.append(tmp)
return res
class EsUtil(object):
es_client = None
def __init__(self):
if not EsUtil.es_client:
EsUtil.es_client = Elasticsearch(es_host_list,
timeout=60,
max_retries=10,
retry_on_timeout=True)
self.es = EsUtil.es_client
def get_client(self):
"""
提供原生的es_Client
:return:
"""
return self.es
def is_index_exist(self, index_name):
return self.es.indices.exists(index=index_name)
def get_available_index(self, start_time=None, end_time=None, prefix=None, suffix=None):
results = list()
index_ = "*"
start_date = None
end_date = None
if prefix:
index_ = prefix + index_
if suffix:
index_ = index_ + suffix
res = self.es.cat.indices(index=index_, format="json")
if start_time:
start_date = datetime.datetime.fromtimestamp(start_time / 1000).strftime("%Y%m%d")
if end_time:
end_date = datetime.datetime.fromtimestamp(end_time / 1000).strftime("%Y%m%d")
for ind in res:
indices = ind.get('index', '').split('-')
if start_date and len(indices) > 1:
if indices[-2] < start_date:
continue
if end_date and len(indices) > 1:
if indices[-2] > end_date:
continue
results.append(ind)
return results
def get_available_index_name(self, start_time=None, end_time=None, prefix=None, suffix=None):
results = list()
indices = self.get_available_index(start_time, end_time, prefix, suffix)
if not indices:
return results
for index_ in indices:
results.append(index_.get("index"))
return results
def search_by_sql(self, sql):
"""
sql查询
注:keyword类型的字段才能进行分组聚合查询
:param sql:
:return:
"""
return to_dict(self.es.xpack.sql.query(body={'query': sql}))
def create_index(self, index_name, field_type_dict, number_of_shards, number_of_replicas):
"""
简单的创建索引,暂时支持传入简单的键值对
:param index_name 索引名称
:param field_type_dict 字段名称,类型字典
:param number_of_shards 分片数量
:param number_of_replicas 副本数量
:return: 创建成功
"""
if self.is_index_exist(index_name):
raise Exception('index [%s] is exist' % index_name)
body = dict()
settings = {
'number_of_shards': number_of_shards,
'number_of_replicas': number_of_replicas
}
mappings = dict()
index_type = dict()
properties = dict()
# print field_type_dict
for (key, value) in field_type_dict.items():
properties[key] = {'type': value}
index_type['properties'] = properties
mappings[TYPE] = index_type
body['settings'] = settings
body['mappings'] = mappings
# print json.dumps(body)
response = self.es.indices.create(index=index_name, body=body)
return response['acknowledged'] and response['shards_acknowledged']
def create_index_by_mapping_alias(self, index_name, mappings, alias_name):
"""
加入别名和动态mapping
:param index_name:
:param mappings:
:param alias_name:
:return:
"""
if self.is_index_exist(index_name):
raise Exception('index [%s] is exist' % index_name)
# es.indices.create(index=index_name)
# es.indices.put_mapping(index=index_name, body=mapping)
# es.indices.put_alias(index=index_name,name=alias_name)
# 使用一个请求创建
request_body = dict()
request_body['settings'] = {
'number_of_replicas': NUMBER_OF_REPLICAS,
'number_of_shards': NUMBER_OF_SHARDS
}
if isinstance(mappings, dict):
request_body['mappings'] = mappings
else:
request_body['mappings'] = json.loads(mappings)
request_body[index_name] = {
'aliases': {
alias_name: {}
}
}
response = self.es.indices.create(index=index_name, body=request_body)
return response['acknowledged'] and response['shards_acknowledged']
def create_index_simple(self, index_name, field_type_dict):
"""
默认五个分片一个副本,或者从配置文件中读取,暂定
:param index_name:
:param field_type_dict:
:return:
"""
return self.create_index(index_name, field_type_dict, NUMBER_OF_SHARDS, NUMBER_OF_REPLICAS)
def create_index_by_body(self, index_name, request_body):
"""
自己传入body进行索引的创建
:param index_name:
:param request_body: 用户传入mapping,setting设置{‘mappings’:{'properties‘:{}},'settings':{}}
:return: 索引是否创建成功
"""
if self.is_index_exist(index_name):
raise Exception('index [%s] is exist' % index_name)
response = self.es.indices.create(index=index_name, body=request_body)
return response['acknowledged'] and response['shards_acknowledged']
def search(self, index_name, request_body, request_params=dict()):
"""
查询接口(原生)
:param request_params:
:param index_name:
:param request_body:
:return:
"""
return self.es.search(index=index_name, body=request_body, params=request_params, request_timeout=60)
def search_by_uri(self, index_name, uri_params):
"""
通过uri的方式进行查询
demo: test/_search?q=Panda&df=name&from=10&size=10&sort=age:desc&sort=id:desc
:param index_name:索引名称,可以为空,会在所有的索引中查询
:param uri_params: dict类型,类似于
{
'q': 'Alice',
'df': "name",
'from': 3,
'size': 10,
'sort': [
'age:desc', 'name:desc'
]
},详细信息请查询uri语法
:return:
"""
return self.es.search(index=index_name, params=uri_params)
def scroll_search(self, index_name, scroll, request_body, request_params=dict()):
"""
通过快照进行分页查询,并返回第一个快照查询的结果和快照的id,用于继续查询
注:此查询只能不停的向后查询,不能返回上一页
:param request_params:
:param index_name 索引名称
:param scroll 快照保留的时间
:param request_body 查询的请求参数
:return: response为查询的数据,scroll_msg返回,并用于获取下一次的快照信息,scroll_size可用于跳出循环后记录开始from
"""
response = self.es.search(index=index_name, scroll=scroll, body=request_body, params=request_params, request_timeout=60)
scroll_msg = {'scroll_id': response.get('_scroll_id'), 'scroll': scroll}
return scroll_msg, response
def scroll_next(self, scroll_msg, request_params=dict()):
"""
传入scroll_search返回的第一个参数,用于获取下一次的快照
:param request_params:
:param scroll_msg:
:return:
"""
response = self.es.scroll(body=scroll_msg, params=request_params)
scroll_msg = {'scroll_id': response.get('_scroll_id'), 'scroll': scroll_msg.get('scroll')}
return scroll_msg, response
def delete_index(self, index_name):
"""
删除
:param index_name:
:return:
"""
return self.es.indices.delete(index=index_name)['acknowledged']
def delete_index_by_alias(self, alias_name):
"""
通过别名删除索引和别名,对别名有特殊需求,索引名称为 别名+标志
此方法有风险,可能会删除其他人创建的alias_name*索引
谨慎使用
:return:
"""
index_name = '%s*' % alias_name
try:
if self.es.indices.exists_alias(name=alias_name, index=index_name):
self.es.indices.delete_alias(name=alias_name, index=index_name)
if self.es.indices.exists(index=index_name):
self.es.indices.delete(index=index_name)
except:
traceback.format_exc()
return not (self.es.indices.exists_alias(name=alias_name, index=index_name) and self.es.indices.exists(
index=index_name))
def index(self, index_name, request_body):
"""
单条doc插入
:param index_name 索引名称
:param request_body 请求数据dict
{
"name": "Alice",
"address": "武汉",
"age": 1,
"birthday": "2019-06-03T18:47:45.999"
}
:return:
"""
return self.es.index(index=index_name, doc_type=TYPE, body=request_body).get('result')
def bulk_insert(self, index_name, data_list):
"""
批量插入
:return:
"""
actions = list()
for data in data_list:
action = {
"_index": index_name,
"_type": TYPE,
'_source': data
}
actions.append(action)
return helpers.bulk(self.es, actions)
def search_after_start(self, index_name, request_body):
"""
通过elasticsearch search after 避免深度分页的问题
:return:
"""
if request_body.get('size') is None and request_body.get('sort') is None:
raise Exception('request body is not validate')
response = self.es.search(index=index_name, body=request_body)
search_after_body = {
'size': request_body.get('size'),
'sort': request_body.get('sort'),
'search_after': request_body.get('hits', {}).get('hits', {}).get('sort')
}
return search_after_body, response
def search_after(self, index_name, search_after_body):
"""
search_after
:param index_name:
:param search_after_body
:return:
"""
response = self.es.search(index=index_name, body=search_after_body)
search_after_body = {
'size': search_after_body.get('size'),
'sort': search_after_body.get('sort'),
'search_after': response.get('hits', {}).get('hits', {}).get('sort')
}
return search_after_body, response
def add_field(self, index, mapping):
"""
新增索引字段
@param index: 索引名称
@param mapping: 参数样例 mapping = {
"properties": {
field_name: {
"type": field_type
}
}
}
@return:
"""
self.es.indices.put_mapping(index=index, doc_type="_doc",body=mapping,include_type_name=True)
def bulk_update(self, index, query_dsl):
"""
批量更新
@param index: 索引名称
@param query_dsl: 满足更新条件的查询语句 {
"query":{
"bool":{"must":[
{
"terms":{
"log_id":["c6c8eaca-d891-4f0e-b15b-b02f02dbe4df","92f40a7c-e3f1-412d-9a00-72f22b7ebc9b","4257dbe6-369a-42f5-9f14-4406a3eb5c7a"]
}
}
]}
},
"script":{
"inline":"ctx._source.dport = params.dport",
"params":{
"dport":50801
},
"lang":"painless"
}
}
@return:
"""
self.es.update_by_query(index=index,body=query_dsl)
if __name__ == '__main__':
es = EsUtil()
# print es.is_index_exist('test')
#es.search('', {})
index_name = "internal_isop_log-*"
# mapping = {
# "properties": {
# "is_report": {
# "type": "boolean"
# }
# }
# }
#
# es.add_field(index_name,mapping)
index = 'internal_isop_incident-*'
query_dsl ={
"query":{
"bool":{"must":[
{
"terms":{
"id":[ ["9f00c0be-ba38-4edc-9f39-889a57ef89c4cq", "29a9c4dc-e7d4-432b-aef8-d216401cb9e5cq", "8494a6be-f80e-4983-adee-92cbf7ef5c31cq"]]
}
}
]}
},
"script":{
"inline":"ctx._source.is_report = params.is_report",
"params":{
"is_report":True
},
"lang":"painless"
}
}
es.bulk_update(index,query_dsl)