# -*- coding: utf-8 -*-
# @Time : 2019/6/6 9:48
# @Author : panda

import datetime
import json
import time
import traceback

from elasticsearch import Elasticsearch
from elasticsearch import helpers

from appsUtils.confutil import ConfUtil

conf_util = ConfUtil()
es_dict = conf_util.getElasticsearchConf()
es_host_list = list()
for ip in es_dict.get('ip').split(','):
    es_host_list.append({'host': ip, 'port': int(es_dict.get('webport'))})
# es_host_list = [{'host': '10.67.1.180', 'port': 9200}]

# default document type
TYPE = '_doc'
# default number of primary shards
NUMBER_OF_SHARDS = 5
# default number of replicas
NUMBER_OF_REPLICAS = 1


def to_dict(data):
    """
    Convert an SQL result set ({'columns': [...], 'rows': [...]}) into a list of
    dicts: [{column_name: value, ...}, ...]
    :param data: data returned by the SQL query
    :return: list of row dicts
    """
    res = list()
    columns = data.get('columns')
    for row in data.get('rows'):
        tmp = dict()
        for i in range(0, len(row)):
            tmp[columns[i]['name']] = row[i]
        res.append(tmp)
    return res


class EsUtil(object):
    es_client = None

    def __init__(self):
        # share a single Elasticsearch client across all EsUtil instances
        if not EsUtil.es_client:
            EsUtil.es_client = Elasticsearch(es_host_list,
                                             timeout=60,
                                             max_retries=10,
                                             retry_on_timeout=True)
        self.es = EsUtil.es_client

    def get_client(self):
        """
        Return the underlying native Elasticsearch client
        :return:
        """
        return self.es

    def is_index_exist(self, index_name):
        return self.es.indices.exists(index=index_name)

    def get_available_index(self, start_time=None, end_time=None, prefix=None, suffix=None):
        """
        List index entries (from the cat indices API) whose date, taken from the
        second-to-last '-'-separated segment of the index name, falls between
        start_time and end_time.
        :param start_time: start timestamp in milliseconds, optional
        :param end_time: end timestamp in milliseconds, optional
        :param prefix: index name prefix, optional
        :param suffix: index name suffix, optional
        :return: list of matching index entries
        """
        results = list()
        index_ = "*"
        start_date = None
        end_date = None
        if prefix:
            index_ = prefix + index_
        if suffix:
            index_ = index_ + suffix
        res = self.es.cat.indices(index=index_, format="json")
        if start_time:
            start_date = datetime.datetime.fromtimestamp(start_time / 1000).strftime("%Y%m%d")
        if end_time:
            end_date = datetime.datetime.fromtimestamp(end_time / 1000).strftime("%Y%m%d")
        for ind in res:
            indices = ind.get('index', '').split('-')
            if start_date and len(indices) > 1:
                if indices[-2] < start_date:
                    continue
            if end_date and len(indices) > 1:
                if indices[-2] > end_date:
                    continue
            results.append(ind)
        return results

    def get_available_index_name(self, start_time=None, end_time=None, prefix=None, suffix=None):
        """
        Same filter as get_available_index, but return only the index names.
        """
        results = list()
        indices = self.get_available_index(start_time, end_time, prefix, suffix)
        if not indices:
            return results
        for index_ in indices:
            results.append(index_.get("index"))
        return results

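    # Minimal usage sketch (assumes time-partitioned index names such as
    # 'internal_isop_log-20190606-1', where the second-to-last '-'-separated
    # segment is a yyyyMMdd date; the prefix and timestamps below are illustrative only):
    #
    #   es = EsUtil()
    #   names = es.get_available_index_name(start_time=1559750400000,
    #                                       end_time=1559836800000,
    #                                       prefix='internal_isop_log-')
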
    def search_by_sql(self, sql):
        """
        Query via SQL (x-pack SQL API).
        Note: only keyword-type fields can be used in group-by / aggregation queries.
        :param sql:
        :return: list of row dicts
        """
        return to_dict(self.es.xpack.sql.query(body={'query': sql}))

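    # Minimal usage sketch (index and field names are illustrative only; remember
    # that only keyword fields can be grouped/aggregated):
    #
    #   es = EsUtil()
    #   rows = es.search_by_sql("SELECT name, age FROM test WHERE age > 18")
    #   # rows -> [{'name': 'Alice', 'age': 20}, ...]
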
    def create_index(self, index_name, field_type_dict, number_of_shards, number_of_replicas):
        """
        Simple index creation; for now only a flat field-name -> field-type dict is supported.
        :param index_name: index name
        :param field_type_dict: dict mapping field names to field types
        :param number_of_shards: number of primary shards
        :param number_of_replicas: number of replicas
        :return: True if the index was created successfully
        """
        if self.is_index_exist(index_name):
            raise Exception('index [%s] already exists' % index_name)
        body = dict()
        settings = {
            'number_of_shards': number_of_shards,
            'number_of_replicas': number_of_replicas
        }
        mappings = dict()
        index_type = dict()
        properties = dict()
        for (key, value) in field_type_dict.items():
            properties[key] = {'type': value}
        index_type['properties'] = properties
        mappings[TYPE] = index_type
        body['settings'] = settings
        body['mappings'] = mappings
        response = self.es.indices.create(index=index_name, body=body)
        return response['acknowledged'] and response['shards_acknowledged']

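    # Minimal usage sketch (index name and field types are illustrative only):
    #
    #   es = EsUtil()
    #   es.create_index('test', {'name': 'keyword', 'age': 'integer',
    #                            'birthday': 'date'}, 5, 1)
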
    def create_index_by_mapping_alias(self, index_name, mappings, alias_name):
        """
        Create an index with an alias and a caller-supplied (possibly dynamic) mapping.
        :param index_name:
        :param mappings: mapping definition, either a dict or a JSON string
        :param alias_name:
        :return: True if the index was created successfully
        """
        if self.is_index_exist(index_name):
            raise Exception('index [%s] already exists' % index_name)

        # could also be done with three separate requests:
        # es.indices.create(index=index_name)
        # es.indices.put_mapping(index=index_name, body=mapping)
        # es.indices.put_alias(index=index_name, name=alias_name)

        # create everything in a single request instead
        request_body = dict()
        request_body['settings'] = {
            'number_of_replicas': NUMBER_OF_REPLICAS,
            'number_of_shards': NUMBER_OF_SHARDS
        }
        if isinstance(mappings, dict):
            request_body['mappings'] = mappings
        else:
            request_body['mappings'] = json.loads(mappings)
        # aliases belong under the 'aliases' key of the create-index body
        request_body['aliases'] = {
            alias_name: {}
        }
        response = self.es.indices.create(index=index_name, body=request_body)
        return response['acknowledged'] and response['shards_acknowledged']

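    # Minimal usage sketch (index name, mapping and alias are illustrative only;
    # the mapping is keyed by the default doc type as in create_index):
    #
    #   es = EsUtil()
    #   mappings = {TYPE: {'dynamic': True, 'properties': {'name': {'type': 'keyword'}}}}
    #   es.create_index_by_mapping_alias('test-000001', mappings, 'test')
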
    def create_index_simple(self, index_name, field_type_dict):
        """
        Create an index with the default number of shards and replicas
        (5 shards / 1 replica for now; these may later come from the config file).
        :param index_name:
        :param field_type_dict:
        :return:
        """
        return self.create_index(index_name, field_type_dict, NUMBER_OF_SHARDS, NUMBER_OF_REPLICAS)

    def create_index_by_body(self, index_name, request_body):
        """
        Create an index from a caller-supplied request body.
        :param index_name:
        :param request_body: mapping/settings supplied by the caller, e.g.
                             {'mappings': {'properties': {}}, 'settings': {}}
        :return: True if the index was created successfully
        """
        if self.is_index_exist(index_name):
            raise Exception('index [%s] already exists' % index_name)
        response = self.es.indices.create(index=index_name, body=request_body)
        return response['acknowledged'] and response['shards_acknowledged']

    def search(self, index_name, request_body, request_params=dict()):
        """
        Native search API.
        :param request_params: extra query-string parameters, optional
        :param index_name:
        :param request_body: query DSL
        :return:
        """
        return self.es.search(index=index_name, body=request_body, params=request_params, request_timeout=60)

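    # Minimal usage sketch (index and field names are illustrative only):
    #
    #   es = EsUtil()
    #   res = es.search('test', {'query': {'match': {'name': 'Alice'}},
    #                            'from': 0, 'size': 10})
    #   hits = res['hits']['hits']
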
    def search_by_uri(self, index_name, uri_params):
        """
        URI (query-string) search.
        demo: test/_search?q=Panda&df=name&from=10&size=10&sort=age:desc&sort=id:desc
        :param index_name: index name; may be empty, in which case all indices are searched
        :param uri_params: dict, e.g.
            {
                'q': 'Alice',
                'df': "name",
                'from': 3,
                'size': 10,
                'sort': [
                    'age:desc', 'name:desc'
                ]
            }
            see the URI search syntax for details
        :return:
        """
        return self.es.search(index=index_name, params=uri_params)

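    # Minimal usage sketch (values are illustrative only):
    #
    #   es = EsUtil()
    #   res = es.search_by_uri('test', {'q': 'Alice', 'df': 'name', 'size': 10})
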
    def scroll_search(self, index_name, scroll, request_body, request_params=dict()):
        """
        Paginate through results with a scroll snapshot; return the first page of
        results together with the scroll context needed to fetch the next page.
        Note: a scroll can only move forward, it cannot go back to a previous page.
        :param request_params:
        :param index_name: index name
        :param scroll: how long to keep the scroll snapshot alive (e.g. '5m')
        :param request_body: query DSL
        :return: (scroll_msg, response); scroll_msg is passed to scroll_next to fetch
                 the following page, response holds the current page of data
        """
        response = self.es.search(index=index_name, scroll=scroll, body=request_body, params=request_params, request_timeout=60)
        scroll_msg = {'scroll_id': response.get('_scroll_id'), 'scroll': scroll}
        return scroll_msg, response

    def scroll_next(self, scroll_msg, request_params=dict()):
        """
        Fetch the next page of a scroll; pass in the first value returned by
        scroll_search (or by a previous scroll_next call).
        :param request_params:
        :param scroll_msg:
        :return: (scroll_msg, response)
        """
        response = self.es.scroll(body=scroll_msg, params=request_params)
        scroll_msg = {'scroll_id': response.get('_scroll_id'), 'scroll': scroll_msg.get('scroll')}
        return scroll_msg, response

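    # Minimal scroll-pagination sketch (index name and query are illustrative only):
    #
    #   es = EsUtil()
    #   scroll_msg, page = es.scroll_search('test', '5m',
    #                                       {'size': 1000, 'query': {'match_all': {}}})
    #   while page['hits']['hits']:
    #       # ... process page['hits']['hits'] ...
    #       scroll_msg, page = es.scroll_next(scroll_msg)
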
    def delete_index(self, index_name):
        """
        Delete an index.
        :param index_name:
        :return:
        """
        return self.es.indices.delete(index=index_name)['acknowledged']

    def delete_index_by_alias(self, alias_name):
        """
        Delete an alias and its indices; assumes the naming convention that the
        index name is the alias name plus a suffix.
        This method is risky: it may delete other people's indices that happen to
        match alias_name*. Use with caution.
        :return:
        """
        index_name = '%s*' % alias_name
        try:
            if self.es.indices.exists_alias(name=alias_name, index=index_name):
                self.es.indices.delete_alias(name=alias_name, index=index_name)
            if self.es.indices.exists(index=index_name):
                self.es.indices.delete(index=index_name)
        except Exception:
            traceback.print_exc()
        return not (self.es.indices.exists_alias(name=alias_name, index=index_name) and self.es.indices.exists(
            index=index_name))

    def index(self, index_name, request_body):
        """
        Insert a single document.
        :param index_name: index name
        :param request_body: document body as a dict, e.g.
            {
                "name": "Alice",
                "address": "Wuhan",
                "age": 1,
                "birthday": "2019-06-03T18:47:45.999"
            }
        :return: the 'result' field of the index response (e.g. 'created')
        """
        return self.es.index(index=index_name, doc_type=TYPE, body=request_body).get('result')

    def bulk_insert(self, index_name, data_list):
        """
        Bulk insert documents.
        :param index_name: index name
        :param data_list: list of document dicts
        :return: result of helpers.bulk (number of successes, list of errors)
        """
        actions = list()
        for data in data_list:
            action = {
                "_index": index_name,
                "_type": TYPE,
                '_source': data
            }
            actions.append(action)

        return helpers.bulk(self.es, actions)

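    # Minimal usage sketch (index name and documents are illustrative only):
    #
    #   es = EsUtil()
    #   docs = [{'name': 'Alice', 'age': 20}, {'name': 'Bob', 'age': 21}]
    #   success, errors = es.bulk_insert('test', docs)
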
    def search_after_start(self, index_name, request_body):
        """
        First query of a search_after sequence; search_after avoids the deep
        paging problem of from/size.
        The request body must contain both 'size' and 'sort'.
        :return: (search_after_body, response); pass search_after_body to
                 search_after to fetch the next page
        """
        if request_body.get('size') is None or request_body.get('sort') is None:
            raise Exception('request body is not valid: size and sort are required')
        response = self.es.search(index=index_name, body=request_body)
        hits = response.get('hits', {}).get('hits', [])
        search_after_body = {
            'size': request_body.get('size'),
            'sort': request_body.get('sort'),
            # the sort values of the last hit on this page become the cursor for the next page
            'search_after': hits[-1]['sort'] if hits else None
        }
        return search_after_body, response

    def search_after(self, index_name, search_after_body):
        """
        Fetch the next page of a search_after sequence.
        :param index_name:
        :param search_after_body: cursor returned by search_after_start or a previous search_after call
        :return: (search_after_body, response)
        """
        response = self.es.search(index=index_name, body=search_after_body)
        hits = response.get('hits', {}).get('hits', [])
        search_after_body = {
            'size': search_after_body.get('size'),
            'sort': search_after_body.get('sort'),
            'search_after': hits[-1]['sort'] if hits else None
        }
        return search_after_body, response

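    # Minimal search_after-pagination sketch (index name, sort fields and query are
    # illustrative only):
    #
    #   es = EsUtil()
    #   body = {'size': 1000, 'sort': [{'age': 'desc'}, {'_id': 'desc'}],
    #           'query': {'match_all': {}}}
    #   cursor, page = es.search_after_start('test', body)
    #   while page['hits']['hits']:
    #       # ... process page['hits']['hits'] ...
    #       cursor, page = es.search_after('test', cursor)
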
    def add_field(self, index, mapping):
        """
        Add a field to an existing index mapping.
        @param index: index name
        @param mapping: for example
            mapping = {
                "properties": {
                    field_name: {
                        "type": field_type
                    }
                }
            }
        @return:
        """
        self.es.indices.put_mapping(index=index, doc_type="_doc", body=mapping, include_type_name=True)

    def bulk_update(self, index, query_dsl):
        """
        Bulk update via update_by_query.
        @param index: index name
        @param query_dsl: query selecting the documents to update plus the update script, e.g.
            {
                "query": {
                    "bool": {"must": [
                        {
                            "terms": {
                                "log_id": ["c6c8eaca-d891-4f0e-b15b-b02f02dbe4df", "92f40a7c-e3f1-412d-9a00-72f22b7ebc9b", "4257dbe6-369a-42f5-9f14-4406a3eb5c7a"]
                            }
                        }
                    ]}
                },
                "script": {
                    "inline": "ctx._source.dport = params.dport",
                    "params": {
                        "dport": 50801
                    },
                    "lang": "painless"
                }
            }
        @return:
        """
        self.es.update_by_query(index=index, body=query_dsl)


if __name__ == '__main__':
    es = EsUtil()
    # print es.is_index_exist('test')
    # es.search('', {})
    index_name = "internal_isop_log-*"
    # mapping = {
    #     "properties": {
    #         "is_report": {
    #             "type": "boolean"
    #         }
    #     }
    # }
    #
    # es.add_field(index_name, mapping)
    index = 'internal_isop_incident-*'
    query_dsl = {
        "query": {
            "bool": {"must": [
                {
                    "terms": {
                        "id": ["9f00c0be-ba38-4edc-9f39-889a57ef89c4cq", "29a9c4dc-e7d4-432b-aef8-d216401cb9e5cq", "8494a6be-f80e-4983-adee-92cbf7ef5c31cq"]
                    }
                }
            ]}
        },
        "script": {
            "inline": "ctx._source.is_report = params.is_report",
            "params": {
                "is_report": True
            },
            "lang": "painless"
        }
    }
    es.bulk_update(index, query_dsl)