# -*- coding: utf-8 -*-
# @Time : 2019/6/6 9:48
# @Author : panda
"""Helpers built on the elasticsearch-py client: index management, search,
scroll / search_after pagination and bulk writes."""
import datetime
import json
import logging
import time
import traceback

from elasticsearch import Elasticsearch
from elasticsearch import helpers

from appsUtils.confutil import ConfUtil

logger = logging.getLogger(__name__)

# Connection settings are read once, at import time, from the project config.
conf_util = ConfUtil()
es_dict = conf_util.getElasticsearchConf()
es_host_list = list()
for ip in es_dict.get('ip').split(','):
    es_host_list.append({'host': ip, 'port': int(es_dict.get('webport'))})
# es_host_list = [{'host': '10.67.1.180', 'port': 9200}]

# Default mapping type
TYPE = '_doc'
# Default number of primary shards
NUMBER_OF_SHARDS = 5
# Default number of replicas
NUMBER_OF_REPLICAS = 1


def to_dict(data):
    """
    Convert a SQL response of the form {'columns': [...], 'rows': [...]} into
    a list of dicts, one per row, keyed by each column's 'name'.

    :param data: response dict containing 'columns' and 'rows'
    :return: list of {column_name: value} dicts
    """
    columns = data.get('columns')
    return [
        {column['name']: value for column, value in zip(columns, row)}
        for row in data.get('rows')
    ]


def _build_search_after_body(previous_body, response):
    """
    Build the request body for the next search_after page.

    The previous body is copied so that the query, size, sort, etc. are all
    carried over; only the cursor is replaced.
    """
    # 'from' is dropped because it cannot be combined with a search_after
    # cursor (see the Elasticsearch pagination docs).
    next_body = dict(
        (key, value) for key, value in previous_body.items() if key != 'from'
    )
    # hits.hits is a list; the cursor is the sort values of its last element.
    hits = (response.get('hits') or {}).get('hits') or []
    if hits and hits[-1].get('sort') is not None:
        next_body['search_after'] = hits[-1].get('sort')
    elif next_body.get('search_after') is None:
        # Never send an explicit null cursor.
        next_body.pop('search_after', None)
    return next_body


class EsUtil(object):
    """Wrapper around a single Elasticsearch client shared by all instances."""

    # Lazily created client, stored on the class so it is built only once.
    es_client = None

    def __init__(self):
        if not EsUtil.es_client:
            EsUtil.es_client = Elasticsearch(es_host_list,
                                             timeout=60,
                                             max_retries=10,
                                             retry_on_timeout=True)
        self.es = EsUtil.es_client

    def get_client(self):
        """
        Return the underlying native Elasticsearch client.

        :return: the shared Elasticsearch client
        """
        return self.es

    def is_index_exist(self, index_name):
        """Return the result of indices.exists for ``index_name``."""
        return self.es.indices.exists(index=index_name)

    def get_available_index(self, start_time=None, end_time=None, prefix=None, suffix=None):
        """
        List indices matching ``<prefix>*<suffix>``, optionally filtered by date.

        The second-to-last '-'-separated segment of each index name is compared
        (as a string) with the YYYYMMDD form of start_time / end_time. Index
        names without a '-' are never filtered out.
        NOTE(review): this presumably expects names like ``<name>-YYYYMMDD-<x>``;
        confirm against the real naming convention.

        :param start_time: lower bound; divided by 1000 before conversion, so
            presumably epoch milliseconds (converted in local time)
        :param end_time: upper bound, same unit as start_time
        :param prefix: optional index name prefix
        :param suffix: optional index name suffix
        :return: list of index entries as returned by cat.indices(format="json")
        """
        results = list()
        index_ = "*"
        start_date = None
        end_date = None
        if prefix:
            index_ = prefix + index_
        if suffix:
            index_ = index_ + suffix
        res = self.es.cat.indices(index=index_, format="json")
        if start_time:
            start_date = datetime.datetime.fromtimestamp(start_time / 1000).strftime("%Y%m%d")
        if end_time:
            end_date = datetime.datetime.fromtimestamp(end_time / 1000).strftime("%Y%m%d")
        for ind in res:
            indices = ind.get('index', '').split('-')
            if len(indices) > 1:
                if start_date and indices[-2] < start_date:
                    continue
                if end_date and indices[-2] > end_date:
                    continue
            results.append(ind)
        return results

    def get_available_index_name(self, start_time=None, end_time=None, prefix=None, suffix=None):
        """
        Same filtering as get_available_index, but return only the index names.

        :return: list of index names (empty list when nothing matches)
        """
        indices = self.get_available_index(start_time, end_time, prefix, suffix)
        return [index_.get("index") for index_ in indices]

    def search_by_sql(self, sql):
        """
        Run a SQL query through the x-pack SQL endpoint.
        Note: only keyword-typed fields can be used for group-by aggregation.

        :param sql: SQL statement
        :return: list of row dicts (see to_dict)
        """
        return to_dict(self.es.xpack.sql.query(body={'query': sql}))

    def create_index(self, index_name, field_type_dict, number_of_shards, number_of_replicas):
        """
        Create an index from a flat {field_name: field_type} dict.

        :param index_name: index name
        :param field_type_dict: dict mapping field name to field type
        :param number_of_shards: number of primary shards
        :param number_of_replicas: number of replicas
        :return: truthy when both 'acknowledged' and 'shards_acknowledged' are set
        :raises Exception: if the index already exists
        """
        if self.is_index_exist(index_name):
            raise Exception('index [%s] is exist' % index_name)
        properties = dict(
            (field, {'type': field_type})
            for field, field_type in field_type_dict.items()
        )
        # NOTE(review): the mapping is nested under the type name (TYPE);
        # confirm the target cluster version accepts typed mappings here.
        body = {
            'settings': {
                'number_of_shards': number_of_shards,
                'number_of_replicas': number_of_replicas
            },
            'mappings': {TYPE: {'properties': properties}}
        }
        response = self.es.indices.create(index=index_name, body=body)
        return response['acknowledged'] and response['shards_acknowledged']

    def create_index_by_mapping_alias(self, index_name, mappings, alias_name):
        """
        Create an index with the given mappings and an alias, in one request,
        using the module default shard / replica counts.

        :param index_name: index name
        :param mappings: mappings as a dict or as a JSON string
        :param alias_name: alias to attach to the new index
        :return: truthy when both 'acknowledged' and 'shards_acknowledged' are set
        :raises Exception: if the index already exists
        """
        if self.is_index_exist(index_name):
            raise Exception('index [%s] is exist' % index_name)
        request_body = dict()
        request_body['settings'] = {
            'number_of_replicas': NUMBER_OF_REPLICAS,
            'number_of_shards': NUMBER_OF_SHARDS
        }
        if isinstance(mappings, dict):
            request_body['mappings'] = mappings
        else:
            request_body['mappings'] = json.loads(mappings)
        # Aliases belong under the top-level 'aliases' key of the create-index
        # body, not under a key named after the index.
        request_body['aliases'] = {
            alias_name: {}
        }
        response = self.es.indices.create(index=index_name, body=request_body)
        return response['acknowledged'] and response['shards_acknowledged']

    def create_index_simple(self, index_name, field_type_dict):
        """
        Create an index with the module default shard and replica counts.

        :param index_name: index name
        :param field_type_dict: dict mapping field name to field type
        :return: see create_index
        """
        return self.create_index(index_name, field_type_dict, NUMBER_OF_SHARDS, NUMBER_OF_REPLICAS)

    def create_index_by_body(self, index_name, request_body):
        """
        Create an index from a caller-supplied request body.

        :param index_name: index name
        :param request_body: full create-index body, e.g.
            {'mappings': {'properties': {}}, 'settings': {}}
        :return: truthy when both 'acknowledged' and 'shards_acknowledged' are set
        :raises Exception: if the index already exists
        """
        if self.is_index_exist(index_name):
            raise Exception('index [%s] is exist' % index_name)
        response = self.es.indices.create(index=index_name, body=request_body)
        return response['acknowledged'] and response['shards_acknowledged']

    def search(self, index_name, request_body, request_params=None):
        """
        Native search call with a 60 second request timeout.

        :param index_name: index name
        :param request_body: query DSL body
        :param request_params: optional dict of extra request parameters
        :return: raw search response
        """
        # A fresh dict per call instead of a shared mutable default.
        if request_params is None:
            request_params = {}
        return self.es.search(index=index_name, body=request_body, params=request_params, request_timeout=60)

    def search_by_uri(self, index_name, uri_params):
        """
        URI-style search.
        demo: test/_search?q=Panda&df=name&from=10&size=10&sort=age:desc&sort=id:desc

        :param index_name: index name; may be empty to search all indices
        :param uri_params: dict such as
            {
                'q': 'Alice',
                'df': "name",
                'from': 3,
                'size': 10,
                'sort': ['age:desc', 'name:desc']
            }
            (see the URI search syntax for details)
        :return: raw search response
        """
        return self.es.search(index=index_name, params=uri_params)

    def scroll_search(self, index_name, scroll, request_body, request_params=None):
        """
        Start a scroll search and return the first page together with the
        scroll information needed to fetch the next one.
        Note: a scroll can only move forward; there is no previous page.

        :param index_name: index name
        :param scroll: how long the scroll context is kept alive
        :param request_body: query DSL body
        :param request_params: optional dict of extra request parameters
        :return: (scroll_msg, response); pass scroll_msg to scroll_next
        """
        if request_params is None:
            request_params = {}
        response = self.es.search(index=index_name, scroll=scroll, body=request_body, params=request_params,
                                  request_timeout=60)
        scroll_msg = {'scroll_id': response.get('_scroll_id'), 'scroll': scroll}
        return scroll_msg, response

    def scroll_next(self, scroll_msg, request_params=None):
        """
        Fetch the next page of a scroll.

        :param scroll_msg: first element returned by scroll_search / scroll_next
        :param request_params: optional dict of extra request parameters
        :return: (scroll_msg, response) for the next call
        """
        if request_params is None:
            request_params = {}
        response = self.es.scroll(body=scroll_msg, params=request_params)
        scroll_msg = {'scroll_id': response.get('_scroll_id'), 'scroll': scroll_msg.get('scroll')}
        return scroll_msg, response

    def delete_index(self, index_name):
        """
        Delete an index.

        :param index_name: index name
        :return: the 'acknowledged' flag of the delete response
        """
        return self.es.indices.delete(index=index_name)['acknowledged']

    def delete_index_by_alias(self, alias_name):
        """
        Delete an alias and the indices behind it. Indices are matched with the
        pattern ``<alias_name>*``.
        WARNING: this may also delete indices created by others whose name
        starts with alias_name. Use with care.

        Deletion is best-effort: errors are logged, not raised.

        :param alias_name: alias name, also used as the index name prefix
        :return: True only when neither the alias nor a matching index remains
        """
        index_name = '%s*' % alias_name
        try:
            if self.es.indices.exists_alias(name=alias_name, index=index_name):
                self.es.indices.delete_alias(name=alias_name, index=index_name)
            if self.es.indices.exists(index=index_name):
                self.es.indices.delete(index=index_name)
        except Exception:
            # Keep the best-effort behaviour, but make the failure visible.
            logger.exception('failed to delete alias/index for [%s]', alias_name)
        # Success means both are gone, hence "or" rather than "and".
        return not (self.es.indices.exists_alias(name=alias_name, index=index_name)
                    or self.es.indices.exists(index=index_name))

    def index(self, index_name, request_body):
        """
        Insert a single document.

        :param index_name: index name
        :param request_body: document dict, e.g.
            {
                "name": "Alice",
                "address": "Wuhan",
                "age": 1,
                "birthday": "2019-06-03T18:47:45.999"
            }
        :return: the 'result' field of the index response
        """
        return self.es.index(index=index_name, doc_type=TYPE, body=request_body).get('result')

    def bulk_insert(self, index_name, data_list):
        """
        Insert many documents with helpers.bulk.

        :param index_name: index name
        :param data_list: iterable of document dicts
        :return: result of helpers.bulk
        """
        actions = [
            {
                "_index": index_name,
                "_type": TYPE,
                '_source': data
            }
            for data in data_list
        ]
        return helpers.bulk(self.es, actions)

    def search_after_start(self, index_name, request_body):
        """
        Run the first page of a search_after pagination (avoids deep paging).

        :param index_name: index name
        :param request_body: query DSL body; must contain both 'size' and 'sort'
        :return: (search_after_body, response). search_after_body is the body
            for the next page; it has no 'search_after' key when the response
            contained no hits, in which case the caller should stop paging.
        :raises Exception: if 'size' or 'sort' is missing
        """
        # Both are required: the cursor is derived from the sort values.
        if request_body.get('size') is None or request_body.get('sort') is None:
            raise Exception('request body is not validate')
        response = self.es.search(index=index_name, body=request_body)
        return _build_search_after_body(request_body, response), response

    def search_after(self, index_name, search_after_body):
        """
        Fetch the next page of a search_after pagination.

        :param index_name: index name
        :param search_after_body: body returned by search_after_start or by a
            previous call to this method
        :return: (search_after_body, response). When the response has no hits
            the previous cursor is kept unchanged.
        """
        response = self.es.search(index=index_name, body=search_after_body)
        return _build_search_after_body(search_after_body, response), response

    def add_field(self, index, mapping):
        """
        Add fields to an existing index mapping.

        @param index: index name
        @param mapping: for example
            mapping = {
                "properties": {
                    field_name: {
                        "type": field_type
                    }
                }
            }
        @return: None
        """
        self.es.indices.put_mapping(index=index, doc_type=TYPE, body=mapping, include_type_name=True)

    def bulk_update(self, index, query_dsl):
        """
        Update all documents matching a query (update_by_query).

        @param index: index name
        @param query_dsl: query selecting the documents plus the update script, e.g.
            {
                "query": {
                    "bool": {"must": [
                        {
                            "terms": {
                                "log_id": ["c6c8eaca-d891-4f0e-b15b-b02f02dbe4df",
                                           "92f40a7c-e3f1-412d-9a00-72f22b7ebc9b",
                                           "4257dbe6-369a-42f5-9f14-4406a3eb5c7a"]
                            }
                        }
                    ]}
                },
                "script": {
                    "inline": "ctx._source.dport = params.dport",
                    "params": {
                        "dport": 50801
                    },
                    "lang": "painless"
                }
            }
        @return: None
        """
        self.es.update_by_query(index=index, body=query_dsl)


if __name__ == '__main__':
    es = EsUtil()
    # print es.is_index_exist('test')
    # es.search('', {})

    # Example: adding a field to every matching index
    # mapping = {
    #     "properties": {
    #         "is_report": {
    #             "type": "boolean"
    #         }
    #     }
    # }
    # es.add_field("internal_isop_log-*", mapping)

    index = 'internal_isop_incident-*'
    query_dsl = {
        "query": {
            "bool": {"must": [
                {
                    "terms": {
                        # terms takes a flat list of values
                        "id": [
                            "9f00c0be-ba38-4edc-9f39-889a57ef89c4cq",
                            "29a9c4dc-e7d4-432b-aef8-d216401cb9e5cq",
                            "8494a6be-f80e-4983-adee-92cbf7ef5c31cq"
                        ]
                    }
                }
            ]}
        },
        "script": {
            "inline": "ctx._source.is_report = params.is_report",
            "params": {
                "is_report": True
            },
            "lang": "painless"
        }
    }

    es.bulk_update(index, query_dsl)