Elasticsearch operation by python
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk
from utils.hash_data import get_hash_id
from config import config
from utils.read_url import requst_url_json


class OperaEs(object):
    """Small helper around an Elasticsearch client.

    The cluster address is built from ``config.ES_IP`` and ``config.ES_PORT``.
    Every method passes the index name as the document type as well.
    """

    def __init__(self):
        """Build the base URL and create the Elasticsearch client."""
        self.url = 'http://{}:{}'.format(config.ES_IP, config.ES_PORT)
        self.es = Elasticsearch([self.url], timeout=300)

    def post_data(self, es_index, es_id, data):
        """Create a single document.

        Args:
            es_index: Index name (also used as the document type).
            es_id: Document id; a ``str`` id is passed through
                ``get_hash_id`` before use.
            data: Document body.

        Returns:
            ``('success', 201)`` when the response's ``result`` field is
            ``'created'``, otherwise ``('put fail', 400)``.
        """
        if isinstance(es_id, str):
            es_id = get_hash_id(data=es_id)
        # ignore=400: a 400 response is returned to us instead of raising.
        res = self.es.create(
            index=es_index,
            doc_type=es_index,
            id=es_id,
            body=data,
            ignore=400,
        )
        if res.get('result') == 'created':
            return 'success', 201
        return 'put fail', 400

    def post_batch(self, arr, es_index):
        """Bulk-insert the items of ``arr`` that are not already readable.

        Args:
            arr: Iterable of dict-like items; each item's ``'id'`` is used
                as the document id.
            es_index: Index name (also used as the document type).
        """
        def gendata():
            for data in arr:
                # Skip items for which a stored document is already found.
                if self.read_by_es_id(es_index=es_index, es_id=data.get('id')):
                    continue
                yield {
                    "_index": es_index,
                    "_type": es_index,
                    # This parameter may be left to its default; when set
                    # explicitly it can be used to tell whether the data is
                    # a duplicate.
                    # NOTE(review): read_by_es_id hashes string ids, but the
                    # raw id is written here -- confirm this is intended.
                    "_id": data.get('id'),
                    # NOTE(review): presumably this nests the item under a
                    # "doc" key in the stored source -- verify against
                    # helpers.bulk.
                    "doc": data,
                }

        bulk(self.es, gendata())

    def read_by_es_id(self, es_index, es_id):
        """Fetch one document's ``_source`` by id over plain HTTP.

        Args:
            es_index: Index name (also used as the document type).
            es_id: Document id; a ``str`` id is passed through
                ``get_hash_id`` before use.

        Returns:
            The ``_source`` value of the response, or ``None`` when
            ``requst_url_json`` returns a falsy result.
        """
        if isinstance(es_id, str):
            es_id = get_hash_id(data=es_id)
        read_url = '{}/{}/{}/{}'.format(self.url, es_index, es_index, es_id)
        req = requst_url_json(read_url)
        return req.get('_source') if req else None

    def read_by_body(self, es_index, body):
        """Run a search with the given query body.

        Args:
            es_index: Index name (also used as the document type).
            body: Query body passed to the search call.

        Returns:
            The inner ``hits`` value of the response, or ``None`` when the
            response has no truthy ``hits`` entry.
        """
        res = self.es.search(index=es_index, doc_type=es_index, body=body)
        return res.get('hits').get('hits') if res.get('hits') else None

    def delete_index(self, index):
        """Delete an index, ignoring 400 and 404 responses."""
        self.es.indices.delete(index=index, ignore=[400, 404])


  • 通过 kibana 的 dev tools 的 console 来 translate
  • 直接query:返回结果集
POST /_xpack/sql?format=txt
{
  "query": """ SELECT count(1),date FROM "spider-fang" where province = '上海市' and city = '上海市' and area = '普陀区' group by date """
}
  • translate:返回 body
POST /_xpack/sql/translate
{
  "query": """ SELECT count(1),date FROM "spider-fang" where province = '上海市' and city = '上海市' and area = '普陀区' group by date """
}


  • Limit of total fields [1000] in index
curl -XPUT "http://<es_host>:9200/<index>/_settings" -H "Content-Type:application/json" -d "{"""index.mapping.total_fields.limit""": 50000}"
(原文中的 URL 缺失;此处按索引 _settings 接口补充占位地址,请替换为实际的主机与索引名)


Could not resolve host
  • 中文字段 无法filter(filter为null的问题)

    使用filter term过滤时,返回结果为null


    将match 替换为 match_phrase

  • Fielddata is disabled on text fields by default. Set fielddata=true


Fielddata is disabled on text fields by default. Set fielddata=true
"type": "illegal_argument_exception",
"reason": "Fielddata is disabled on text fields by default. Set fielddata=true on [region] in order to load fielddata in memory by uninverting the inverted index. Note that this can however use significant memory."


  1. set index/_mapping/type_name region to "fielddata": true
PUT xytest/_mapping/sutdent  (http://localhost:9200/index_data/_mapping/index_data)
{
  "properties": {
    "region": {
      "type": "text",
      "fielddata": true
    }
  }
}
# local
{
  "properties": {
    "region": {
      "type": "text",
      "fielddata": true
    }
  }
}
  1. 在相关 field 中用 keyword 即可
# Aggregate on the ``.keyword`` sub-field instead of the text field itself.
field = "country.keyword"

# Three nested terms aggregations: province -> city -> area.
# "size": 0 at the top level asks for aggregation results only, no hits.
body = {
    "size": 0,
    "aggs": {
        "province_count": {
            # Note the doc_count_error_upper_bound (upper bound of the
            # per-term count error) and sum_other_doc_count (documents in
            # buckets that were not returned) fields in the response.
            "terms": {"field": "province.keyword", "size": 1000000},
            "aggs": {
                "industry_count": {
                    "terms": {"field": "city.keyword"},
                    "aggs": {
                        "industry_count": {
                            "terms": {"field": "area.keyword"}
                        }
                    }
                }
            }
        }
    }
}
res = es.search(index='spider-fang', doc_type='spider-fang', body=body)
# One bucket per province; nested city/area buckets hang off each entry.
res_data = res['aggregations']['province_count']['buckets']
  • Result window is too large

    现象:ES提示返回结果集窗口太大了,目前最大值为10000,而返回结果的body size > 10000


    curl -XPUT "http://<es_host>:9200/<index>/_settings" -H "Content-Type:application/json" -d "{ """index""" : { """max_result_window""" : 100000000}}"
    (原文中的 URL 缺失;此处按索引 _settings 接口补充占位地址,请替换为实际的主机与索引名)

