elasticsearch对text类型进行聚合时遇到的问题

编程入门行业动态更新时间:2024-10-24 01:49:05

在 Elasticsearch 中对数据进行聚合，想统计每个来源（news_source）的数据量，相关字段的 mapping 如下：

  "news_source": {
     "type": "text"
   },
   "related_freq": {
     "type": "integer"
   },

查询条件:

GET /event_news/_search
{
  "size": 0, 
  "aggs": {
    "news_source_info":{
      "terms": {
        "field": "news_source"
      },
      "aggs": {
        "total_sum": {
          "sum": {"field": "related_freq"}
        }
      }
    }
  }
}

结果报错

{
  "error": {
    "root_cause": [
      {
        "type": "illegal_argument_exception",
        "reason": "Fielddata is disabled on text fields by default. Set fielddata=true on [news_source] in order to load fielddata in memory by uninverting the inverted index. Note that this can however use significant memory. Alternatively use a keyword field instead."
      }
    ],
    "type": "search_phase_execution_exception",
    "reason": "all shards failed",
    "phase": "query",
    "grouped": true,
    "failed_shards": [
      {
        "shard": 0,
        "index": "event_news",
        "node": "9nWATfvgTJiLmbYg-RpPQw",
        "reason": {
          "type": "illegal_argument_exception",
          "reason": "Fielddata is disabled on text fields by default. Set fielddata=true on [news_source] in order to load fielddata in memory by uninverting the inverted index. Note that this can however use significant memory. Alternatively use a keyword field instead."
        }
      }
    ],
    "caused_by": {
      "type": "illegal_argument_exception",
      "reason": "Fielddata is disabled on text fields by default. Set fielddata=true on [news_source] in order to load fielddata in memory by uninverting the inverted index. Note that this can however use significant memory. Alternatively use a keyword field instead.",
      "caused_by": {
        "type": "illegal_argument_exception",
        "reason": "Fielddata is disabled on text fields by default. Set fielddata=true on [news_source] in order to load fielddata in memory by uninverting the inverted index. Note that this can however use significant memory. Alternatively use a keyword field instead."
      }
    }
  },
  "status": 400
}

嗯，需要将该字段的 fielddata 设置为 true 才可以在 text 字段上做聚合，于是修改 mapping：

PUT /event_news/_mapping
{"properties":{"news_source":{"type":"text","fielddata":true}}}

现在的mapping

  "news_source": {
     "type": "text",
     "fielddata": true
   },
   "related_freq": {
     "type": "integer"
   },

再次查询，结果：

{
  "aggregations" : {
    "news_source_info" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 24907577,
      "buckets" : [
        {
          "key" : "财",
          "doc_count" : 8965316,
          "total_sum" : {
            "value" : 1.34136E7
          }
        },
        {
          "key" : "经",
          "doc_count" : 7768020,
          "total_sum" : {
            "value" : 1.037061E7
          }
        },
        {
          "key" : "新",
          "doc_count" : 7578602,
          "total_sum" : {
            "value" : 1.0302178E7
          }
        },
        {
          "key" : "浪",
          "doc_count" : 6764223,
          "total_sum" : {
            "value" : 8648774.0
          }
        },
        ....
      ]
    }
  }
}

怎么每个字都被分开了？原来 text 类型的字段在索引时会先经过分词器分词，terms 聚合是按分词后的词项进行的（默认分词器会把中文拆成单个汉字），所以得到的桶是一个个单字。要按完整的来源名称聚合，只好再给该字段设置一个 keyword 类型的子字段：

PUT /event_news/_mapping
{"properties":{"news_source":{"type":"text","fields":  {"raw": {"type": "keyword"}},"fielddata":true}}}

mapping:

  "news_source": {
     "type": "text",
     "fields": {
       "raw": {
         "type": "keyword"
       }
     },
     "fielddata": true
   },
     "related_freq": {
     "type": "integer"
   }

查询：

GET /event_news/_search
{
  "size": 0, 
  "aggs": {
    "news_source_info":{
      "terms": {
        "field": "news_source.raw"
      },
      "aggs": {
        "total_sum": {
          "sum": {"field": "related_freq"}
        }
      }
    }

  }
}

结果：

    "news_source_info" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 576,
      "buckets" : [
        {
          "key" : "长城网",
          "doc_count" : 47,
          "total_sum" : {
            "value" : 51.0
          }
        },
        {
          "key" : "新华网",
          "doc_count" : 45,
          "total_sum" : {
            "value" : 1305.0
          }
        },
        {
          "key" : "中工网",
          "doc_count" : 38,
          "total_sum" : {
            "value" : 303.0
          }
        }
      ]
    }

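这样就可以了。需要注意两点：一是给已有字段新增子字段后，之前已经写入的文档不会自动索引到 news_source.raw，需要重新索引这些文档（例如对该索引执行一次 _update_by_query）后才能被聚合到；二是改用 keyword 子字段聚合之后，text 字段上的 fielddata 已不再需要，它会占用大量内存，可以不必开启。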

更多推荐

elasticsearch对text类型进行聚合时遇到的问题

本文发布于:2023-06-10 16:29:00，感谢您对本站的认可！

本文链接:https://www.elefans.com/category/jswz/34/620830.html