elasticsearch - 字符串字段中所有 token 的ElasticSearch聚合

标签 elasticsearch aggregation

我使用的是 ElasticSearch 2.4,正在尝试对一个包含多个 token 的 String 类型文本字段做聚合。该字段是一个名为 mailingAddress 的地址字段。例如,下面是在地址字段中搜索 NY 的查询及其部分结果。

{
  "from": 0,
  "size": 100,
  "sort": [
    {
      "_score": {
        "order": "desc"
      }
    }
  ],
  "query": {
    "bool": {
      "must": [
        {
          "bool": {
            "must": [
              {
                "match": {
                  "customerprofile.mailingAddress": {
                    "query": "NY",
                    "fuzziness": 0,
                    "operator": "or"
                  }
                }
              },
              {
                "match": {
                  "customerprofile.companyId": {
                    "query": "999",
                    "fuzziness": 0,
                    "operator": "or"
                  }
                }
              }
            ]
          }
        }
      ]
    }
  }
}

返回结果:
"hits":[  
   {  
      "_index":"wht_index_prod_v33_es24",
      "_type":"customerprofile",
      "_id":"2044",
      "_score":2.9787974,
      "_source":{  
         "customerId":2044,
         "companyId":2007,
         "fullName":"John Doe",
         "email":"jon@aol.com",
         "pictureURL":"john.png",
         "profilePictureContentType":"image/png",
         "phone":"(703) 999-8888",
         "mailingAddress":"100 Lake Braddock Drive\nBurke, NY 22015",
         "gender":"Male",
         "emergencyContactsIds":[  

         ],
         "wantCorrespondence":false
      }
   },
   {  
      "_index":"wht_index_prod_v33_es24",
      "_type":"customerprofile",
      "_id":"2045",
      "_score":2.9787974,
      "_source":{  
         "customerId":2045,
         "companyId":2007,
         "fullName":"Jane Anderson",
         "email":"janea@touchva.net",
         "pictureURL":"JAnderson.png",
         "profilePictureContentType":"image/png",
         "phone":"(434) 111-2345",
         "mailingAddress":"PO Box 333, Boydton, NY 23917",
         "gender":"Male",
         "emergencyContactsIds":[  

         ],
         "wantCorrespondence":false
      }
   },
..
..
]

问题
当我按 mailingAddress 进行聚合时,我期望文本字段中的每个单词都有一个对应的存储桶。根据上面的查询结果,我期望也能找到一个键为“NY”的存储桶,但实际上并没有。谁能解释一下原因?我的猜测是包含它的文档太少了?

聚合:
{
  "size": 0,
  "aggs": {
    "group_by_age": {
      "terms": {
        "field": "mailingAddress"
      },
      "aggs": {
        "group_by_gender": {
          "terms": {
            "field": "gender"
          }
        }
      }
    }
  }
}

聚合结果:
{
  "took": 16,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "failed": 0
  },
  "hits": {
    "total": 401,
    "max_score": 0,
    "hits": [

    ]
  },
  "aggregations": {
    "group_by_age": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 1041,
      "buckets": [
        {
          "key": "st",
          "doc_count": 30,
          "group_by_gender": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "female",
                "doc_count": 17
              },
              {
                "key": "male",
                "doc_count": 13
              }
            ]
          }
        },
        {
          "key": "ca",
          "doc_count": 28,
          "group_by_gender": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "female",
                "doc_count": 21
              },
              {
                "key": "male",
                "doc_count": 7
              }
            ]
          }
        },
        {
          "key": "dr",
          "doc_count": 16,
          "group_by_gender": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "female",
                "doc_count": 13
              },
              {
                "key": "male",
                "doc_count": 3
              }
            ]
          }
        },
        {
          "key": "street",
          "doc_count": 15,
          "group_by_gender": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "female",
                "doc_count": 11
              },
              {
                "key": "male",
                "doc_count": 4
              }
            ]
          }
        },
        {
          "key": "ave",
          "doc_count": 14,
          "group_by_gender": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "female",
                "doc_count": 7
              },
              {
                "key": "male",
                "doc_count": 7
              }
            ]
          }
        },
        {
          "key": "box",
          "doc_count": 11,
          "group_by_gender": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "female",
                "doc_count": 9
              },
              {
                "key": "male",
                "doc_count": 2
              }
            ]
          }
        },
        {
          "key": "fl",
          "doc_count": 11,
          "group_by_gender": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "female",
                "doc_count": 9
              },
              {
                "key": "male",
                "doc_count": 2
              }
            ]
          }
        },
        {
          "key": "va",
          "doc_count": 11,
          "group_by_gender": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "male",
                "doc_count": 6
              },
              {
                "key": "female",
                "doc_count": 5
              }
            ]
          }
        },
        {
          "key": "n",
          "doc_count": 10,
          "group_by_gender": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "female",
                "doc_count": 7
              },
              {
                "key": "male",
                "doc_count": 3
              }
            ]
          }
        },
        {
          "key": "az",
          "doc_count": 9,
          "group_by_gender": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "female",
                "doc_count": 7
              },
              {
                "key": "male",
                "doc_count": 2
              }
            ]
          }
        }
      ]
    }
  }
}

最佳答案

默认情况下,terms 聚合只返回文档数最多的前 10 个词项,但是您可以在聚合中指定 size 来返回更多词项,如下所示:

{
  "size": 0,
  "aggs": {
    "group_by_age": {
      "terms": {
        "field": "mailingAddress",
        "size": 50                       <---- add this
      },
      "aggs": {
        "group_by_gender": {
          "terms": {
            "field": "gender"
          }
        }
      }
    }
  }
}

实际效果取决于您的数据,您可能需要进一步增大 size 才能真正看到 NY。另外请注意,上面聚合结果中的存储桶键都是小写的(例如 "st"、"ca"),因此对应的键应为 "ny"。

关于elasticsearch - 字符串字段中所有 token 的ElasticSearch聚合,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/42323501/

相关文章:

elasticsearch - 如何在elasticsearch查询中获得3个随机搜索结果

elasticsearch - Elasticsearch 聚合分页问题

java - JAVA(DAO)和SQL中的聚合关系

elasticsearch - Elasticsearch 聚合始于

node.js - Elasticsearch 对部分字符串而非完整字符串进行聚合

elasticsearch - elasticsearch-无法通过curl访问localhost:9200

elasticsearch - 如何在elasticsearch中执行特定的搜索查询?

elasticsearch - 具有距离首选项的Elasticsearch Geosearch

elasticsearch - 在Elasticsearch上汇总结果时,是否可以返回其他字段?

elasticsearch - 脚本字段上的ElasticSearch聚合