elasticsearch - 复杂 Elasticsearch 查询

我在 Elasticsearch 索引中有以下文档。

[{
        "_index": "ten2",
        "_type": "documents",
        "_id": "c323c2244a4a4c22_en-us",
        "_source": {
            "publish_details": [{
                    "environment": "603fe91adbdcff66",
                    "time": "2020-06-24T13:36:55.514Z",
                    "locale": "hi-in",
                    "user": "aadab2f531206e9d",
                    "version": 1
                },
                {
                    "environment": "603fe91adbdcff66",
                    "time": "2020-06-24T13:36:55.514Z",
                    "locale": "en-us",
                    "user": "aadab2f531206e9d",
                    "version": 1
                }
            ],
            "created_at": "2020-06-24T13:36:43.037Z",
            "_in_progress": false,
            "title": "Entry 1",
            "locale": "en-us",
            "url": "/entry-1",
            "tags": [],
            "uid": "c323c2244a4a4c22",
            "updated_at": "2020-06-24T13:36:43.037Z",
            "fields": []
        }
    },
    {
        "_index": "ten2",
        "_type": "documents",
        "_id": "c323c2244a4a4c22_mr-in",
        "_source": {
            "publish_details": [{
                "environment": "603fe91adbdcff66",
                "time": "2020-06-24T13:37:26.205Z",
                "locale": "mr-in",
                "user": "aadab2f531206e9d",
                "version": 1
            }],
            "created_at": "2020-06-24T13:36:43.037Z",
            "_in_progress": false,
            "title": "Entry 1 marathi",
            "locale": "mr-in",
            "url": "/entry-1",
            "tags": [],
            "uid": "c323c2244a4a4c22",
            "updated_at": "2020-06-24T13:37:20.092Z",
            "fields": []
        }
    }
]

我希望针对这些文档的查询结果为空（[]）。可以看到，这两个文档的 uid 是相同的。我正在使用以下查询来获取结果：

{
    "query": {
        "bool": {
            "must": [{
                "bool": {
                    "must_not": [{
                        "bool": {
                            "must": [{
                                "nested": {
                                    "path": "publish_details",
                                    "query": {
                                        "term": {
                                            "publish_details.environment": "603fe91adbdcff66"
                                        }
                                    }
                                }
                            }, {
                                "nested": {
                                    "path": "publish_details",
                                    "query": {
                                        "term": {
                                            "publish_details.locale": "en-us"
                                        }
                                    }
                                }
                            }, {
                                "nested": {
                                    "path": "publish_details",
                                    "query": {
                                        "term": {
                                            "publish_details.locale": "hi-in"
                                        }
                                    }
                                }
                            }, {
                                "nested": {
                                    "path": "publish_details",
                                    "query": {
                                        "term": {
                                            "publish_details.locale": "mr-in"
                                        }
                                    }
                                }
                            }]
                        }
                    }]
                }
            }]
        }
    }
}

但是上面的查询把这 2 个文档全都返回了，而我希望得到空结果。原因是这两个文档的 uid 相同，并且该 uid 在 publish_details 中已经包含了全部三个 locale（en-us、hi-in、mr-in）。那么怎样才能得到正确的结果？是否有聚合查询可以帮到我？这只是一个示例，实际上我有很多文档需要过滤。请帮帮我。

最佳答案

{
  "aggs": {
    "agg1": {
      "terms": {
        "field": "uid.raw"
      },
      "aggs": {
        "agg2": {
          "nested": {
            "path": "publish_details"
          },
          "aggs": {
            "locales": {
              "terms": {
                "field": "publish_details.locale"
              }
            }
          }
        }
      }
    }
  }
}

该查询会先按 uid 对文档进行分组，然后在每个分组内按 publish_details.locale 再次分组。
它返回的结果如下：

"aggregations": {
        "agg1": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
                {
                    "key": "c323c2244a4a4c22",
                    "doc_count": 2,
                    "agg2": {
                        "doc_count": 3,
                        "locales": {
                            "doc_count_error_upper_bound": 0,
                            "sum_other_doc_count": 0,
                            "buckets": [
                                {
                                    "key": "en-us",
                                    "doc_count": 1
                                },
                                {
                                    "key": "hi-in",
                                    "doc_count": 1
                                },
                                {
                                    "key": "mr-in",
                                    "doc_count": 1
                                }
                            ]
                        }
                    }
                },
                {
                    "key": "c323c2244rrffa4a4c22",
                    "doc_count": 1,
                    "agg2": {
                        "doc_count": 2,
                        "locales": {
                            "doc_count_error_upper_bound": 0,
                            "sum_other_doc_count": 0,
                            "buckets": [
                                {
                                    "key": "en-us",
                                    "doc_count": 1
                                },
                                {
                                    "key": "hi-in",
                                    "doc_count": 1
                                }
                            ]
                        }
                    }
                }
            ]

我有三个文档，其中两个具有相同的 uid，另一个的 uid 不同。
我会进一步更新查询，以去掉包含 3 个 locale 存储桶的第一个结果。您也可以在自己的代码中对聚合结果做进一步处理。
这种做法是可行的：对于 1 万个文档没有问题；但当文档数量达到数百万时，您需要有足够的资源来执行此查询。

{
  "size" : 0,
  "query":{
      "bool" :{
          "must_not":{
              "match":{
                "publish_details.environment":"603fe91adbdcff66"
              }
          }
      }
  },
  "aggs": {
    "uids": {
      "terms": {
        "field": "uid.raw"
      },
      "aggs": {
        "details": {
          "nested": {
            "path": "publish_details"
          },
          "aggs": {
            "locales": {
              "terms": {
                "field": "publish_details.locale"
              }
            },   
            "unique_locales": {
                "value_count": {
                    "field": "publish_details.locale"
                }
            }
          }
        }
      }
    }
  }
}

结果:

"aggregations": {
        "uids": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
                {
                    "key": "c323c2244a4a4c22",
                    "doc_count": 2,
                    "details": {
                        "doc_count": 3,
                        "locales": {
                            "doc_count_error_upper_bound": 0,
                            "sum_other_doc_count": 0,
                            "buckets": [
                                {
                                    "key": "en-us",
                                    "doc_count": 1
                                },
                                {
                                    "key": "hi-in",
                                    "doc_count": 1
                                },
                                {
                                    "key": "mr-in",
                                    "doc_count": 1
                                }
                            ]
                        },
                        "unique_locales": {
                            "value": 3
                        }
                    }
                },
                {
                    "key": "c323c2244rrffa4a4c22",
                    "doc_count": 1,
                    "details": {
                        "doc_count": 2,
                        "locales": {
                            "doc_count_error_upper_bound": 0,
                            "sum_other_doc_count": 0,
                            "buckets": [
                                {
                                    "key": "en-us",
                                    "doc_count": 1
                                },
                                {
                                    "key": "hi-in",
                                    "doc_count": 1
                                }
                            ]
                        },
                        "unique_locales": {
                            "value": 2
                        }
                    }
                }
            ]

关于elasticsearch - 复杂 Elasticsearch 查询，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/62566892/

elasticsearch - 复杂 Elasticsearch 查询

上一篇：regex - 正则表达式从域中提取用户名

下一篇：security - 如何使用PowerShell获取进程的启动帐户？