elasticsearch - Count和Stats API在Elasticsearch中显示不同的文档计数?

标签 elasticsearch

1)我有一个索引“data”,其中包含479427个文档。为了获取文档数,我使用了GET /data/_search,得到了如下响应:

"hits": {
    "total": 479427,
    "max_score": 1,

2)我又使用count API(GET /data/_count)进行了检查,得到的响应如下:
{
  "count": 479427,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  }
}

从1)和2)可以看出,两者得到的文档数相同。

但是当我尝试GET /data/_stats命令时,我在get和total字段中得到的计数不同。
"data": {
  "primaries": {
    "docs": {
      "count": 2407154,
      "deleted": 357392
    },
    "indexing": {
      "index_total": 4926760,
      "index_time_in_millis": 31181542,
      "index_current": 2744,
      "index_failed": 260136,
      "delete_total": 0,
      "delete_time_in_millis": 0,
      "delete_current": 0,
      "noop_update_total": 175,
      "is_throttled": false,
      "throttle_time_in_millis": 0
    },
    "get": {
      "total": 4773829,
      "time_in_millis": 857412,
      "exists_total": 4773829,
      "exists_time_in_millis": 857412,
      "missing_total": 0,
      "missing_time_in_millis": 0,
      "current": 0
    },
    "merges": {
      "current": 0,
      "current_docs": 0,
      "current_size_in_bytes": 0,
      "total": 82217,
      "total_time_in_millis": 19552894,
      "total_docs": 87321796,
      "total_size_in_bytes": 281542406990,
      "total_stopped_time_in_millis": 0,
      "total_throttled_time_in_millis": 488983,
      "total_auto_throttle_in_bytes": 35543080
    },
    "refresh": {
      "total": 956338,
      "total_time_in_millis": 29144660
    },
    "flush": {
      "total": 697,
      "total_time_in_millis": 469603
    },
    "segments": {
      "count": 114,
      "memory_in_bytes": 7145544,
      "terms_memory_in_bytes": 4862940,
      "stored_fields_memory_in_bytes": 1037064,
      "term_vectors_memory_in_bytes": 0,
      "norms_memory_in_bytes": 116928,
      "doc_values_memory_in_bytes": 1128612,
      "index_writer_memory_in_bytes": 13753207,
      "index_writer_max_memory_in_bytes": 548831232,
      "version_map_memory_in_bytes": 2182,
      "fixed_bit_set_memory_in_bytes": 350648
    },
    "translog": {
      "operations": 19488,
      "size_in_bytes": 799972235
    },
    "suggest": {
      "total": 0,
      "time_in_millis": 0,
      "current": 0
    },
    "request_cache": {
      "memory_size_in_bytes": 0,
      "evictions": 0,
      "hit_count": 0,
      "miss_count": 0
    },
    "recovery": {
      "current_as_source": 0,
      "current_as_target": 0,
      "throttle_time_in_millis": 13306
    }
  },
  "total": {
    "docs": {                > Here I am getting total docs count 4814308
      "count": 4814308,
      "deleted": 715540
    },
    "store": {
      "size_in_bytes": 11910376476,
      "throttle_time_in_millis": 0
    },
    "indexing": {
      "index_total": 9590499,
      "index_time_in_millis": 61324893,
      "index_current": 2744,
      "index_failed": 310323,
      "delete_total": 0,
      "delete_time_in_millis": 0,
      "delete_current": 0,
      "noop_update_total": 175,
      "is_throttled": false,
      "throttle_time_in_millis": 0
    },
    "get": {
      "total": 4773829,
      "time_in_millis": 857412,
      "exists_total": 4773829,
      "exists_time_in_millis": 857412,
      "missing_total": 0,
      "missing_time_in_millis": 0,
      "current": 0
    },
    "search": {
      "open_contexts": 0,
      "query_total": 21901088,
      "query_time_in_millis": 11241895,
      "query_current": 0,
      "fetch_total": 4578094,
      "fetch_time_in_millis": 1774794,
      "fetch_current": 0,
      "scroll_total": 0,
      "scroll_time_in_millis": 0,
      "scroll_current": 0
    },
    "merges": {
      "current": 0,
      "current_docs": 0,
      "current_size_in_bytes": 0,
      "total": 153172,
      "total_time_in_millis": 37586865,
      "total_docs": 170014671,
      "total_size_in_bytes": 542992816504,
      "total_stopped_time_in_millis": 0,
      "total_throttled_time_in_millis": 920242,
      "total_auto_throttle_in_bytes": 71693630
    },
    "refresh": {
      "total": 1841635,
      "total_time_in_millis": 56292736
    },
    "flush": {
      "total": 1343,
      "total_time_in_millis": 946306
    },
    "warmer": {
      "current": 0,
      "total": 3822250,
      "total_time_in_millis": 1098530
    },
    "query_cache": {
      "memory_size_in_bytes": 2706088,
      "total_count": 20222398,
      "hit_count": 4846746,
      "miss_count": 15375652,
      "cache_size": 271,
      "cache_count": 2267,
      "evictions": 1996
    },
    "fielddata": {
      "memory_size_in_bytes": 0,
      "evictions": 0
    },
    "percolate": {
      "total": 0,
      "time_in_millis": 0,
      "current": 0,
      "memory_size_in_bytes": -1,
      "memory_size": "-1b",
      "queries": 0
    },
    "completion": {
      "size_in_bytes": 0
    },
    "segments": {
      "count": 229,
      "memory_in_bytes": 14245875,
      "terms_memory_in_bytes": 9804839,
      "stored_fields_memory_in_bytes": 2068360,
      "term_vectors_memory_in_bytes": 0,
      "norms_memory_in_bytes": 234752,
      "doc_values_memory_in_bytes": 2137924,
      "index_writer_memory_in_bytes": 25849179,
      "index_writer_max_memory_in_bytes": 1097662464,
      "version_map_memory_in_bytes": 4364,
      "fixed_bit_set_memory_in_bytes": 701360
    },
    "translog": {
      "operations": 39262,
      "size_in_bytes": 1616348491
    },
    "suggest": {
      "total": 0,
      "time_in_millis": 0,
      "current": 0
    },
    "request_cache": {
      "memory_size_in_bytes": 0,
      "evictions": 0,
      "hit_count": 0,
      "miss_count": 0
    },
    "recovery": {
      "current_as_source": 0,
      "current_as_target": 0,
      "throttle_time_in_millis": 32764
    }
  }
}

我甚至使用Curator执行curator_cli show_indices --verbose进行了检查,它给出了如下响应:
data open 11.1GB 4814308 5 1 2017-05-31T13:00:37Z
为什么会有差异?

更新----------------------

1)您是说stats API将嵌套的每个字段都视为文档,对吗?
注意:顺便说一下,我指的是stats API中的total.docs.count字段。
2)而count API则是基于唯一ID给出索引中存在的文档数。

所以我的问题是:如果我想知道索引中存在多少个文档,哪个API给出的计数才是正确的——count、get、stats,还是curator_cli show_indices --verbose(使用Curator)?

谢谢

最佳答案

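stats API的计数包含所谓的nested文档,因为在映射中标记为nested的字段会被当作独立的Lucene文档来存储和计数,而count API只对顶级文档进行计数。此外,stats响应中的total部分同时统计了主分片和副本分片(本例中有1个副本),因此total.docs.count(4814308)正好是primaries.docs.count(2407154)的两倍。如果想知道索引中实际存在多少个文档,应以count API(或_search的hits.total)为准。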

关于elasticsearch - Count和Stats API在Elasticsearch中显示不同的文档计数?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/44899246/

相关文章:

javascript - 如何过滤掉复杂查询中的文本字段?

按日期排序的 Elasticsearch 不起作用

spring - spring data elasticsearch中字段的唯一约束

elasticsearch - 无法使用logstash在elasticsearch上创建索引

c# - 在Elasticsearch的查询DSL中使用 `MatchPhrasePrefix`

python - Elasticsearch 返回搜索到的词

elasticsearch - 如何在查询中包装 ElasticSearch 过滤器

c# - 将Elasticsearch JSON查询转换为C#NEST

elasticsearch - 我应该如何配置elasticsearch映射才能像行为那样获得MySQL “like”?

ssl - Elastic Beanstalk 添加多个 ssl 证书