database - ElasticSearch 7索引相对于ElasticSearch 5太大

标签 database elasticsearch memory lucene upgrade

我们正在从ElasticSearch 5.6迁移到7.9。在5.6上,我们有2个索引:一个包含3.4k个文档,占用111.2 MB;另一个包含81.6k个文档,占用845.6 MB。在7.9上,我们有映射相同的2个索引(通过相同的流程写入),但它们分别占用14.3 GB和15.6 GB。
我不明白是什么导致这些索引在7.9上比在5.6上大这么多。
如果您感到好奇,请参阅以下映射(我混淆了许多字段的名称以保护我们的数据):
ES 5.6

{
  "blah-state-37c088aea98d4b60ad58fb04abe55aa7": {
    "mappings": {
      "blahblah": {
        "properties": {
          "blahStatus": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "blah": {
            "type": "integer"
          },
          "blahblah": {
            "type": "long"
          },
          "blahblahblah": {
            "type": "text"
          },
          "blahblahblahblah": {
            "type": "integer"
          },
          "blahblahblahzzz": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword"
              }
            },
            "analyzer": "ngram_suggest"
          },
          "blahblahblahhh": {
            "type": "text",
            "index": false,
            "store": true
          },
          "blahblahblaaaa": {
            "type": "keyword"
          },
          "created": {
            "type": "text"
          },
          "ended": {
            "type": "text"
          },
          "blaaaaah": {
            "type": "boolean"
          },
          "blaahaah": {
            "type": "integer"
          },
          "bloop": {
            "type": "boolean"
          },
          "bloopibob": {
            "type": "integer"
          },
          "blabiba": {
            "type": "keyword"
          },
          "blah": {
            "type": "long"
          },
          "bleeeep": {
            "type": "boolean"
          },
          "blahhh": {
            "type": "boolean"
          },
          "blahah": {
            "type": "text"
          },
          "hidden": {
            "type": "boolean"
          },
          "blah1": {
            "type": "boolean"
          },
          "blah2": {
            "type": "boolean"
          },
          "blah3": {
            "type": "boolean"
          },
          "blah4": {
            "type": "boolean"
          },
          "blah5": {
            "type": "boolean"
          },
          "blah6": {
            "type": "boolean"
          },
          "blah7": {
            "type": "boolean"
          },
          "blah8": {
            "type": "boolean"
          },
          "blah9": {
            "type": "boolean"
          },
          "blah10": {
            "type": "boolean"
          },
          "blah11": {
            "type": "boolean"
          },
          "blah12": {
            "type": "boolean"
          },
          "blah13": {
            "type": "boolean"
          },
          "isInvalid": {
            "type": "boolean"
          },
          "blah14": {
            "type": "boolean"
          },
          "isNew": {
            "type": "boolean"
          },
          "blah15": {
            "type": "boolean"
          },
          "keywords": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword"
              }
            },
            "analyzer": "normalized"
          },
          "languages": {
            "type": "keyword"
          },
          "blah16": {
            "type": "integer"
          },
          "blah17": {
            "type": "integer"
          },
          "blah18": {
            "type": "keyword"
          },
          "maxWait": {
            "type": "integer"
          },
          "minBuyIn": {
            "type": "float"
          },
          "nickname": {
            "type": "text",
            "fields": {
              "raw": {
                "type": "keyword"
              }
            },
            "analyzer": "normalized"
          },
          "nicknamePartial": {
            "type": "text",
            "analyzer": "ngram_partial"
          },
          "nicknameSuggest": {
            "type": "text",
            "analyzer": "ngram_suggest"
          },
          "blah19": {
            "type": "text"
          },
          "blah20": {
            "type": "boolean"
          },
          "DocumentID": {
            "type": "keyword"
          },
          "pledgedAmt": {
            "type": "float"
          },
          "preferredLanguage": {
            "type": "text"
          },
          "blah21": {
            "type": "integer"
          },
          "blah22": {
            "type": "integer"
          },
          "rating": {
            "type": "integer"
          },
          "region": {
            "type": "keyword"
          },
          "requestedAmt": {
            "type": "float"
          },
          "showInFreeAreas": {
            "type": "boolean"
          },
          "blah23": {
            "type": "boolean"
          },
          "blah24": {
            "type": "text"
          },
          "blah25": {
            "type": "scaled_float",
            "scaling_factor": 100000
          },
          "sortScore": {
            "type": "long"
          },
          "started": {
            "type": "text"
          },
          "statusKey": {
            "type": "text"
          },
          "blah26": {
            "type": "long"
          },
          "blah27": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "tagName": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword"
              }
            },
            "analyzer": "normalized"
          },
          "tagNameRaw": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword"
              }
            },
            "analyzer": "normalized"
          },
          "tagNameSuggest": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword"
              }
            },
            "analyzer": "ngram_suggest"
          },
          "blah28": {
            "type": "boolean"
          },
          "traceId": {
            "type": "object",
            "enabled": false
          },
          "updated": {
            "type": "long"
          },
          "blah29": {
            "type": "boolean"
          }
        }
      }
    }
  }
}
和7.9
{
  "blah-state-37c088aea98d4b60ad58fb04abe55aa7" : {
    "mappings" : {
      "properties" : {
        "accountStatus" : {
          "type" : "keyword"
        },
        "boost" : {
          "type" : "integer"
        },
        "age" : {
          "type" : "integer"
        },
        "bleeeeeep" : {
          "type" : "keyword"
        },
        "bleeeep" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword"
            }
          },
          "analyzer" : "ngram_suggest"
        },
        "bleeep" : {
          "type" : "keyword"
        },
        "bleep" : {
          "type" : "keyword"
        },
        "blah0" : {
          "type" : "boolean"
        },
        "blah1" : {
          "type" : "boolean"
        },
        "blah2" : {
          "type" : "text"
        },
        "hidden" : {
          "type" : "boolean"
        },
        "blah3" : {
          "type" : "boolean"
        },
        "blah4" : {
          "type" : "boolean"
        },
        "blah5" : {
          "type" : "boolean"
        },
        "blah6" : {
          "type" : "boolean"
        },
        "blah7" : {
          "type" : "boolean"
        },
        "blah8" : {
          "type" : "boolean"
        },
        "blah9" : {
          "type" : "boolean"
        },
        "blah10" : {
          "type" : "boolean"
        },
        "blah11" : {
          "type" : "boolean"
        },
        "blah12" : {
          "type" : "boolean"
        },
        "blah13" : {
          "type" : "boolean"
        },
        "blah14" : {
          "type" : "boolean"
        },
        "blah15" : {
          "type" : "boolean"
        },
        "blah16" : {
          "type" : "boolean"
        },
        "isNew" : {
          "type" : "boolean"
        },
        "blah17" : {
          "type" : "boolean"
        },
        "keywords" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword"
            }
          },
          "analyzer" : "normalized"
        },
        "languages" : {
          "type" : "keyword"
        },
        "blah18" : {
          "type" : "integer"
        },
        "blah19" : {
          "type" : "integer"
        },
        "nickname" : {
          "type" : "text",
          "fields" : {
            "raw" : {
              "type" : "keyword"
            }
          },
          "analyzer" : "normalized"
        },
        "nicknamePartial" : {
          "type" : "text",
          "analyzer" : "ngram_partial"
        },
        "nicknameSuggest" : {
          "type" : "text",
          "analyzer" : "ngram_suggest"
        },
        "blah20" : {
          "type" : "boolean"
        },
        "blah21" : {
          "type" : "boolean"
        },
        "DocumentId" : {
          "type" : "keyword"
        },
        "preferredLanguage" : {
          "type" : "keyword"
        },
        "rating" : {
          "type" : "integer"
        },
        "region" : {
          "type" : "keyword"
        },
        "blah22" : {
          "type" : "boolean"
        },
        "blah23" : {
          "type" : "boolean"
        },
        "blah24" : {
          "type" : "scaled_float",
          "scaling_factor" : 100000.0
        },
        "sortScore" : {
          "type" : "integer"
        },
        "blah25" : {
          "type" : "keyword"
        },
        "tagName" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword"
            }
          },
          "analyzer" : "normalized"
        },
        "tagNameRaw" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword"
            }
          },
          "analyzer" : "normalized"
        },
        "tagNameSuggest" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword"
            }
          },
          "analyzer" : "ngram_suggest"
        },
        "blah26" : {
          "type" : "boolean"
        },
        "traceId" : {
          "type" : "object",
          "enabled" : false
        },
        "updated" : {
          "type" : "long"
        },
        "blah27" : {
          "type" : "boolean"
        }
      } 
    }
  }
}
编辑:这是设置:
5.6:
"settings": {
      "index": {
        "analysis": {
          "filter": {
            "english_stemmer": {
              "type": "stemmer",
              "language": "english"
            }
          },
          "analyzer": {
            "ngram_partial": {
              "filter": [
                "standard",
                "asciifolding",
                "lowercase"
              ],
              "tokenizer": "ngram"
            },
            "ngram_suggest": {
              "filter": [
                "standard",
                "asciifolding",
                "lowercase"
              ],
              "tokenizer": "edge_ngram"
            },
            "normalized": {
              "filter": [
                "standard",
                "asciifolding",
                "lowercase",
                "english_stemmer"
              ],
              "type": "custom",
              "tokenizer": "standard"
            }
          },
          "tokenizer": {
            "edge_ngram": {
              "token_chars": [
                "letter",
                "digit",
                "punctuation"
              ],
              "min_gram": "1",
              "type": "edge_ngram",
              "max_gram": "20"
            },
            "ngram": {
              "token_chars": [
                "letter",
                "digit",
                "punctuation"
              ],
              "min_gram": "2",
              "type": "ngram",
              "max_gram": "20"
            }
          }
        },
        "number_of_shards": "12"
      }
    }
和7.9:
"settings" : {
      "index" : {
        "analysis" : {
          "filter" : {
            "english_stemmer" : {
              "type" : "stemmer",
              "language" : "english"
            }
          },
          "analyzer" : {
            "ngram_partial" : {
              "filter" : [
                "asciifolding",
                "lowercase"
              ],
              "tokenizer" : "ngram"
            },
            "ngram_suggest" : {
              "filter" : [
                "asciifolding",
                "lowercase"
              ],
              "tokenizer" : "edge_ngram"
            },
            "normalized" : {
              "filter" : [
                "asciifolding",
                "lowercase",
                "english_stemmer"
              ],
              "type" : "custom",
              "tokenizer" : "standard"
            }
          },
          "tokenizer" : {
            "edge_ngram" : {
              "token_chars" : [
                "letter",
                "digit",
                "punctuation"
              ],
              "min_gram" : "1",
              "type" : "edge_ngram",
              "max_gram" : "20"
            },
            "ngram" : {
              "token_chars" : [
                "letter",
                "digit",
                "punctuation"
              ],
              "min_gram" : "3",
              "type" : "ngram",
              "max_gram" : "3"
            }
          }
        },
        "number_of_shards" : "12"
      }
    }
_cat/shards 的结果
5.6:
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 1     p      STARTED  960   8mb 000.00.000.84 host5
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 1     r      STARTED  960   8mb 000.00.000.89 host10
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 1     r      STARTED  960 8.1mb 000.00.000.80 host1
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 1     r      STARTED  960 7.7mb 000.00.000.86 host7
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 2     r      STARTED  978 9.2mb 000.00.000.90 host11
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 2     r      STARTED  978 8.9mb 000.00.000.81 host2
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 2     p      STARTED  978 8.7mb 000.00.000.87 host8
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 2     r      STARTED  978 8.6mb 000.00.000.83 host4
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 0     p      STARTED  990 8.1mb 000.00.000.85 host6
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 0     r      STARTED  990 7.6mb 000.00.000.91 host12
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 0     r      STARTED  990 8.5mb 000.00.000.88 host9
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 0     r      STARTED  990 7.9mb 000.00.000.82 host3
和7.9:
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 8     p      STARTED  262 673.4mb 000.00.000.126 host12
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 8     r      STARTED  286 667.8mb 000.00.000.124 host10
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 9     p      STARTED  278 754.9mb 000.00.000.124 host10
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 9     r      STARTED  196 729.7mb 000.00.000.123 host9
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 7     p      STARTED  247 654.2mb 000.00.000.119 host5
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 7     r      STARTED  262 645.1mb 000.00.000.126 host12
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 4     p      STARTED  225 719.8mb 000.00.000.121 host7
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 4     r      STARTED  282 660.9mb 000.00.000.122 host8
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 6     p      STARTED  274 715.6mb 000.00.000.125 host11
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 6     r      STARTED  334 706.3mb 000.00.000.119 host5
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 11    r      STARTED  194 691.6mb 000.00.000.120 host6
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 11    p      STARTED  255 713.1mb 000.00.000.115 host1
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 3     p      STARTED  212 716.6mb 000.00.000.117 host3
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 3     r      STARTED  292 709.3mb 000.00.000.121 host7
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 1     p      STARTED  249 749.5mb 000.00.000.118 host4
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 1     r      STARTED  289 695.5mb 000.00.000.116 host2
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 5     p      STARTED  243 701.4mb 000.00.000.122 host8
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 5     r      STARTED  204 680.9mb 000.00.000.125 host11
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 2     p      STARTED  246 685.8mb 000.00.000.116 host2
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 2     r      STARTED  305 676.7mb 000.00.000.117 host3
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 10    p      STARTED  235 701.2mb 000.00.000.123 host9
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 10    r      STARTED  276 690.5mb 000.00.000.115 host1
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 0     p      STARTED  245 674.7mb 000.00.000.120 host6
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 0     r      STARTED  301 623.5mb 000.00.000.118 host4

最佳答案

经过大量实验,我们确定导致此问题的原因是软删除(soft deletes)。不幸的是,禁用软删除(即把 index.soft_deletes.enabled 设为 false)的做法已被弃用,因此这在将来对我们仍然会是一个问题。

关于database - ElasticSearch 7索引相对于ElasticSearch 5太大,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/64248878/

相关文章:

mysql - SQL 查询。连接但在一张表中,每个元组出现 2 次连接属性

spring-boot - Spring Data Elastic - Java.Time.Instant 类 jackson 反序列化不起作用

elasticsearch - 为所有查询实现ElasticSearch自定义过滤器

java - 如何在运行 Android 应用程序时捕获堆内存使用情况?

c - 二维数组中间的内存地址越界?

php - 如何在不中断用户访问的情况下每月刷新大型数据库表

mysql - PrestaShopDatabaseException MySQL 服务器在迁移到 VPS 后消失了

python - html代码问题如何放入数据库

ruby-on-rails - 如何在 rspec 中运行 elasticsearch 服务器

c++ - 为什么我必须在 "original"指针上调用 delete?