elasticsearch - ElasticSearch-带有过滤器的自定义分析器-未应用过滤器

标签 elasticsearch elasticsearch-analyzers

我有以下查询:

GET /nameofmyindex/_analyze
{
  "text" : "Limousinetesting",
  "explain": true,
  "analyzer": "default"
}


结果是:
{
  "detail" : {
    "custom_analyzer" : true,
    "charfilters" : [ ],
    "tokenizer" : {
      "name" : "standard",
      "tokens" : [
        {
          "token" : "Limousinetesting",
          "start_offset" : 0,
          "end_offset" : 16,
          "type" : "<ALPHANUM>",
          "position" : 0,
          "bytes" : "[4c 69 6d 6f 75 73 69 6e 65 74 65 73 74 69 6e 67]",
          "positionLength" : 1,
          "termFrequency" : 1
        }
      ]
    },
    "tokenfilters" : [ ]
  }
}

我的索引配置如下所示:
{
   "nameofmyindex":{
      "aliases":{

      },
      "mappings":{
         "properties":{
            "author":{
               "type":"integer"
            },
            "body:value":{
               "type":"text",
               "fields":{
                  "keyword":{
                     "type":"keyword",
                     "ignore_above":256
                  }
               }
            },
            "changed":{
               "type":"date",
               "format":"epoch_second"
            },
            "created":{
               "type":"date",
               "format":"epoch_second"
            },
            "id":{
               "type":"keyword"
            },
            "promote":{
               "type":"boolean"
            },
            "search_api_language":{
               "type":"keyword"
            },
            "sticky":{
               "type":"boolean"
            },
            "title":{
               "type":"text",
               "boost":5.0,
               "fields":{
                  "keyword":{
                     "type":"keyword",
                     "ignore_above":256
                  }
               }
            },
            "type":{
               "type":"keyword"
            }
         }
      },
      "settings":{
         "index":{
            "number_of_shards":"1",
            "provided_name":"nameofmyindex",
            "creation_date":"1579792687839",
            "analysis":{
               "filter":{
                  "stop":{
                     "type":"stop",
                     "stopwords":[
                        "i",
                        "me",
                        "my",
                        "myself"
                     ]
                  },
                  "synonym":{
                     "type":"synonym",
                     "lenient":"true",
                     "synonyms":[
                        "P-Card, P Card => P-Card",
                        "limousinetesting => limousine"
                     ]
                  }
               },
               "analyzer":{
                  "default":{
                     "type":"custom",
                     "filters":[
                        "lowercase",
                        "stop",
                        "synonym"
                     ],
                     "tokenizer":"standard"
                  }
               }
            },
            "number_of_replicas":"1",
            "uuid":"QTlVnyWVRLayEfPWTrcgdg",
            "version":{
               "created":"7050199"
            }
         }
      }
   }
}

如您所见,带有过滤器的默认分析器没有生效:“Limousinetesting”一词没有被替换为其同义词“limousine”。

应该如何配置分析器,才能使过滤器生效?在这种情况下,即使是最简单的过滤器 lowercase(转换为小写)也没有被应用。

最佳答案

问题出在您创建索引设置时所用的语法上,我已经重现并解决了您的问题。您在分析器定义中使用了名为 filters 的 JSON 数组来列出所有过滤器;虽然正如 ES 官方示例(ES official example)所示,该数组中可以列出多个过滤器,但键名应该是单数形式的 filter,而不是 filters。

请在下面找到用于创建索引的正确格式(其中以 --> 开头的文字只是说明,不属于 JSON,实际使用前请将其删除):

{
    "mappings": {
        "properties": {
            "author": {
                "type": "integer"
            },
            "body:value": {
                "type": "text",
                "fields": {
                    "keyword": {
                        "type": "keyword",
                        "ignore_above": 256
                    }
                }
            },
            "changed": {
                "type": "date",
                "format": "epoch_second"
            },
            "created": {
                "type": "date",
                "format": "epoch_second"
            },
            "id": {
                "type": "keyword"
            },
            "promote": {
                "type": "boolean"
            },
            "search_api_language": {
                "type": "keyword"
            },
            "sticky": {
                "type": "boolean"
            },
            "title": {
                "type": "text",
                "boost": 5,
                "fields": {
                    "keyword": {
                        "type": "keyword",
                        "ignore_above": 256
                    }
                }
            },
            "type": {
                "type": "keyword"
            }
        }
    },
    "settings": {
        "index": {
            "number_of_shards": "1",
            "analysis": {
                "filter": {
                    "stop": {
                        "type": "stop",
                        "stopwords": [
                            "i",
                            "me",
                            "my",
                            "myself"
                        ]
                    },
                    "synonym": {
                        "type": "synonym",
                        "lenient": "true",
                        "synonyms": [
                            "P-Card, P Card => P-Card",
                            "limousinetesting => limousine"
                        ]
                    }
                },
                "analyzer": {
                    "default": {
                        "type": "custom",
                        "filter": [ --> Notice the change in filters to filter 
                            "lowercase",
                            "stop",
                            "synonym"
                        ],
                        "tokenizer": "standard"
                    }
                }
            },
            "number_of_replicas": "1"
        }
    }
}

现在,当我使用上面的设置和映射创建索引,并用您的文本调用 Analyze API(_analyze)时,得到了它的同义词 token limousine,如下面的输出所示。
{
    "tokens": [
        {
            "token": "limousine",
            "start_offset": 0,
            "end_offset": 16,
            "type": "SYNONYM",
            "position": 0
        }
    ]
}

关于elasticsearch - ElasticSearch-带有过滤器的自定义分析器-未应用过滤器,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/59882193/

相关文章:

elasticsearch - Analyze API不适用于Elasticsearch 1.7

regex - ElasticSearch Analyzer自动完成功能,用于字母数字

用于过滤术语的 Elasticsearch DSL 语法

elasticsearch - 无法在Ubuntu 16上为Elasticsearch安装x-pack插件

elasticsearch - 如何获得多个记录?

c# - 在 Nest 5.5.0 中为属性设置 not_analyzed

curl - 自定义分析器在Elasticsearch中不起作用

elasticsearch - 禁用 Elasticsearch 搜索分析器

ruby - 如何将嵌套的哈希参数传递给searchkick

elasticsearch - 获得满足条件的集合(不同值的列表)