elasticsearch - “bool must”上的字词过滤器，不返回任何结果

我有一个查询，在只有几十条记录时，它的执行结果与预期一致。后来我们开始向 ES 实例中导入更多数据，现在这个查询却得不到任何结果了:

第一个查询:

{
  "query": {
   "bool": {
      "must": [
        {
          "match": {
            "message": "new connection attempt failed: null"
           }
        }
      ] 
    }
  }
}

我得到了许多记录。这表明记录确实在我的索引中，正如我期望的那样。

如果我从结果中选择记录之一:

 {
        "_index": "logstash-2018.04.12",
        "_type": "log",
        "_id": "AWK3J1xarbUl8ovcY8uv",
        "_score": 6.621839,
        "_source": {
          "cluster": "dev-east-1-c5",
          "offset": 35858135,
          "level": "ERROR",
         ...
      }

然后添加一个 term 过滤器，以便只获取特定集群(cluster)的条目，结果却什么也没有返回(但只有当索引中加载了超过几千条记录时才会出现这种情况)。

 {
   "query": {
     "bool": {
       "must": [
         {
           "match": {
            "message": "new connection attempt failed: null"
           }
         }
      ],
       "filter": [
       {
         "term": {
           "cluster": "dev-east-1-c5"
         }
         }
      ]
    }
 }
   }

用简单的英语描述我要做什么:

message-匹配包含消息字符串的任何条目

然后将其过滤，以仅返回群集名称完全匹配的条目。

编辑4/12/18-根据要求添加日志类型的映射

{  
   "logstash-2018.04.12":{  
      "mappings":{  
     "log":{  
        "_all":{  
           "enabled":true,
           "norms":false
        },
        "dynamic_templates":[  
           {  
              "message_field":{  
                 "path_match":"message",
                 "match_mapping_type":"string",
                 "mapping":{  
                    "norms":false,
                    "type":"text"
                 }
              }
           },
           {  
              "string_fields":{  
                 "match":"*",
                 "match_mapping_type":"string",
                 "mapping":{  
                    "fields":{  
                       "keyword":{  
                          "ignore_above":256,
                          "type":"keyword"
                       }
                    },
                    "norms":false,
                    "type":"text"
                 }
              }
           }
        ],
        "properties":{  
           "@timestamp":{  
              "type":"date",
              "include_in_all":false
           },
           "@version":{  
              "type":"keyword",
              "include_in_all":false
           },
           "application_name":{  
              "type":"text",
              "norms":false,
              "fields":{  
                 "keyword":{  
                    "type":"keyword",
                    "ignore_above":256
                 }
              }
           },
           "application_version":{  
              "type":"text",
              "norms":false,
              "fields":{  
                 "keyword":{  
                    "type":"keyword",
                    "ignore_above":256
                 }
              }
           },
           "beat":{  
              "properties":{  
                 "hostname":{  
                    "type":"text",
                    "norms":false,
                    "fields":{  
                       "keyword":{  
                          "type":"keyword",
                          "ignore_above":256
                       }
                    }
                 },
                 "name":{  
                    "type":"text",
                    "norms":false,
                    "fields":{  
                       "keyword":{  
                          "type":"keyword",
                          "ignore_above":256
                       }
                    }
                 },
                 "version":{  
                    "type":"text",
                    "norms":false,
                    "fields":{  
                       "keyword":{  
                          "type":"keyword",
                          "ignore_above":256
                       }
                    }
                 }
              }
           },
           "cluster":{  
              "type":"text",
              "norms":false,
              "fields":{  
                 "keyword":{  
                    "type":"keyword",
                    "ignore_above":256
                 }
              }
           },
           "geoip":{  
              "dynamic":"true",
              "properties":{  
                 "ip":{  
                    "type":"ip"
                 },
                 "latitude":{  
                    "type":"half_float"
                 },
                 "location":{  
                    "type":"geo_point"
                 },
                 "longitude":{  
                    "type":"half_float"
                 }
              }
           },
           "host":{  
              "type":"text",
              "norms":false,
              "fields":{  
                 "keyword":{  
                    "type":"keyword",
                    "ignore_above":256
                 }
              }
           },
           "input_type":{  
              "type":"text",
              "norms":false,
              "fields":{  
                 "keyword":{  
                    "type":"keyword",
                    "ignore_above":256
                 }
              }
           },
           "level":{  
              "type":"text",
              "norms":false,
              "fields":{  
                 "keyword":{  
                    "type":"keyword",
                    "ignore_above":256
                 }
              }
           },
           "level_value":{  
              "type":"long"
           },
           "logger_name":{  
              "type":"text",
              "norms":false,
              "fields":{  
                 "keyword":{  
                    "type":"keyword",
                    "ignore_above":256
                 }
              }
           },
           "message":{  
              "type":"text",
              "norms":false
           },
           "offset":{  
              "type":"long"
           },
           "source":{  
              "type":"text",
              "norms":false,
              "fields":{  
                 "keyword":{  
                    "type":"keyword",
                    "ignore_above":256
                 }
              }
           },
           "tags":{  
              "type":"text",
              "norms":false,
              "fields":{  
                 "keyword":{  
                    "type":"keyword",
                    "ignore_above":256
                 }
              }
           },
           "thread_name":{  
              "type":"text",
              "norms":false,
              "fields":{  
                 "keyword":{  
                    "type":"keyword",
                    "ignore_above":256
                 }
              }
           },
           "type":{  
              "type":"text",
              "norms":false,
              "fields":{  
                 "keyword":{  
                    "type":"keyword",
                    "ignore_above":256
                 }
                }
             }
          }
       }
    }
  }
}

最佳答案

有两个问题:

第一个问题我已在评论中提到过。由于 term 过滤器是对普通的“cluster”字段而不是“cluster.keyword”执行的，而“cluster”是经过分析器分词处理的 text 字段，索引中保存的是分词后的词项，因此用完整的集群名称做精确匹配时无法命中。 (这似乎是2.x版中的做法)

第二个问题出在 bool 查询中对 message 字段的 match 上。match 没有词项位置(顺序)的概念，在大型数据集上会给出各种意外的结果。解决方法是将 bool 查询中的 match 改为 match_phrase，然后按照上面第一点更新过滤器(即对“cluster.keyword”执行 term 过滤)。

现在它似乎已经按我想要的方式工作了。我有点担心，可能还有更高效的方法来做这件事。我看到有些人在使用通配符，我认为目前的做法与之相比算是有一点改进。不确定是否还有我不知道的更专业的做法。

关于elasticsearch - “bool must”上的字词过滤器，不返回任何结果，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/49786303/

elasticsearch - “bool must”上的字词过滤器，不返回任何结果

上一篇：rest - 带有REST API调用的Powershell脚本无法在启动时运行

下一篇：json - 将 root 添加到 ConvertTo-Json 输出