mysql - ElasticSearch 和 jdbc - 映射、分析器、过滤器设置

标签 mysql jdbc elasticsearch installation mapping

我搜索了很多 stackoverflow 问题、ElasticSearch 文档、论坛,但都失败了。

我尝试为 mysql 数据库配置 ElasticSearch JDBC,并在搜索中实现部分单词匹配(例如,输入“bicycl”时应能搜索到 bicycle)。我尝试使用 nGram,但我做错了…… 我需要的只是在字符串字段上实现 nGram。

这是我的主要 sql 配置:

curl -XPUT 'localhost:9200/_river/query_1/_meta' -d '{
"type" : "jdbc",
"jdbc" : {
    "url" : "jdbc:mysql://localhost:3306/testowa",
    "user" : "root",
    "password" : "****",
    "sql" : "SELECT p.products_id as _id, p.products_id, tr.tax_class_id, m.manufacturers_id, p.products_status, products_temporarily_unavailable, ptc.categories_id, ctt.categories_disabled, ctt.category_tags, ctt.categories_name, pd.products_name, manufacturers_name, pd.products_description, p.products_model, p.products_code, pd.products_search_tags, pd.products_description_seo_tag FROM products_description pd, products_to_categories ptc, tax_rates tr, manufacturers m, categories_tree_table ctt, products p LEFT JOIN specials ON specials.products_id = p.products_id AND  status = 1 LEFT JOIN products_gratis pg ON pg.ref_products_id = p.products_id WHERE pd.products_id = p.products_id AND ptc.products_id = p.products_id AND p.products_tax_class_id = tr.tax_class_id AND p.manufacturers_id = m.manufacturers_id AND (p.products_status = 1 or products_temporarily_unavailable = 1) AND pd.language_id = 1 AND m.language_id = 1 AND p.products_is_archive = 0 AND ptc.categories_id = ctt.categories_id AND ctt.categories_disabled != 1",
    "poll": "10s",
    "strategy": "simple",
    "schedule" : "0 1-59 0-23 ? * *",
    "autocommit" : true,
    "index" : "searcher",
    "type" : "query_1"
},
"index" : {
  "index" : "searcher",
  "type" : "query_1",
  "settings" : {
      "analysis" : {
          "filter" : {
              "nGram_filter": {
                 "type": "nGram",
                 "min_gram": 2,
                 "max_gram": 20,
                 "token_chars": [
                    "letter",
                    "digit",
                    "punctuation",
                    "symbol"
                 ]
              }
          },
          "analyzer" : {
              "nGram_analyzer": {
                 "type": "custom",
                 "tokenizer": "my_ngram_tokenizer",
                 "filter": [
                    "lowercase",
                    "asciifolding",
                    "nGram_filter"
                 ]
              },
              "my_search_analyzer" : {
                  "type" : "custom",
                  "tokenizer" : "standard",
                  "filter" : ["standard", "lowercase", "nGram"]
              }
          },
          "tokenizer" : {
              "my_ngram_tokenizer" : {
                  "type" : "nGram",
                  "min_gram" : "3",
                  "max_gram" : "20",
                  "token_chars": [ "letter", "digit" ]
              }
          }
      }
  }


},
"type_mapping" : {
    "searcher" : {
      "query_1" : {
          "_all" : {
              "analyzer" : "polish",
              "index_analyzer": "nGram_analyzer",
              "search_analyzer": "my_search_analyzer"
          },
          "products_name" : {
              "type" : "string",
              "analyzer" : "polish",
              "index_analyzer": "nGram_analyzer",
              "search_analyzer": "my_search_analyzer"
          },
          "categories_name" : {
              "type" : "string",
              "analyzer" : "polish",
              "index_analyzer": "nGram_analyzer",
              "search_analyzer": "my_search_analyzer"
          },
          "manufacturers_name" : {
              "type" : "string",
              "analyzer" : "polish",
              "index_analyzer": "nGram_analyzer",
              "search_analyzer": "my_search_analyzer"
          },
          "products_description" : {
              "type" : "string",
              "analyzer" : "polish",
              "index_analyzer": "nGram_analyzer",
              "search_analyzer": "my_search_analyzer"
          },
          "products_code" : {
              "type" : "string",
              "analyzer" : "polish",
              "index_analyzer": "nGram_analyzer",
              "search_analyzer": "my_search_analyzer"
          },
          "products_model" : {
              "type" : "string",
              "analyzer" : "polish",
              "index_analyzer": "nGram_analyzer",
              "search_analyzer": "my_search_analyzer"
          },
          "products_search_tags" : {
              "type" : "string",
              "analyzer" : "polish",
              "index_analyzer": "nGram_analyzer",
              "search_analyzer": "my_search_analyzer"
          },
          "products_description_seo_tag" : {
              "type" : "string",
              "analyzer" : "polish",
              "index_analyzer": "nGram_analyzer",
              "search_analyzer": "my_search_analyzer"
          }
      }

    }

}

}'

我做错了什么?

最佳答案

第一个错误出在您的 JDBC River 规范上。index 和 type_mapping 需要放在 jdbc 结构内部,而不是外部(另外,您的类型映射在 _all 字段之后缺少 properties 关键字)。纠正这些问题之后,应该会得到类似下面的配置:

curl -XPUT 'localhost:9200/_river/query_1/_meta' -d '{
  "type": "jdbc",
  "jdbc": {
    "url": "jdbc:mysql://localhost:3306/testowa",
    "user": "root",
    "password": "****",
    "sql": "SELECT p.products_id as _id, p.products_id, tr.tax_class_id, m.manufacturers_id, p.products_status, products_temporarily_unavailable, ptc.categories_id, ctt.categories_disabled, ctt.category_tags, ctt.categories_name, pd.products_name, manufacturers_name, pd.products_description, p.products_model, p.products_code, pd.products_search_tags, pd.products_description_seo_tag FROM products_description pd, products_to_categories ptc, tax_rates tr, manufacturers m, categories_tree_table ctt, products p LEFT JOIN specials ON specials.products_id = p.products_id AND  status = 1 LEFT JOIN products_gratis pg ON pg.ref_products_id = p.products_id WHERE pd.products_id = p.products_id AND ptc.products_id = p.products_id AND p.products_tax_class_id = tr.tax_class_id AND p.manufacturers_id = m.manufacturers_id AND (p.products_status = 1 or products_temporarily_unavailable = 1) AND pd.language_id = 1 AND m.language_id = 1 AND p.products_is_archive = 0 AND ptc.categories_id = ctt.categories_id AND ctt.categories_disabled != 1",
    "poll": "10s",
    "strategy": "simple",
    "schedule": "0 1-59 0-23 ? * *",
    "autocommit": true,
    "index": "searcher",
    "index_settings": {                 <-- index settings, analyzers go here
      "analysis": {
        "filter": {
          "nGram_filter": {
            "type": "nGram",
            "min_gram": 2,
            "max_gram": 20,
            "token_chars": [
              "letter",
              "digit",
              "punctuation",
              "symbol"
            ]
          }
        },
        "analyzer": {
          "nGram_analyzer": {
            "type": "custom",
            "tokenizer": "my_ngram_tokenizer",
            "filter": [
              "lowercase",
              "asciifolding",
              "nGram_filter"
            ]
          },
          "my_search_analyzer": {
            "type": "custom",
            "tokenizer": "standard",
            "filter": [
              "standard",
              "lowercase",
              "nGram"
            ]
          }
        },
        "tokenizer": {
          "my_ngram_tokenizer": {
            "type": "nGram",
            "min_gram": "3",
            "max_gram": "20",
            "token_chars": [
              "letter",
              "digit"
            ]
          }
        }
      }
    },
    "type": "query_1",
    "type_mapping": {           <--- your type mapping goes here
      "query_1": {
        "_all": {
          "analyzer": "polish",
          "index_analyzer": "nGram_analyzer",
          "search_analyzer": "my_search_analyzer"
        },
        "properties": {
          "products_name": {
            "type": "string",
            "analyzer": "polish",
            "index_analyzer": "nGram_analyzer",
            "search_analyzer": "my_search_analyzer"
          },
          "categories_name": {
            "type": "string",
            "analyzer": "polish",
            "index_analyzer": "nGram_analyzer",
            "search_analyzer": "my_search_analyzer"
          },
          "manufacturers_name": {
            "type": "string",
            "analyzer": "polish",
            "index_analyzer": "nGram_analyzer",
            "search_analyzer": "my_search_analyzer"
          },
          "products_description": {
            "type": "string",
            "analyzer": "polish",
            "index_analyzer": "nGram_analyzer",
            "search_analyzer": "my_search_analyzer"
          },
          "products_code": {
            "type": "string",
            "analyzer": "polish",
            "index_analyzer": "nGram_analyzer",
            "search_analyzer": "my_search_analyzer"
          },
          "products_model": {
            "type": "string",
            "analyzer": "polish",
            "index_analyzer": "nGram_analyzer",
            "search_analyzer": "my_search_analyzer"
          },
          "products_search_tags": {
            "type": "string",
            "analyzer": "polish",
            "index_analyzer": "nGram_analyzer",
            "search_analyzer": "my_search_analyzer"
          },
          "products_description_seo_tag": {
            "type": "string",
            "analyzer": "polish",
            "index_analyzer": "nGram_analyzer",
            "search_analyzer": "my_search_analyzer"
          }
        }
      }
    }
  }
}'

关于mysql - ElasticSearch 和 jdbc - 映射、分析器、过滤器设置,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/32452979/

相关文章:

java.util.stream 与 ResultSet

elasticsearch - ELK Stack 在 EC2 容器服务上使用 Docker

elasticsearch - Elasticsearch: “birthday”异常

php - 如果 ID 存在则更新行,否则插入

php - 使用 1 个查询从 mysql 表中选择一个值

java - jBPM + Spring 事务共享和范围

elasticsearch - 随索引变化使Elasticsearch文档频率保持恒定

java - 此 LoginActivity 从哪里获取电话号码和密码?

java - 使用 Java 更新 MySQL 表

java - UCanAccess:将列添加到现有表