我搜索了很多 Stack Overflow 问题、ElasticSearch 文档和论坛,但都没有找到解决办法。
我尝试为 MySQL 数据库配置 ElasticSearch JDBC river,并在搜索中实现按单词的一部分进行匹配(例如,当您输入“bicycl”时,脚本必须能搜索到“bicycle”)。 我尝试使用 nGram,但我做错了…… 我需要的只是在字符串字段上实现 nGram。
这是我的主要 sql 配置:
# Registers a JDBC river by PUTting its _meta document to /_river/query_1/_meta on localhost:9200.
# The request body has three top-level sections next to "type": "jdbc":
#   "jdbc"         - connection URL, credentials, the SQL query, polling/schedule, target index/type
#   "index"        - index/type names plus custom analysis settings (nGram filter, tokenizer, analyzers)
#   "type_mapping" - per-field analyzer assignments for type "query_1" of index "searcher"
# NOTE(review): this is the configuration exactly as posted in the question. The answer further down
# in this file states that the index settings and "type_mapping" must be placed inside "jdbc" and that
# the field definitions need a "properties" wrapper; the command is intentionally left unchanged here.
curl -XPUT 'localhost:9200/_river/query_1/_meta' -d '{
"type" : "jdbc",
"jdbc" : {
"url" : "jdbc:mysql://localhost:3306/testowa",
"user" : "root",
"password" : "****",
"sql" : "SELECT p.products_id as _id, p.products_id, tr.tax_class_id, m.manufacturers_id, p.products_status, products_temporarily_unavailable, ptc.categories_id, ctt.categories_disabled, ctt.category_tags, ctt.categories_name, pd.products_name, manufacturers_name, pd.products_description, p.products_model, p.products_code, pd.products_search_tags, pd.products_description_seo_tag FROM products_description pd, products_to_categories ptc, tax_rates tr, manufacturers m, categories_tree_table ctt, products p LEFT JOIN specials ON specials.products_id = p.products_id AND status = 1 LEFT JOIN products_gratis pg ON pg.ref_products_id = p.products_id WHERE pd.products_id = p.products_id AND ptc.products_id = p.products_id AND p.products_tax_class_id = tr.tax_class_id AND p.manufacturers_id = m.manufacturers_id AND (p.products_status = 1 or products_temporarily_unavailable = 1) AND pd.language_id = 1 AND m.language_id = 1 AND p.products_is_archive = 0 AND ptc.categories_id = ctt.categories_id AND ctt.categories_disabled != 1",
"poll": "10s",
"strategy": "simple",
"schedule" : "0 1-59 0-23 ? * *",
"autocommit" : true,
"index" : "searcher",
"type" : "query_1"
},
"index" : {
"index" : "searcher",
"type" : "query_1",
"settings" : {
"analysis" : {
"filter" : {
"nGram_filter": {
"type": "nGram",
"min_gram": 2,
"max_gram": 20,
"token_chars": [
"letter",
"digit",
"punctuation",
"symbol"
]
}
},
"analyzer" : {
"nGram_analyzer": {
"type": "custom",
"tokenizer": "my_ngram_tokenizer",
"filter": [
"lowercase",
"asciifolding",
"nGram_filter"
]
},
"my_search_analyzer" : {
"type" : "custom",
"tokenizer" : "standard",
"filter" : ["standard", "lowercase", "nGram"]
}
},
"tokenizer" : {
"my_ngram_tokenizer" : {
"type" : "nGram",
"min_gram" : "3",
"max_gram" : "20",
"token_chars": [ "letter", "digit" ]
}
}
}
}
},
"type_mapping" : {
"searcher" : {
"query_1" : {
"_all" : {
"analyzer" : "polish",
"index_analyzer": "nGram_analyzer",
"search_analyzer": "my_search_analyzer"
},
"products_name" : {
"type" : "string",
"analyzer" : "polish",
"index_analyzer": "nGram_analyzer",
"search_analyzer": "my_search_analyzer"
},
"categories_name" : {
"type" : "string",
"analyzer" : "polish",
"index_analyzer": "nGram_analyzer",
"search_analyzer": "my_search_analyzer"
},
"manufacturers_name" : {
"type" : "string",
"analyzer" : "polish",
"index_analyzer": "nGram_analyzer",
"search_analyzer": "my_search_analyzer"
},
"products_description" : {
"type" : "string",
"analyzer" : "polish",
"index_analyzer": "nGram_analyzer",
"search_analyzer": "my_search_analyzer"
},
"products_code" : {
"type" : "string",
"analyzer" : "polish",
"index_analyzer": "nGram_analyzer",
"search_analyzer": "my_search_analyzer"
},
"products_model" : {
"type" : "string",
"analyzer" : "polish",
"index_analyzer": "nGram_analyzer",
"search_analyzer": "my_search_analyzer"
},
"products_search_tags" : {
"type" : "string",
"analyzer" : "polish",
"index_analyzer": "nGram_analyzer",
"search_analyzer": "my_search_analyzer"
},
"products_description_seo_tag" : {
"type" : "string",
"analyzer" : "polish",
"index_analyzer": "nGram_analyzer",
"search_analyzer": "my_search_analyzer"
}
}
}
}
}'
我做错了什么?
最佳答案
第一个错误在于您的 JDBC River 配置。 `index`(索引设置,在下面的示例中写作 `index_settings`)和 `type_mapping` 需要放在 `jdbc` 结构内部,而不是外部(另外,您的类型映射在 `_all` 字段之后缺少 `properties` 关键字)。纠正这些之后,配置应该类似于:
# Register the JDBC river "query_1" (PUT /_river/query_1/_meta on localhost:9200).
# Everything the river needs lives inside the "jdbc" object:
#   "index_settings" - index-level settings; the custom nGram filter, tokenizer and analyzers go here
#   "type_mapping"   - mapping for type "query_1"; field definitions sit under "properties"
# These notes are shell comments on purpose: JSON has no comment syntax, so inline "<--" markers
# inside the request body would make it invalid JSON.
curl -XPUT 'localhost:9200/_river/query_1/_meta' -d '{
  "type": "jdbc",
  "jdbc": {
    "url": "jdbc:mysql://localhost:3306/testowa",
    "user": "root",
    "password": "****",
    "sql": "SELECT p.products_id as _id, p.products_id, tr.tax_class_id, m.manufacturers_id, p.products_status, products_temporarily_unavailable, ptc.categories_id, ctt.categories_disabled, ctt.category_tags, ctt.categories_name, pd.products_name, manufacturers_name, pd.products_description, p.products_model, p.products_code, pd.products_search_tags, pd.products_description_seo_tag FROM products_description pd, products_to_categories ptc, tax_rates tr, manufacturers m, categories_tree_table ctt, products p LEFT JOIN specials ON specials.products_id = p.products_id AND status = 1 LEFT JOIN products_gratis pg ON pg.ref_products_id = p.products_id WHERE pd.products_id = p.products_id AND ptc.products_id = p.products_id AND p.products_tax_class_id = tr.tax_class_id AND p.manufacturers_id = m.manufacturers_id AND (p.products_status = 1 or products_temporarily_unavailable = 1) AND pd.language_id = 1 AND m.language_id = 1 AND p.products_is_archive = 0 AND ptc.categories_id = ctt.categories_id AND ctt.categories_disabled != 1",
    "poll": "10s",
    "strategy": "simple",
    "schedule": "0 1-59 0-23 ? * *",
    "autocommit": true,
    "index": "searcher",
    "index_settings": {
      "analysis": {
        "filter": {
          "nGram_filter": {
            "type": "nGram",
            "min_gram": 2,
            "max_gram": 20,
            "token_chars": [
              "letter",
              "digit",
              "punctuation",
              "symbol"
            ]
          }
        },
        "analyzer": {
          "nGram_analyzer": {
            "type": "custom",
            "tokenizer": "my_ngram_tokenizer",
            "filter": [
              "lowercase",
              "asciifolding",
              "nGram_filter"
            ]
          },
          "my_search_analyzer": {
            "type": "custom",
            "tokenizer": "standard",
            "filter": [
              "standard",
              "lowercase",
              "nGram"
            ]
          }
        },
        "tokenizer": {
          "my_ngram_tokenizer": {
            "type": "nGram",
            "min_gram": 3,
            "max_gram": 20,
            "token_chars": [
              "letter",
              "digit"
            ]
          }
        }
      }
    },
    "type": "query_1",
    "type_mapping": {
      "query_1": {
        "_all": {
          "analyzer": "polish",
          "index_analyzer": "nGram_analyzer",
          "search_analyzer": "my_search_analyzer"
        },
        "properties": {
          "products_name": {
            "type": "string",
            "analyzer": "polish",
            "index_analyzer": "nGram_analyzer",
            "search_analyzer": "my_search_analyzer"
          },
          "categories_name": {
            "type": "string",
            "analyzer": "polish",
            "index_analyzer": "nGram_analyzer",
            "search_analyzer": "my_search_analyzer"
          },
          "manufacturers_name": {
            "type": "string",
            "analyzer": "polish",
            "index_analyzer": "nGram_analyzer",
            "search_analyzer": "my_search_analyzer"
          },
          "products_description": {
            "type": "string",
            "analyzer": "polish",
            "index_analyzer": "nGram_analyzer",
            "search_analyzer": "my_search_analyzer"
          },
          "products_code": {
            "type": "string",
            "analyzer": "polish",
            "index_analyzer": "nGram_analyzer",
            "search_analyzer": "my_search_analyzer"
          },
          "products_model": {
            "type": "string",
            "analyzer": "polish",
            "index_analyzer": "nGram_analyzer",
            "search_analyzer": "my_search_analyzer"
          },
          "products_search_tags": {
            "type": "string",
            "analyzer": "polish",
            "index_analyzer": "nGram_analyzer",
            "search_analyzer": "my_search_analyzer"
          },
          "products_description_seo_tag": {
            "type": "string",
            "analyzer": "polish",
            "index_analyzer": "nGram_analyzer",
            "search_analyzer": "my_search_analyzer"
          }
        }
      }
    }
  }
}'
关于mysql - ElasticSearch 和 jdbc - 映射、分析器、过滤器设置,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/32452979/