45 Elasticsearch docker安装ik插件及预设分词器 - xiaoxin01/Blog GitHub Wiki
在使用Elasticsearch处理中文内容时,需要安装额外的解析器来处理中文分词。
以ik为例:
# Version shared by the Elasticsearch base image tag and the IK plugin release URL.
# Declared before FROM so it can be used in the FROM line; override it with
#   docker build --build-arg ELK_VERSION=<version> .
ARG ELK_VERSION=6.2.2
FROM docker.elastic.co/elasticsearch/elasticsearch-oss:$ELK_VERSION
# An ARG declared before FROM is not visible inside the build stage;
# redeclaring it without a value brings the same value back into scope for RUN.
ARG ELK_VERSION
# --batch skips the interactive permission confirmation, which cannot be
# answered during a non-interactive `docker build`.
RUN ./bin/elasticsearch-plugin install --batch https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v$ELK_VERSION/elasticsearch-analysis-ik-$ELK_VERSION.zip
# Index template "template_1", matching every index name ("*").
# It defines the custom analyzer "my_custom_analyzer" (html_strip char filter,
# ik_max_word tokenizer, lowercase + asciifolding token filters) and maps the
# "content", "title" and "description" fields of type "doc" as text analyzed
# with it, each with a "keyword" sub-field (ignore_above 256).
PUT /_template/template_1
{
  "index_patterns": ["*"],
  "settings": {
    "analysis": {
      "analyzer": {
        "my_custom_analyzer": {
          "type": "custom",
          "tokenizer": "ik_max_word",
          "char_filter": ["html_strip"],
          "filter": ["lowercase", "asciifolding"]
        }
      }
    }
  },
  "mappings": {
    "doc": {
      "properties": {
        "content": {
          "type": "text",
          "analyzer": "my_custom_analyzer",
          "fields": {
            "keyword": { "type": "keyword", "ignore_above": 256 }
          }
        },
        "title": {
          "type": "text",
          "analyzer": "my_custom_analyzer",
          "fields": {
            "keyword": { "type": "keyword", "ignore_above": 256 }
          }
        },
        "description": {
          "type": "text",
          "analyzer": "my_custom_analyzer",
          "fields": {
            "keyword": { "type": "keyword", "ignore_above": 256 }
          }
        }
      }
    }
  }
}
如果在配置之前数据已经存在于Elasticsearch中,则可以用reindex功能来重新索引数据:
# Step 1: copy all documents from origin_index into new_index.
# new_index is created by this request, so any index template matching its
# name is applied when it is created.
POST _reindex
{
  "source": {
    "index": "origin_index"
  },
  "dest": {
    "index": "new_index"
  }
}

# Step 2: remove the original index so it can be recreated.
# The HTTP method must be upper-case.
DELETE origin_index

# Step 3: copy the documents back under the original index name.
# Nothing here deletes new_index; it still exists after this step.
POST _reindex
{
  "source": {
    "index": "new_index"
  },
  "dest": {
    "index": "origin_index"
  }
}
# Search-template request against origin_index.
# The {{keyword}} placeholder is filled from "params" and matched against
# title, content and description; matches in those fields are highlighted
# with <mark> tags. Only "title" and "meta" are returned from _source.
GET origin_index/_search/template
{
  "source": {
    "_source": ["title", "meta"],
    "query": {
      "multi_match": {
        "query": "{{keyword}}",
        "fields": ["title", "content", "description"]
      }
    },
    "highlight": {
      "number_of_fragments": 3,
      "fragment_size": 150,
      "fields": {
        "content": {
          "pre_tags": ["<mark>"],
          "post_tags": ["</mark>"]
        },
        "description": {
          "pre_tags": ["<mark>"],
          "post_tags": ["</mark>"]
        },
        "title": {
          "pre_tags": ["<mark>"],
          "post_tags": ["</mark>"]
        }
      }
    }
  },
  "params": {
    "keyword": "手机"
  }
}