diff --git a/README.md b/README.md index f1406443..73c129d2 100644 --- a/README.md +++ b/README.md @@ -163,37 +163,28 @@ Result ### Dictionary Configuration -`IKAnalyzer.cfg.xml` can be located at `{conf}/analysis-ik/config/IKAnalyzer.cfg.xml` -or `{plugins}/elasticsearch-analysis-ik-*/config/IKAnalyzer.cfg.xml` - -```xml - - - - IK Analyzer 扩展配置 - - custom/mydict.dic;custom/single_word_low_freq.dic - - custom/ext_stopword.dic - - location - - http://xxx.com/xxx.dic - +`IKAnalyzer.yml` can be located at `{conf}/analysis-ik/IKAnalyzer.yml` + +```yml +# IK Analyzer 扩展配置 +analysis_ik: + # 字典配置 + dictionary: + # 用户可以在这里配置自己的扩展字典 + ext_dict: "" + # 用户可以在这里配置自己的扩展停止词字典 + ext_stop_word: "" + # 用户可以在这里配置远程扩展字典 + remote_ext_dict: "" + # 用户可以在这里配置远程扩展停止词字典 + remote_ext_stop_word: "" ``` ### 热更新 IK 分词使用方法 目前该插件支持热更新 IK 分词,通过上文在 IK 配置文件中提到的如下配置 -```xml - - location - - location -``` - -其中 `location` 是指一个 url,比如 `http://yoursite.com/getCustomDict`,该请求只需满足以下两点即可完成分词热更新。 +`remote_ext_dict` 和 `remote_ext_stop_word`,它们的参数值是指一个 url,比如 `http://yoursite.com/getCustomDict`,该请求只需满足以下两点即可完成分词热更新。 1. 该 http 请求需要返回两个头部(header),一个是 `Last-Modified`,一个是 `ETag`,这两者都是字符串类型,只要有一个发生变化,该插件就会去抓取新的分词进而更新词库。 @@ -205,6 +196,21 @@ or `{plugins}/elasticsearch-analysis-ik-*/config/IKAnalyzer.cfg.xml` have fun. 
+如果使用Docker运行ElasticSearch服务(需要定制ElasticSearch镜像,安装上本插件),可以在创建容器时,通过配置环境变量,将上述参数传递进去: + +```yml +elasticsearch: + image: my-elasticsearch-chs:7.9.3 + container_name: elasticsearch + environment: + - cluster.name=docker-cluster + - bootstrap.memory_lock=true + - "ES_JAVA_OPTS=-Xms512m -Xmx512m" + - discovery.type=single-node + - analysis_ik.dictionary.remote_ext_dict=http://www.example.com/dic.txt + - analysis_ik.dictionary.remote_ext_stop_word=http://www.example.com/stop-word.txt +``` + 常见问题 ------- diff --git a/config/IKAnalyzer.cfg.xml b/config/IKAnalyzer.cfg.xml deleted file mode 100644 index fe69bb20..00000000 --- a/config/IKAnalyzer.cfg.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - IK Analyzer 扩展配置 - - - - - - - - - diff --git a/config/IKAnalyzer.yml b/config/IKAnalyzer.yml new file mode 100644 index 00000000..891c130c --- /dev/null +++ b/config/IKAnalyzer.yml @@ -0,0 +1,12 @@ +# IK Analyzer 扩展配置 +analysis_ik: + # 字典配置 + dictionary: + # 用户可以在这里配置自己的扩展字典 + ext_dict: "" + # 用户可以在这里配置自己的扩展停止词字典 + ext_stop_word: "" + # 用户可以在这里配置远程扩展字典 + remote_ext_dict: "" + # 用户可以在这里配置远程扩展停止词字典 + remote_ext_stop_word: "" \ No newline at end of file diff --git a/pom.xml b/pom.xml index 4737083b..6935b362 100644 --- a/pom.xml +++ b/pom.xml @@ -6,13 +6,13 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-ik - ${elasticsearch.version} + 7.9.3 jar IK Analyzer for Elasticsearch 2011 - 7.4.0 + ${project.version} 1.8 ${project.basedir}/src/main/assemblies/plugin.xml analysis-ik diff --git a/src/main/java/org/elasticsearch/plugin/analysis/ik/AnalysisIkPlugin.java b/src/main/java/org/elasticsearch/plugin/analysis/ik/AnalysisIkPlugin.java index e6ed25c7..fd666dd2 100644 --- a/src/main/java/org/elasticsearch/plugin/analysis/ik/AnalysisIkPlugin.java +++ b/src/main/java/org/elasticsearch/plugin/analysis/ik/AnalysisIkPlugin.java @@ -1,6 +1,8 @@ package org.elasticsearch.plugin.analysis.ik; import org.apache.lucene.analysis.Analyzer; +import org.elasticsearch.common.settings.Setting; +import 
org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.analysis.AnalyzerProvider; import org.elasticsearch.index.analysis.IkAnalyzerProvider; import org.elasticsearch.index.analysis.IkTokenizerFactory; @@ -8,14 +10,35 @@ import org.elasticsearch.indices.analysis.AnalysisModule; import org.elasticsearch.plugins.AnalysisPlugin; import org.elasticsearch.plugins.Plugin; +import org.apache.logging.log4j.Logger; +import org.wltea.analyzer.help.ESPluginLoggerFactory; +import java.io.IOException; +import java.nio.file.Path; +import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import java.util.Map; public class AnalysisIkPlugin extends Plugin implements AnalysisPlugin { - public static String PLUGIN_NAME = "analysis-ik"; + public static String PLUGIN_NAME = "analysis-ik"; + + private final static String FILE_NAME = "IKAnalyzer.yml"; + + private final Path configPath; + + private static final Logger logger = ESPluginLoggerFactory.getLogger(AnalysisIkPlugin.class.getName()); + + private final static String EXT_DICT = "ext_dict"; + private final static String REMOTE_EXT_DICT = "remote_ext_dict"; + private final static String EXT_STOP = "ext_stop_word"; + private final static String REMOTE_EXT_STOP = "remote_ext_stop_word"; + + public AnalysisIkPlugin(Settings settings, Path configPath) { + this.configPath = configPath; + } @Override public Map> getTokenizers() { @@ -38,4 +61,27 @@ public Map> getSettings() { + String[] dictionaries = { EXT_DICT, EXT_STOP, REMOTE_EXT_DICT, REMOTE_EXT_STOP }; + List> settings = new ArrayList>(); + for (String dictionary : dictionaries) { + String[] keyInfo = { PLUGIN_NAME.replace("-", "_"), "dictionary", dictionary }; + String key = String.join(".", keyInfo); + Setting setting = Setting.simpleString(key, "", Setting.Property.NodeScope); + settings.add(setting); + } + return settings; + } } diff --git a/src/main/java/org/wltea/analyzer/dic/Dictionary.java 
b/src/main/java/org/wltea/analyzer/dic/Dictionary.java index 2e72ed02..d86827f5 100644 --- a/src/main/java/org/wltea/analyzer/dic/Dictionary.java +++ b/src/main/java/org/wltea/analyzer/dic/Dictionary.java @@ -45,7 +45,6 @@ import org.apache.http.Header; import org.apache.http.HttpEntity; -import org.apache.http.client.ClientProtocolException; import org.apache.http.client.config.RequestConfig; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; @@ -53,6 +52,7 @@ import org.apache.http.impl.client.HttpClients; import org.elasticsearch.SpecialPermission; import org.elasticsearch.common.io.PathUtils; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.plugin.analysis.ik.AnalysisIkPlugin; import org.wltea.analyzer.cfg.Configuration; import org.apache.logging.log4j.Logger; @@ -91,50 +91,27 @@ public class Dictionary { private static final String PATH_DIC_PREP = "preposition.dic"; private static final String PATH_DIC_STOP = "stopword.dic"; - private final static String FILE_NAME = "IKAnalyzer.cfg.xml"; private final static String EXT_DICT = "ext_dict"; private final static String REMOTE_EXT_DICT = "remote_ext_dict"; - private final static String EXT_STOP = "ext_stopwords"; - private final static String REMOTE_EXT_STOP = "remote_ext_stopwords"; + private final static String EXT_STOP = "ext_stop_word"; + private final static String REMOTE_EXT_STOP = "remote_ext_stop_word"; - private Path conf_dir; - private Properties props; + private Path configDir; + private Settings settings; private Dictionary(Configuration cfg) { this.configuration = cfg; - this.props = new Properties(); - this.conf_dir = cfg.getEnvironment().configFile().resolve(AnalysisIkPlugin.PLUGIN_NAME); - Path configFile = conf_dir.resolve(FILE_NAME); + this.configDir = cfg.getEnvironment().configFile().resolve(AnalysisIkPlugin.PLUGIN_NAME); + this.settings = cfg.getEnvironment().settings(); + } - InputStream input = null; - try { 
- logger.info("try load config from {}", configFile); - input = new FileInputStream(configFile.toFile()); - } catch (FileNotFoundException e) { - conf_dir = cfg.getConfigInPluginDir(); - configFile = conf_dir.resolve(FILE_NAME); - try { - logger.info("try load config from {}", configFile); - input = new FileInputStream(configFile.toFile()); - } catch (FileNotFoundException ex) { - // We should report origin exception - logger.error("ik-analyzer", e); - } - } - if (input != null) { - try { - props.loadFromXML(input); - } catch (IOException e) { - logger.error("ik-analyzer", e); - } - } + public Settings getSettings() { + return settings; } - private String getProperty(String key){ - if(props!=null){ - return props.getProperty(key); - } - return null; + private String getDictionarySetting(String key) { + String[] keys = { AnalysisIkPlugin.PLUGIN_NAME.replace("-", "_"), "dictionary", key }; + return settings.get(String.join(".", keys)); } /** * 词典初始化 由于IK Analyzer的词典采用Dictionary类的静态方法进行词典初始化 @@ -218,9 +195,8 @@ private void loadDictFile(DictSegment dict, Path file, boolean critical, String private List getExtDictionarys() { List extDictFiles = new ArrayList(2); - String extDictCfg = getProperty(EXT_DICT); - if (extDictCfg != null) { - + String extDictCfg = getDictionarySetting(EXT_DICT); + if (!extDictCfg.trim().equals("")) { String[] filePaths = extDictCfg.split(";"); for (String filePath : filePaths) { if (filePath != null && !"".equals(filePath.trim())) { @@ -235,9 +211,9 @@ private List getExtDictionarys() { private List getRemoteExtDictionarys() { List remoteExtDictFiles = new ArrayList(2); - String remoteExtDictCfg = getProperty(REMOTE_EXT_DICT); - if (remoteExtDictCfg != null) { - + String remoteExtDictCfg = getDictionarySetting(REMOTE_EXT_DICT); + if (!remoteExtDictCfg.trim().equals("")) { + logger.info(">>>" + remoteExtDictCfg); String[] filePaths = remoteExtDictCfg.split(";"); for (String filePath : filePaths) { if (filePath != null && 
!"".equals(filePath.trim())) { @@ -251,9 +227,8 @@ private List getRemoteExtDictionarys() { private List getExtStopWordDictionarys() { List extStopWordDictFiles = new ArrayList(2); - String extStopWordDictCfg = getProperty(EXT_STOP); - if (extStopWordDictCfg != null) { - + String extStopWordDictCfg = getDictionarySetting(EXT_STOP); + if (!extStopWordDictCfg.trim().equals("")) { String[] filePaths = extStopWordDictCfg.split(";"); for (String filePath : filePaths) { if (filePath != null && !"".equals(filePath.trim())) { @@ -268,9 +243,8 @@ private List getExtStopWordDictionarys() { private List getRemoteExtStopWordDictionarys() { List remoteExtStopWordDictFiles = new ArrayList(2); - String remoteExtStopWordDictCfg = getProperty(REMOTE_EXT_STOP); - if (remoteExtStopWordDictCfg != null) { - + String remoteExtStopWordDictCfg = getDictionarySetting(REMOTE_EXT_STOP); + if (!remoteExtStopWordDictCfg.trim().equals("")) { String[] filePaths = remoteExtStopWordDictCfg.split(";"); for (String filePath : filePaths) { if (filePath != null && !"".equals(filePath.trim())) { @@ -283,7 +257,7 @@ private List getRemoteExtStopWordDictionarys() { } private String getDictRoot() { - return conf_dir.toAbsolutePath().toString(); + return configDir.toAbsolutePath().toString(); }