diff --git a/README.md b/README.md
index f1406443..73c129d2 100644
--- a/README.md
+++ b/README.md
@@ -163,37 +163,28 @@ Result
### Dictionary Configuration
-`IKAnalyzer.cfg.xml` can be located at `{conf}/analysis-ik/config/IKAnalyzer.cfg.xml`
-or `{plugins}/elasticsearch-analysis-ik-*/config/IKAnalyzer.cfg.xml`
-
-```xml
-
-
-
- IK Analyzer 扩展配置
-
- custom/mydict.dic;custom/single_word_low_freq.dic
-
- custom/ext_stopword.dic
-
- location
-
- http://xxx.com/xxx.dic
-
+`IKAnalyzer.yml` can be located at `{conf}/analysis-ik/IKAnalyzer.yml`
+
+```yml
+# IK Analyzer 扩展配置
+analysis_ik:
+ # 字典配置
+ dictionary:
+ # 用户可以在这里配置自己的扩展字典
+ ext_dict: ""
+ # 用户可以在这里配置自己的扩展停止词字典
+ ext_stop_word: ""
+ # 用户可以在这里配置远程扩展字典
+ remote_ext_dict: ""
+ # 用户可以在这里配置远程扩展停止词字典
+ remote_ext_stop_word: ""
```
### 热更新 IK 分词使用方法
目前该插件支持热更新 IK 分词,通过上文在 IK 配置文件中提到的如下配置
-```xml
-
- location
-
- location
-```
-
-其中 `location` 是指一个 url,比如 `http://yoursite.com/getCustomDict`,该请求只需满足以下两点即可完成分词热更新。
+`remote_ext_dict` 和 `remote_ext_stop_word`,它们的参数值是指一个 url,比如 `http://yoursite.com/getCustomDict`,该请求只需满足以下两点即可完成分词热更新。
1. 该 http 请求需要返回两个头部(header),一个是 `Last-Modified`,一个是 `ETag`,这两者都是字符串类型,只要有一个发生变化,该插件就会去抓取新的分词进而更新词库。
@@ -205,6 +196,21 @@ or `{plugins}/elasticsearch-analysis-ik-*/config/IKAnalyzer.cfg.xml`
have fun.
+如果使用 Docker 运行 Elasticsearch 服务(需要定制 Elasticsearch 镜像并安装本插件),可以在创建容器时通过环境变量将上述参数传递进去:
+
+```yml
+elasticsearch:
+ image: my-elasticsearch-chs:7.9.3
+ container_name: elasticsearch
+ environment:
+ - cluster.name=docker-cluster
+ - bootstrap.memory_lock=true
+ - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
+ - discovery.type=single-node
+ - analysis_ik.dictionary.remote_ext_dict=http://www.example.com/dic.txt
+ - analysis_ik.dictionary.remote_ext_stop_word=http://www.example.com/stop-word.txt
+```
+
常见问题
-------
diff --git a/config/IKAnalyzer.cfg.xml b/config/IKAnalyzer.cfg.xml
deleted file mode 100644
index fe69bb20..00000000
--- a/config/IKAnalyzer.cfg.xml
+++ /dev/null
@@ -1,13 +0,0 @@
-
-
-
- IK Analyzer 扩展配置
-
-
-
-
-
-
-
-
-
diff --git a/config/IKAnalyzer.yml b/config/IKAnalyzer.yml
new file mode 100644
index 00000000..891c130c
--- /dev/null
+++ b/config/IKAnalyzer.yml
@@ -0,0 +1,12 @@
+# IK Analyzer 扩展配置
+analysis_ik:
+ # 字典配置
+ dictionary:
+ # 用户可以在这里配置自己的扩展字典
+ ext_dict: ""
+ # 用户可以在这里配置自己的扩展停止词字典
+ ext_stop_word: ""
+ # 用户可以在这里配置远程扩展字典
+ remote_ext_dict: ""
+ # 用户可以在这里配置远程扩展停止词字典
+ remote_ext_stop_word: ""
\ No newline at end of file
diff --git a/pom.xml b/pom.xml
index 4737083b..6935b362 100644
--- a/pom.xml
+++ b/pom.xml
@@ -6,13 +6,13 @@
4.0.0
org.elasticsearch
elasticsearch-analysis-ik
- ${elasticsearch.version}
+ 7.9.3
jar
IK Analyzer for Elasticsearch
2011
- 7.4.0
+ ${project.version}
1.8
${project.basedir}/src/main/assemblies/plugin.xml
analysis-ik
diff --git a/src/main/java/org/elasticsearch/plugin/analysis/ik/AnalysisIkPlugin.java b/src/main/java/org/elasticsearch/plugin/analysis/ik/AnalysisIkPlugin.java
index e6ed25c7..fd666dd2 100644
--- a/src/main/java/org/elasticsearch/plugin/analysis/ik/AnalysisIkPlugin.java
+++ b/src/main/java/org/elasticsearch/plugin/analysis/ik/AnalysisIkPlugin.java
@@ -1,6 +1,8 @@
package org.elasticsearch.plugin.analysis.ik;
import org.apache.lucene.analysis.Analyzer;
+import org.elasticsearch.common.settings.Setting;
+import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.analysis.AnalyzerProvider;
import org.elasticsearch.index.analysis.IkAnalyzerProvider;
import org.elasticsearch.index.analysis.IkTokenizerFactory;
@@ -8,14 +10,35 @@
import org.elasticsearch.indices.analysis.AnalysisModule;
import org.elasticsearch.plugins.AnalysisPlugin;
import org.elasticsearch.plugins.Plugin;
+import org.apache.logging.log4j.Logger;
+import org.wltea.analyzer.help.ESPluginLoggerFactory;
+import java.io.IOException;
+import java.nio.file.Path;
+import java.util.ArrayList;
import java.util.HashMap;
+import java.util.List;
import java.util.Map;
public class AnalysisIkPlugin extends Plugin implements AnalysisPlugin {
- public static String PLUGIN_NAME = "analysis-ik";
+ public static String PLUGIN_NAME = "analysis-ik";
+
+ private final static String FILE_NAME = "IKAnalyzer.yml";
+
+ private final Path configPath;
+
+ private static final Logger logger = ESPluginLoggerFactory.getLogger(AnalysisIkPlugin.class.getName());
+
+ private final static String EXT_DICT = "ext_dict";
+ private final static String REMOTE_EXT_DICT = "remote_ext_dict";
+ private final static String EXT_STOP = "ext_stop_word";
+ private final static String REMOTE_EXT_STOP = "remote_ext_stop_word";
+
+ public AnalysisIkPlugin(Settings settings, Path configPath) { // only configPath is retained; the settings argument is not used in this constructor
+ this.configPath = configPath;
+ }
@Override
public Map> getTokenizers() {
@@ -38,4 +61,27 @@ public Map> getSettings() {
+ String[] dictionaries = { EXT_DICT, EXT_STOP, REMOTE_EXT_DICT, REMOTE_EXT_STOP };
+ List> settings = new ArrayList>();
+ for (String dictionary : dictionaries) {
+ String[] keyInfo = { PLUGIN_NAME.replace("-", "_"), "dictionary", dictionary };
+ String key = String.join(".", keyInfo);
+ Setting setting = Setting.simpleString(key, "", Setting.Property.NodeScope);
+ settings.add(setting);
+ }
+ return settings;
+ }
}
diff --git a/src/main/java/org/wltea/analyzer/dic/Dictionary.java b/src/main/java/org/wltea/analyzer/dic/Dictionary.java
index 2e72ed02..d86827f5 100644
--- a/src/main/java/org/wltea/analyzer/dic/Dictionary.java
+++ b/src/main/java/org/wltea/analyzer/dic/Dictionary.java
@@ -45,7 +45,6 @@
import org.apache.http.Header;
import org.apache.http.HttpEntity;
-import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
@@ -53,6 +52,7 @@
import org.apache.http.impl.client.HttpClients;
import org.elasticsearch.SpecialPermission;
import org.elasticsearch.common.io.PathUtils;
+import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.plugin.analysis.ik.AnalysisIkPlugin;
import org.wltea.analyzer.cfg.Configuration;
import org.apache.logging.log4j.Logger;
@@ -91,50 +91,27 @@ public class Dictionary {
private static final String PATH_DIC_PREP = "preposition.dic";
private static final String PATH_DIC_STOP = "stopword.dic";
- private final static String FILE_NAME = "IKAnalyzer.cfg.xml";
private final static String EXT_DICT = "ext_dict";
private final static String REMOTE_EXT_DICT = "remote_ext_dict";
- private final static String EXT_STOP = "ext_stopwords";
- private final static String REMOTE_EXT_STOP = "remote_ext_stopwords";
+ private final static String EXT_STOP = "ext_stop_word";
+ private final static String REMOTE_EXT_STOP = "remote_ext_stop_word";
- private Path conf_dir;
- private Properties props;
+ private Path configDir;
+ private Settings settings;
private Dictionary(Configuration cfg) {
this.configuration = cfg;
- this.props = new Properties();
- this.conf_dir = cfg.getEnvironment().configFile().resolve(AnalysisIkPlugin.PLUGIN_NAME);
- Path configFile = conf_dir.resolve(FILE_NAME);
+ this.configDir = cfg.getEnvironment().configFile().resolve(AnalysisIkPlugin.PLUGIN_NAME); // plugin-named directory under the environment's config path
+ this.settings = cfg.getEnvironment().settings(); // settings of the environment; dictionary keys are looked up here instead of an XML properties file
+ }
- InputStream input = null;
- try {
- logger.info("try load config from {}", configFile);
- input = new FileInputStream(configFile.toFile());
- } catch (FileNotFoundException e) {
- conf_dir = cfg.getConfigInPluginDir();
- configFile = conf_dir.resolve(FILE_NAME);
- try {
- logger.info("try load config from {}", configFile);
- input = new FileInputStream(configFile.toFile());
- } catch (FileNotFoundException ex) {
- // We should report origin exception
- logger.error("ik-analyzer", e);
- }
- }
- if (input != null) {
- try {
- props.loadFromXML(input);
- } catch (IOException e) {
- logger.error("ik-analyzer", e);
- }
- }
+ public Settings getSettings() { // returns the Settings captured from cfg.getEnvironment().settings() in the constructor
+ return settings;
}
- private String getProperty(String key){
- if(props!=null){
- return props.getProperty(key);
- }
- return null;
+ private String getDictionarySetting(String key) { // reads "<plugin name with '-' replaced by '_'>.dictionary.<key>"; never returns null
+ String[] keys = { AnalysisIkPlugin.PLUGIN_NAME.replace("-", "_"), "dictionary", key };
+ return settings.get(String.join(".", keys), ""); // default to "" so callers can call trim() safely when the key is not configured
}
/**
* 词典初始化 由于IK Analyzer的词典采用Dictionary类的静态方法进行词典初始化
@@ -218,9 +195,8 @@ private void loadDictFile(DictSegment dict, Path file, boolean critical, String
private List getExtDictionarys() {
List extDictFiles = new ArrayList(2);
- String extDictCfg = getProperty(EXT_DICT);
- if (extDictCfg != null) {
-
+ String extDictCfg = getDictionarySetting(EXT_DICT);
+ if (!extDictCfg.trim().equals("")) {
String[] filePaths = extDictCfg.split(";");
for (String filePath : filePaths) {
if (filePath != null && !"".equals(filePath.trim())) {
@@ -235,9 +211,9 @@ private List getExtDictionarys() {
private List getRemoteExtDictionarys() {
List remoteExtDictFiles = new ArrayList(2);
- String remoteExtDictCfg = getProperty(REMOTE_EXT_DICT);
- if (remoteExtDictCfg != null) {
-
+ String remoteExtDictCfg = getDictionarySetting(REMOTE_EXT_DICT);
+ if (!remoteExtDictCfg.trim().equals("")) {
+ logger.info(">>>" + remoteExtDictCfg);
String[] filePaths = remoteExtDictCfg.split(";");
for (String filePath : filePaths) {
if (filePath != null && !"".equals(filePath.trim())) {
@@ -251,9 +227,8 @@ private List getRemoteExtDictionarys() {
private List getExtStopWordDictionarys() {
List extStopWordDictFiles = new ArrayList(2);
- String extStopWordDictCfg = getProperty(EXT_STOP);
- if (extStopWordDictCfg != null) {
-
+ String extStopWordDictCfg = getDictionarySetting(EXT_STOP);
+ if (!extStopWordDictCfg.trim().equals("")) {
String[] filePaths = extStopWordDictCfg.split(";");
for (String filePath : filePaths) {
if (filePath != null && !"".equals(filePath.trim())) {
@@ -268,9 +243,8 @@ private List getExtStopWordDictionarys() {
private List getRemoteExtStopWordDictionarys() {
List remoteExtStopWordDictFiles = new ArrayList(2);
- String remoteExtStopWordDictCfg = getProperty(REMOTE_EXT_STOP);
- if (remoteExtStopWordDictCfg != null) {
-
+ String remoteExtStopWordDictCfg = getDictionarySetting(REMOTE_EXT_STOP);
+ if (!remoteExtStopWordDictCfg.trim().equals("")) {
String[] filePaths = remoteExtStopWordDictCfg.split(";");
for (String filePath : filePaths) {
if (filePath != null && !"".equals(filePath.trim())) {
@@ -283,7 +257,7 @@ private List getRemoteExtStopWordDictionarys() {
}
private String getDictRoot() {
- return conf_dir.toAbsolutePath().toString();
+ return configDir.toAbsolutePath().toString(); // absolute path string of the directory resolved in the constructor
}