From 98c5217310e184cc16aad087d1876d1eac3aebbf Mon Sep 17 00:00:00 2001
From: "longzhou.lz@alibaba-inc.com"
Date: Sat, 21 Mar 2020 00:01:04 +0800
Subject: [PATCH] add settings to enable/disable stopwords in analyzer

Change-Id: Ib29ba3a7740cba49145f74b3417e45090bb23f34
---
 src/main/java/org/wltea/analyzer/cfg/Configuration.java   | 8 ++++++++
 src/main/java/org/wltea/analyzer/core/AnalyzeContext.java | 2 +-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/main/java/org/wltea/analyzer/cfg/Configuration.java b/src/main/java/org/wltea/analyzer/cfg/Configuration.java
index dadd0f20..55bfbf1b 100644
--- a/src/main/java/org/wltea/analyzer/cfg/Configuration.java
+++ b/src/main/java/org/wltea/analyzer/cfg/Configuration.java
@@ -21,6 +21,9 @@ public class Configuration {
 	//是否启用智能分词
 	private boolean useSmart;
 
+	//是否使用停止词
+	private boolean useStopWords;
+
 	//是否启用远程词典加载
 	private boolean enableRemoteDict=false;
 
@@ -34,6 +37,7 @@ public Configuration(Environment env,Settings settings) {
 		this.settings=settings;
 
 		this.useSmart = settings.get("use_smart", "false").equals("true");
+		this.useStopWords = settings.get("use_stopwords", "true").equals("true");
 		this.enableLowercase = settings.get("enable_lowercase", "true").equals("true");
 		this.enableRemoteDict = settings.get("enable_remote_dict", "true").equals("true");
 
@@ -52,6 +56,10 @@ public boolean isUseSmart() {
 		return useSmart;
 	}
 
+	public boolean isUseStopWords() {
+		return useStopWords;
+	}
+
 	public Configuration setUseSmart(boolean useSmart) {
 		this.useSmart = useSmart;
 		return this;
diff --git a/src/main/java/org/wltea/analyzer/core/AnalyzeContext.java b/src/main/java/org/wltea/analyzer/core/AnalyzeContext.java
index 890d9080..ea6a0ab0 100644
--- a/src/main/java/org/wltea/analyzer/core/AnalyzeContext.java
+++ b/src/main/java/org/wltea/analyzer/core/AnalyzeContext.java
@@ -322,7 +322,7 @@ Lexeme getNextLexeme(){
 		while(result != null){
 			//数量词合并
 			this.compound(result);
-			if(Dictionary.getSingleton().isStopWord(this.segmentBuff , result.getBegin() , result.getLength())){
+			if(cfg.isUseStopWords() && Dictionary.getSingleton().isStopWord(this.segmentBuff , result.getBegin() , result.getLength())){
 				//是停止词继续取列表的下一个
 				result = this.results.pollFirst();
 			}else{