magese
diff --git a/README.md b/README.md
Lines changed: 8 additions & 4 deletions
diff --git a/pom.xml b/pom.xml
Lines changed: 2 additions & 3 deletions
diff --git a/src/main/java/org/wltea/analyzer/cfg/Configuration.java b/src/main/java/org/wltea/analyzer/cfg/Configuration.java
Lines changed: 4 additions & 4 deletions
diff --git a/src/main/java/org/wltea/analyzer/cfg/DefaultConfig.java b/src/main/java/org/wltea/analyzer/cfg/DefaultConfig.java
Lines changed: 4 additions & 4 deletions
diff --git a/src/main/java/org/wltea/analyzer/core/AnalyzeContext.java b/src/main/java/org/wltea/analyzer/core/AnalyzeContext.java
Lines changed: 21 additions & 23 deletions
diff --git a/src/main/java/org/wltea/analyzer/core/CJKSegmenter.java b/src/main/java/org/wltea/analyzer/core/CJKSegmenter.java
Lines changed: 17 additions & 17 deletions
@@ -22,23 +22,24 @@ ik-analyzer for solr 7.x-8.x
 | word | 64.2万 | 2014年 |
 | jieba | 58.4万 | 2012年 |
 | jcesg | 16.6万 | 2018年 |
-| sougou词库 | 115.2万 | 2019年 |
+| sougou词库 | 115.2万 | 2020年 |
 #### 将以上词库进行整理后约187.1万条词汇；
 #### 添加动态加载词典表功能，在不需要重启solr服务的情况下加载新增的词典。
+> <small>关闭默认主词典请在`IKAnalyzer.cfg.xml`配置文件中设置`use_main_dict`为`false`。</small>
 * IKAnalyzer的原作者为林良益<linliangyi2007@gmail.com>，项目网站为<http://code.google.com/p/ik-analyzer>
 * 该项目动态加载功能根据博主[@星火燎原智勇](http://www.cnblogs.com/liang1101/articles/6395016.html)的博客进行修改，其GITHUB地址为[@liang68](https://github.com/liang68)
 
 
 ## 使用说明
-* jar包下载地址：[![GitHub version](https://img.shields.io/badge/version-8.3.0-519dd9.svg)](https://search.maven.org/remotecontent?filepath=com/github/magese/ik-analyzer/8.3.0/ik-analyzer-8.3.0.jar)
+* jar包下载地址：[![GitHub version](https://img.shields.io/badge/version-8.3.1-519dd9.svg)](https://search.maven.org/remotecontent?filepath=com/github/magese/ik-analyzer/8.3.1/ik-analyzer-8.3.1.jar)
 * 历史版本：[![GitHub version](https://img.shields.io/maven-central/v/com.github.magese/ik-analyzer.svg?style=flat-square)](https://search.maven.org/search?q=g:com.github.magese%20AND%20a:ik-analyzer&core=gav)
 
     ```console
     <!-- Maven仓库地址 -->
     <dependency>
         <groupId>com.github.magese</groupId>
         <artifactId>ik-analyzer</artifactId>
-        <version>8.3.0</version>
+        <version>8.3.1</version>
     </dependency>
     ```
 
@@ -79,7 +80,7 @@ ik-analyzer for solr 7.x-8.x
 5. `IKAnalyzer.cfg.xml`配置文件说明：
 
     | 名称 | 类型 | 描述 | 默认 |
-    | :------: | :------: | :------: | :------: |
+    | ------ | ------ | ------ | ------ |
     | use_main_dict | boolean | 是否使用默认主词典 | true |
     | ext_dict | String | 扩展词典文件名称，多个用分号隔开 | ext.dic; |
     | ext_stopwords | String | 停用词典文件名称，多个用分号隔开 | stopword.dic; |
@@ -100,6 +101,9 @@ ik-analyzer for solr 7.x-8.x
 
 
 ## 更新说明
+- `2020-12-30:`
+    - 升级lucene版本为`8.3.1`
+    - 更新词库
 - `2019-11-12:` 
     - 升级lucene版本为`8.3.0`
     - `IKAnalyzer.cfg.xml`增加配置项`use_main_dict`，用于配置是否启用默认主词典
 
@@ -4,7 +4,7 @@
 
     <groupId>com.github.magese</groupId>
     <artifactId>ik-analyzer</artifactId>
-    <version>8.3.0</version>
+    <version>8.3.1</version>
     <packaging>jar</packaging>
 
     <name>ik-analyzer-solr</name>
@@ -13,7 +13,7 @@
 
     <properties>
         <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
-        <lucene.version>8.3.0</lucene.version>
+        <lucene.version>8.3.1</lucene.version>
         <javac.src.version>1.8</javac.src.version>
         <javac.target.version>1.8</javac.target.version>
         <maven.compiler.plugin.version>3.3</maven.compiler.plugin.version>
@@ -152,4 +152,3 @@
         </profile>
     </profiles>
 </project>
-  
@@ -1,6 +1,6 @@
 /*
- * IK 中文分词  版本 8.3.0
- * IK Analyzer release 8.3.0
+ * IK 中文分词  版本 8.3.1
+ * IK Analyzer release 8.3.1
  *
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -21,8 +21,8 @@
  * 版权声明 2012，乌龙茶工作室
  * provided by Linliangyi and copyright 2012 by Oolong studio
  *
- * 8.3.0版本 由 Magese (magese@live.cn) 更新
- * release 8.3.0 update by Magese(magese@live.cn)
+ * 8.3.1版本 由 Magese (magese@live.cn) 更新
+ * release 8.3.1 update by Magese(magese@live.cn)
  *
  */
 package org.wltea.analyzer.cfg;
 
@@ -1,6 +1,6 @@
 /*
- * IK 中文分词  版本 8.3.0
- * IK Analyzer release 8.3.0
+ * IK 中文分词  版本 8.3.1
+ * IK Analyzer release 8.3.1
  *
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -21,8 +21,8 @@
  * 版权声明 2012，乌龙茶工作室
  * provided by Linliangyi and copyright 2012 by Oolong studio
  *
- * 8.3.0版本 由 Magese (magese@live.cn) 更新
- * release 8.3.0 update by Magese(magese@live.cn)
+ * 8.3.1版本 由 Magese (magese@live.cn) 更新
+ * release 8.3.1 update by Magese(magese@live.cn)
  *
  */
 package org.wltea.analyzer.cfg;
 
@@ -1,6 +1,6 @@
 /*
- * IK 中文分词  版本 8.3.0
- * IK Analyzer release 8.3.0
+ * IK 中文分词  版本 8.3.1
+ * IK Analyzer release 8.3.1
  *
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -21,23 +21,19 @@
  * 版权声明 2012，乌龙茶工作室
  * provided by Linliangyi and copyright 2012 by Oolong studio
  *
- * 8.3.0版本 由 Magese (magese@live.cn) 更新
- * release 8.3.0 update by Magese(magese@live.cn)
+ * 8.3.1版本 由 Magese (magese@live.cn) 更新
+ * release 8.3.1 update by Magese(magese@live.cn)
  *
  */
 package org.wltea.analyzer.core;
 
-import java.io.IOException;
-import java.io.Reader;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.LinkedList;
-import java.util.Map;
-import java.util.Set;
-
 import org.wltea.analyzer.cfg.Configuration;
 import org.wltea.analyzer.dic.Dictionary;
 
+import java.io.IOException;
+import java.io.Reader;
+import java.util.*;
+
 /**
  * 分词器上下文状态
  */
@@ -66,17 +62,17 @@ class AnalyzeContext {
 
     //子分词器锁
     //该集合非空，说明有子分词器在占用segmentBuff
-    private Set<String> buffLocker;
+    private final Set<String> buffLocker;
 
     //原始分词结果集合，未经歧义处理
     private QuickSortSet orgLexemes;
     //LexemePath位置索引表
-    private Map<Integer, LexemePath> pathMap;
+    private final Map<Integer, LexemePath> pathMap;
     //最终分词结果集
-    private LinkedList<Lexeme> results;
+    private final LinkedList<Lexeme> results;
 
     //分词器配置项
-    private Configuration cfg;
+    private final Configuration cfg;
 
     AnalyzeContext(Configuration cfg) {
         this.cfg = cfg;
@@ -254,7 +250,7 @@ QuickSortSet getOrgLexemes() {
      */
     void outputToResult() {
         int index = 0;
-        for (; index <= this.cursor; ) {
+        while (index <= this.cursor) {
             //跳过非CJK字符
             if (CharacterUtil.CHAR_USELESS == this.charTypes[index]) {
                 index++;
@@ -353,12 +349,14 @@ private void compound(Lexeme result) {
             if (Lexeme.TYPE_ARABIC == result.getLexemeType()) {
                 Lexeme nextLexeme = this.results.peekFirst();
                 boolean appendOk = false;
-                if (Lexeme.TYPE_CNUM == nextLexeme.getLexemeType()) {
-                    //合并英文数词+中文数词
-                    appendOk = result.append(nextLexeme, Lexeme.TYPE_CNUM);
-                } else if (Lexeme.TYPE_COUNT == nextLexeme.getLexemeType()) {
-                    //合并英文数词+中文量词
-                    appendOk = result.append(nextLexeme, Lexeme.TYPE_CQUAN);
+                if (nextLexeme != null) {
+                    if (Lexeme.TYPE_CNUM == nextLexeme.getLexemeType()) {
+                        //合并英文数词+中文数词
+                        appendOk = result.append(nextLexeme, Lexeme.TYPE_CNUM);
+                    } else if (Lexeme.TYPE_COUNT == nextLexeme.getLexemeType()) {
+                        //合并英文数词+中文量词
+                        appendOk = result.append(nextLexeme, Lexeme.TYPE_CQUAN);
+                    }
                 }
                 if (appendOk) {
                     //弹出
 
@@ -1,6 +1,6 @@
 /*
- * IK 中文分词  版本 8.3.0
- * IK Analyzer release 8.3.0
+ * IK 中文分词  版本 8.3.1
+ * IK Analyzer release 8.3.1
  *
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -21,8 +21,8 @@
  * 版权声明 2012，乌龙茶工作室
  * provided by Linliangyi and copyright 2012 by Oolong studio
  *
- * 8.3.0版本 由 Magese (magese@live.cn) 更新
- * release 8.3.0 update by Magese(magese@live.cn)
+ * 8.3.1版本 由 Magese (magese@live.cn) 更新
+ * release 8.3.1 update by Magese(magese@live.cn)
  *
  */
 package org.wltea.analyzer.core;
@@ -38,13 +38,13 @@
  *  中文-日韩文子分词器
  */
 class CJKSegmenter implements ISegmenter {
-	
+
 	//子分词器标签
 	private static final String SEGMENTER_NAME = "CJK_SEGMENTER";
 	//待处理的分词hit队列
 	private List<Hit> tmpHits;
-	
-	
+
+
 	CJKSegmenter(){
 		this.tmpHits = new LinkedList<>();
 	}
@@ -54,7 +54,7 @@ class CJKSegmenter implements ISegmenter {
 	 */
 	public void analyze(AnalyzeContext context) {
 		if(CharacterUtil.CHAR_USELESS != context.getCurrentCharType()){
-			
+
 			//优先处理tmpHits中的hit
 			if(!this.tmpHits.isEmpty()){
 				//处理词段队列
@@ -65,18 +65,18 @@ public void analyze(AnalyzeContext context) {
 						//输出当前的词
 						Lexeme newLexeme = new Lexeme(context.getBufferOffset() , hit.getBegin() , context.getCursor() - hit.getBegin() + 1 , Lexeme.TYPE_CNWORD);
 						context.addLexeme(newLexeme);
-						
+
 						if(!hit.isPrefix()){//不是词前缀，hit不需要继续匹配，移除
 							this.tmpHits.remove(hit);
 						}
-						
+
 					}else if(hit.isUnmatch()){
 						//hit不是词，移除
 						this.tmpHits.remove(hit);
-					}					
+					}
 				}
-			}			
-			
+			}
+
 			//*********************************
 			//再对当前指针位置的字符进行单字匹配
 			Hit singleCharHit = Dictionary.getSingleton().matchInMainDict(context.getSegmentBuff(), context.getCursor(), 1);
@@ -94,24 +94,24 @@ public void analyze(AnalyzeContext context) {
 				//前缀匹配则放入hit列表
 				this.tmpHits.add(singleCharHit);
 			}
-			
+
 
 		}else{
 			//遇到CHAR_USELESS字符
 			//清空队列
 			this.tmpHits.clear();
 		}
-		
+
 		//判断缓冲区是否已经读完
 		if(context.isBufferConsumed()){
 			//清空队列
 			this.tmpHits.clear();
 		}
-		
+
 		//判断是否锁定缓冲区
 		if(this.tmpHits.size() == 0){
 			context.unlockBuffer(SEGMENTER_NAME);
-			
+
 		}else{
 			context.lockBuffer(SEGMENTER_NAME);
 		}
Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`	`1`	`/*`
`2`		`- * IK 中文分词版本 8.3.0`
`3`		`- * IK Analyzer release 8.3.0`
	`2`	`+ * IK 中文分词版本 8.3.1`
	`3`	`+ * IK Analyzer release 8.3.1`
`4`	`4`	`*`
`5`	`5`	`* Licensed to the Apache Software Foundation (ASF) under one or more`
`6`	`6`	`* contributor license agreements. See the NOTICE file distributed with`
`@@ -21,8 +21,8 @@`
`21`	`21`	`* 版权声明 2012，乌龙茶工作室`
`22`	`22`	`* provided by Linliangyi and copyright 2012 by Oolong studio`
`23`	`23`	`*`
`24`		`- * 8.3.0版本由 Magese (magese@live.cn) 更新`
`25`		`- * release 8.3.0 update by Magese(magese@live.cn)`
	`24`	`+ * 8.3.1版本由 Magese (magese@live.cn) 更新`
	`25`	`+ * release 8.3.1 update by Magese(magese@live.cn)`
`26`	`26`	`*`
`27`	`27`	`*/`
`28`	`28`	`package org.wltea.analyzer.cfg;`