|
1 | 1 | /* |
2 | | - * IK 中文分词 版本 8.3.0 |
3 | | - * IK Analyzer release 8.3.0 |
| 2 | + * IK 中文分词 版本 8.3.1 |
| 3 | + * IK Analyzer release 8.3.1 |
4 | 4 | * |
5 | 5 | * Licensed to the Apache Software Foundation (ASF) under one or more |
6 | 6 | * contributor license agreements. See the NOTICE file distributed with |
|
21 | 21 | * 版权声明 2012,乌龙茶工作室 |
22 | 22 | * provided by Linliangyi and copyright 2012 by Oolong studio |
23 | 23 | * |
24 | | - * 8.3.0版本 由 Magese (magese@live.cn) 更新 |
25 | | - * release 8.3.0 update by Magese(magese@live.cn) |
| 24 | + * 8.3.1版本 由 Magese (magese@live.cn) 更新 |
| 25 | + * release 8.3.1 update by Magese(magese@live.cn) |
26 | 26 | * |
27 | 27 | */ |
28 | 28 | package org.wltea.analyzer.core; |
29 | 29 |
|
30 | | -import java.io.IOException; |
31 | | -import java.io.Reader; |
32 | | -import java.util.HashMap; |
33 | | -import java.util.HashSet; |
34 | | -import java.util.LinkedList; |
35 | | -import java.util.Map; |
36 | | -import java.util.Set; |
37 | | - |
38 | 30 | import org.wltea.analyzer.cfg.Configuration; |
39 | 31 | import org.wltea.analyzer.dic.Dictionary; |
40 | 32 |
|
| 33 | +import java.io.IOException; |
| 34 | +import java.io.Reader; |
| 35 | +import java.util.*; |
| 36 | + |
41 | 37 | /** |
42 | 38 | * 分词器上下文状态 |
43 | 39 | */ |
@@ -66,17 +62,17 @@ class AnalyzeContext { |
66 | 62 |
|
67 | 63 | //子分词器锁 |
68 | 64 | //该集合非空,说明有子分词器在占用segmentBuff |
69 | | - private Set<String> buffLocker; |
| 65 | + private final Set<String> buffLocker; |
70 | 66 |
|
71 | 67 | //原始分词结果集合,未经歧义处理 |
72 | 68 | private QuickSortSet orgLexemes; |
73 | 69 | //LexemePath位置索引表 |
74 | | - private Map<Integer, LexemePath> pathMap; |
| 70 | + private final Map<Integer, LexemePath> pathMap; |
75 | 71 | //最终分词结果集 |
76 | | - private LinkedList<Lexeme> results; |
| 72 | + private final LinkedList<Lexeme> results; |
77 | 73 |
|
78 | 74 | //分词器配置项 |
79 | | - private Configuration cfg; |
| 75 | + private final Configuration cfg; |
80 | 76 |
|
81 | 77 | AnalyzeContext(Configuration cfg) { |
82 | 78 | this.cfg = cfg; |
@@ -254,7 +250,7 @@ QuickSortSet getOrgLexemes() { |
254 | 250 | */ |
255 | 251 | void outputToResult() { |
256 | 252 | int index = 0; |
257 | | - for (; index <= this.cursor; ) { |
| 253 | + while (index <= this.cursor) { |
258 | 254 | //跳过非CJK字符 |
259 | 255 | if (CharacterUtil.CHAR_USELESS == this.charTypes[index]) { |
260 | 256 | index++; |
@@ -353,12 +349,14 @@ private void compound(Lexeme result) { |
353 | 349 | if (Lexeme.TYPE_ARABIC == result.getLexemeType()) { |
354 | 350 | Lexeme nextLexeme = this.results.peekFirst(); |
355 | 351 | boolean appendOk = false; |
356 | | - if (Lexeme.TYPE_CNUM == nextLexeme.getLexemeType()) { |
357 | | - //合并英文数词+中文数词 |
358 | | - appendOk = result.append(nextLexeme, Lexeme.TYPE_CNUM); |
359 | | - } else if (Lexeme.TYPE_COUNT == nextLexeme.getLexemeType()) { |
360 | | - //合并英文数词+中文量词 |
361 | | - appendOk = result.append(nextLexeme, Lexeme.TYPE_CQUAN); |
| 352 | + if (nextLexeme != null) { |
| 353 | + if (Lexeme.TYPE_CNUM == nextLexeme.getLexemeType()) { |
| 354 | + //合并英文数词+中文数词 |
| 355 | + appendOk = result.append(nextLexeme, Lexeme.TYPE_CNUM); |
| 356 | + } else if (Lexeme.TYPE_COUNT == nextLexeme.getLexemeType()) { |
| 357 | + //合并英文数词+中文量词 |
| 358 | + appendOk = result.append(nextLexeme, Lexeme.TYPE_CQUAN); |
| 359 | + } |
362 | 360 | } |
363 | 361 | if (appendOk) { |
364 | 362 | //弹出 |
|
0 commit comments