Skip to content

Commit 21a859a

Browse files
committed
Merge branch 'master' of github.com:medcl/elasticsearch-analysis-ik
2 parents 816b8dd + 7028b9e commit 21a859a

File tree

1 file changed

+30
-252
lines changed

1 file changed

+30
-252
lines changed

src/main/java/org/wltea/analyzer/dic/Dictionary.java

Lines changed: 30 additions & 252 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626
package org.wltea.analyzer.dic;
2727

2828
import java.io.BufferedReader;
29-
import java.io.File;
3029
import java.io.FileInputStream;
3130
import java.io.FileNotFoundException;
3231
import java.io.IOException;
@@ -201,6 +200,28 @@ public FileVisitResult visitFileFailed(Path file, IOException e) {
201200
return files;
202201
}
203202

203+
private void loadDictFile(DictSegment dict, Path file, boolean critical, String name) {
204+
try (InputStream is = new FileInputStream(file.toFile())) {
205+
BufferedReader br = new BufferedReader(
206+
new InputStreamReader(is, "UTF-8"), 512);
207+
String word = br.readLine();
208+
if (word != null) {
209+
if (word.startsWith("\uFEFF"))
210+
word = word.substring(1);
211+
for (; word != null; word = br.readLine()) {
212+
word = word.trim();
213+
if (word.isEmpty()) continue;
214+
dict.fillSegment(word.toCharArray());
215+
}
216+
}
217+
} catch (FileNotFoundException e) {
218+
logger.error("ik-analyzer: " + name + " not found", e);
219+
if (critical) throw new RuntimeException("ik-analyzer: " + name + " not found!!!", e);
220+
} catch (IOException e) {
221+
logger.error("ik-analyzer: " + name + " loading failed", e);
222+
}
223+
}
224+
204225
public List<String> getExtDictionarys() {
205226
List<String> extDictFiles = new ArrayList<String>(2);
206227
String extDictCfg = getProperty(EXT_DICT);
@@ -371,37 +392,7 @@ private void loadMainDict() {
371392

372393
// 读取主词典文件
373394
Path file = PathUtils.get(getDictRoot(), Dictionary.PATH_DIC_MAIN);
374-
375-
InputStream is = null;
376-
try {
377-
is = new FileInputStream(file.toFile());
378-
} catch (FileNotFoundException e) {
379-
logger.error(e.getMessage(), e);
380-
}
381-
382-
try {
383-
BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
384-
String theWord = null;
385-
do {
386-
theWord = br.readLine();
387-
if (theWord != null && !"".equals(theWord.trim())) {
388-
_MainDict.fillSegment(theWord.trim().toCharArray());
389-
}
390-
} while (theWord != null);
391-
392-
} catch (IOException e) {
393-
logger.error("ik-analyzer", e);
394-
395-
} finally {
396-
try {
397-
if (is != null) {
398-
is.close();
399-
is = null;
400-
}
401-
} catch (IOException e) {
402-
logger.error("ik-analyzer", e);
403-
}
404-
}
395+
loadDictFile(_MainDict, file, false, "Main Dict");
405396
// 加载扩展词典
406397
this.loadExtDict();
407398
// 加载远程自定义词库
@@ -415,44 +406,11 @@ private void loadExtDict() {
415406
// 加载扩展词典配置
416407
List<String> extDictFiles = getExtDictionarys();
417408
if (extDictFiles != null) {
418-
InputStream is = null;
419409
for (String extDictName : extDictFiles) {
420410
// 读取扩展词典文件
421411
logger.info("[Dict Loading] " + extDictName);
422412
Path file = PathUtils.get(extDictName);
423-
try {
424-
is = new FileInputStream(file.toFile());
425-
} catch (FileNotFoundException e) {
426-
logger.error("ik-analyzer", e);
427-
}
428-
429-
// 如果找不到扩展的字典,则忽略
430-
if (is == null) {
431-
continue;
432-
}
433-
try {
434-
BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
435-
String theWord = null;
436-
do {
437-
theWord = br.readLine();
438-
if (theWord != null && !"".equals(theWord.trim())) {
439-
// 加载扩展词典数据到主内存词典中
440-
_MainDict.fillSegment(theWord.trim().toCharArray());
441-
}
442-
} while (theWord != null);
443-
444-
} catch (IOException e) {
445-
logger.error("ik-analyzer", e);
446-
} finally {
447-
try {
448-
if (is != null) {
449-
is.close();
450-
is = null;
451-
}
452-
} catch (IOException e) {
453-
logger.error("ik-analyzer", e);
454-
}
455-
}
413+
loadDictFile(_MainDict, file, false, "Extra Dict");
456414
}
457415
}
458416
}
@@ -533,80 +491,17 @@ private void loadStopWordDict() {
533491

534492
// Read the main stop-word dictionary file
535493
Path file = PathUtils.get(getDictRoot(), Dictionary.PATH_DIC_STOP);
536-
537-
InputStream is = null;
538-
try {
539-
is = new FileInputStream(file.toFile());
540-
} catch (FileNotFoundException e) {
541-
logger.error(e.getMessage(), e);
542-
}
543-
544-
try {
545-
BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
546-
String theWord = null;
547-
do {
548-
theWord = br.readLine();
549-
if (theWord != null && !"".equals(theWord.trim())) {
550-
_StopWords.fillSegment(theWord.trim().toCharArray());
551-
}
552-
} while (theWord != null);
553-
554-
} catch (IOException e) {
555-
logger.error("ik-analyzer", e);
556-
557-
} finally {
558-
try {
559-
if (is != null) {
560-
is.close();
561-
is = null;
562-
}
563-
} catch (IOException e) {
564-
logger.error("ik-analyzer", e);
565-
}
566-
}
494+
loadDictFile(_StopWords, file, false, "Main Stopwords");
567495

568496
// 加载扩展停止词典
569497
List<String> extStopWordDictFiles = getExtStopWordDictionarys();
570498
if (extStopWordDictFiles != null) {
571-
is = null;
572499
for (String extStopWordDictName : extStopWordDictFiles) {
573500
logger.info("[Dict Loading] " + extStopWordDictName);
574501

575502
// 读取扩展词典文件
576503
file = PathUtils.get(extStopWordDictName);
577-
try {
578-
is = new FileInputStream(file.toFile());
579-
} catch (FileNotFoundException e) {
580-
logger.error("ik-analyzer", e);
581-
}
582-
// 如果找不到扩展的字典,则忽略
583-
if (is == null) {
584-
continue;
585-
}
586-
try {
587-
BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
588-
String theWord = null;
589-
do {
590-
theWord = br.readLine();
591-
if (theWord != null && !"".equals(theWord.trim())) {
592-
// 加载扩展停止词典数据到内存中
593-
_StopWords.fillSegment(theWord.trim().toCharArray());
594-
}
595-
} while (theWord != null);
596-
597-
} catch (IOException e) {
598-
logger.error("ik-analyzer", e);
599-
600-
} finally {
601-
try {
602-
if (is != null) {
603-
is.close();
604-
is = null;
605-
}
606-
} catch (IOException e) {
607-
logger.error("ik-analyzer", e);
608-
}
609-
}
504+
loadDictFile(_StopWords, file, false, "Extra Stopwords");
610505
}
611506
}
612507

@@ -639,142 +534,25 @@ private void loadQuantifierDict() {
639534
_QuantifierDict = new DictSegment((char) 0);
640535
// 读取量词词典文件
641536
Path file = PathUtils.get(getDictRoot(), Dictionary.PATH_DIC_QUANTIFIER);
642-
InputStream is = null;
643-
try {
644-
is = new FileInputStream(file.toFile());
645-
} catch (FileNotFoundException e) {
646-
logger.error("ik-analyzer", e);
647-
}
648-
try {
649-
BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
650-
String theWord = null;
651-
do {
652-
theWord = br.readLine();
653-
if (theWord != null && !"".equals(theWord.trim())) {
654-
_QuantifierDict.fillSegment(theWord.trim().toCharArray());
655-
}
656-
} while (theWord != null);
657-
658-
} catch (IOException ioe) {
659-
logger.error("Quantifier Dictionary loading exception.");
660-
661-
} finally {
662-
try {
663-
if (is != null) {
664-
is.close();
665-
is = null;
666-
}
667-
} catch (IOException e) {
668-
logger.error("ik-analyzer", e);
669-
}
670-
}
537+
loadDictFile(_QuantifierDict, file, false, "Quantifier");
671538
}
672539

673540
private void loadSurnameDict() {
674-
675541
_SurnameDict = new DictSegment((char) 0);
676542
Path file = PathUtils.get(getDictRoot(), Dictionary.PATH_DIC_SURNAME);
677-
InputStream is = null;
678-
try {
679-
is = new FileInputStream(file.toFile());
680-
} catch (FileNotFoundException e) {
681-
logger.error("ik-analyzer", e);
682-
}
683-
if (is == null) {
684-
throw new RuntimeException("Surname Dictionary not found!!!");
685-
}
686-
try {
687-
BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
688-
String theWord;
689-
do {
690-
theWord = br.readLine();
691-
if (theWord != null && !"".equals(theWord.trim())) {
692-
_SurnameDict.fillSegment(theWord.trim().toCharArray());
693-
}
694-
} while (theWord != null);
695-
} catch (IOException e) {
696-
logger.error("ik-analyzer", e);
697-
} finally {
698-
try {
699-
if (is != null) {
700-
is.close();
701-
is = null;
702-
}
703-
} catch (IOException e) {
704-
logger.error("ik-analyzer", e);
705-
}
706-
}
543+
loadDictFile(_SurnameDict, file, true, "Surname");
707544
}
708545

709546
private void loadSuffixDict() {
710-
711547
_SuffixDict = new DictSegment((char) 0);
712548
Path file = PathUtils.get(getDictRoot(), Dictionary.PATH_DIC_SUFFIX);
713-
InputStream is = null;
714-
try {
715-
is = new FileInputStream(file.toFile());
716-
} catch (FileNotFoundException e) {
717-
logger.error("ik-analyzer", e);
718-
}
719-
if (is == null) {
720-
throw new RuntimeException("Suffix Dictionary not found!!!");
721-
}
722-
try {
723-
724-
BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
725-
String theWord;
726-
do {
727-
theWord = br.readLine();
728-
if (theWord != null && !"".equals(theWord.trim())) {
729-
_SuffixDict.fillSegment(theWord.trim().toCharArray());
730-
}
731-
} while (theWord != null);
732-
} catch (IOException e) {
733-
logger.error("ik-analyzer", e);
734-
} finally {
735-
try {
736-
is.close();
737-
is = null;
738-
} catch (IOException e) {
739-
logger.error("ik-analyzer", e);
740-
}
741-
}
549+
loadDictFile(_SuffixDict, file, true, "Suffix");
742550
}
743551

744552
private void loadPrepDict() {
745-
746553
_PrepDict = new DictSegment((char) 0);
747554
Path file = PathUtils.get(getDictRoot(), Dictionary.PATH_DIC_PREP);
748-
InputStream is = null;
749-
try {
750-
is = new FileInputStream(file.toFile());
751-
} catch (FileNotFoundException e) {
752-
logger.error("ik-analyzer", e);
753-
}
754-
if (is == null) {
755-
throw new RuntimeException("Preposition Dictionary not found!!!");
756-
}
757-
try {
758-
759-
BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
760-
String theWord;
761-
do {
762-
theWord = br.readLine();
763-
if (theWord != null && !"".equals(theWord.trim())) {
764-
765-
_PrepDict.fillSegment(theWord.trim().toCharArray());
766-
}
767-
} while (theWord != null);
768-
} catch (IOException e) {
769-
logger.error("ik-analyzer", e);
770-
} finally {
771-
try {
772-
is.close();
773-
is = null;
774-
} catch (IOException e) {
775-
logger.error("ik-analyzer", e);
776-
}
777-
}
555+
loadDictFile(_PrepDict, file, true, "Preposition");
778556
}
779557

780558
public void reLoadMainDict() {

0 commit comments

Comments
 (0)