26
26
package org .wltea .analyzer .dic ;
27
27
28
28
import java .io .BufferedReader ;
29
- import java .io .File ;
30
29
import java .io .FileInputStream ;
31
30
import java .io .FileNotFoundException ;
32
31
import java .io .IOException ;
@@ -201,6 +200,28 @@ public FileVisitResult visitFileFailed(Path file, IOException e) {
201
200
return files ;
202
201
}
203
202
203
+ private void loadDictFile (DictSegment dict , Path file , boolean critical , String name ) {
204
+ try (InputStream is = new FileInputStream (file .toFile ())) {
205
+ BufferedReader br = new BufferedReader (
206
+ new InputStreamReader (is , "UTF-8" ), 512 );
207
+ String word = br .readLine ();
208
+ if (word != null ) {
209
+ if (word .startsWith ("\uFEFF " ))
210
+ word = word .substring (1 );
211
+ for (; word != null ; word = br .readLine ()) {
212
+ word = word .trim ();
213
+ if (word .isEmpty ()) continue ;
214
+ dict .fillSegment (word .toCharArray ());
215
+ }
216
+ }
217
+ } catch (FileNotFoundException e ) {
218
+ logger .error ("ik-analyzer: " + name + " not found" , e );
219
+ if (critical ) throw new RuntimeException ("ik-analyzer: " + name + " not found!!!" , e );
220
+ } catch (IOException e ) {
221
+ logger .error ("ik-analyzer: " + name + " loading failed" , e );
222
+ }
223
+ }
224
+
204
225
public List <String > getExtDictionarys () {
205
226
List <String > extDictFiles = new ArrayList <String >(2 );
206
227
String extDictCfg = getProperty (EXT_DICT );
@@ -371,37 +392,7 @@ private void loadMainDict() {
371
392
372
393
// 读取主词典文件
373
394
Path file = PathUtils .get (getDictRoot (), Dictionary .PATH_DIC_MAIN );
374
-
375
- InputStream is = null ;
376
- try {
377
- is = new FileInputStream (file .toFile ());
378
- } catch (FileNotFoundException e ) {
379
- logger .error (e .getMessage (), e );
380
- }
381
-
382
- try {
383
- BufferedReader br = new BufferedReader (new InputStreamReader (is , "UTF-8" ), 512 );
384
- String theWord = null ;
385
- do {
386
- theWord = br .readLine ();
387
- if (theWord != null && !"" .equals (theWord .trim ())) {
388
- _MainDict .fillSegment (theWord .trim ().toCharArray ());
389
- }
390
- } while (theWord != null );
391
-
392
- } catch (IOException e ) {
393
- logger .error ("ik-analyzer" , e );
394
-
395
- } finally {
396
- try {
397
- if (is != null ) {
398
- is .close ();
399
- is = null ;
400
- }
401
- } catch (IOException e ) {
402
- logger .error ("ik-analyzer" , e );
403
- }
404
- }
395
+ loadDictFile (_MainDict , file , false , "Main Dict" );
405
396
// 加载扩展词典
406
397
this .loadExtDict ();
407
398
// 加载远程自定义词库
@@ -415,44 +406,11 @@ private void loadExtDict() {
415
406
// 加载扩展词典配置
416
407
List <String > extDictFiles = getExtDictionarys ();
417
408
if (extDictFiles != null ) {
418
- InputStream is = null ;
419
409
for (String extDictName : extDictFiles ) {
420
410
// 读取扩展词典文件
421
411
logger .info ("[Dict Loading] " + extDictName );
422
412
Path file = PathUtils .get (extDictName );
423
- try {
424
- is = new FileInputStream (file .toFile ());
425
- } catch (FileNotFoundException e ) {
426
- logger .error ("ik-analyzer" , e );
427
- }
428
-
429
- // 如果找不到扩展的字典,则忽略
430
- if (is == null ) {
431
- continue ;
432
- }
433
- try {
434
- BufferedReader br = new BufferedReader (new InputStreamReader (is , "UTF-8" ), 512 );
435
- String theWord = null ;
436
- do {
437
- theWord = br .readLine ();
438
- if (theWord != null && !"" .equals (theWord .trim ())) {
439
- // 加载扩展词典数据到主内存词典中
440
- _MainDict .fillSegment (theWord .trim ().toCharArray ());
441
- }
442
- } while (theWord != null );
443
-
444
- } catch (IOException e ) {
445
- logger .error ("ik-analyzer" , e );
446
- } finally {
447
- try {
448
- if (is != null ) {
449
- is .close ();
450
- is = null ;
451
- }
452
- } catch (IOException e ) {
453
- logger .error ("ik-analyzer" , e );
454
- }
455
- }
413
+ loadDictFile (_MainDict , file , false , "Extra Dict" );
456
414
}
457
415
}
458
416
}
@@ -533,80 +491,17 @@ private void loadStopWordDict() {
533
491
534
492
// 读取主词典文件
535
493
Path file = PathUtils .get (getDictRoot (), Dictionary .PATH_DIC_STOP );
536
-
537
- InputStream is = null ;
538
- try {
539
- is = new FileInputStream (file .toFile ());
540
- } catch (FileNotFoundException e ) {
541
- logger .error (e .getMessage (), e );
542
- }
543
-
544
- try {
545
- BufferedReader br = new BufferedReader (new InputStreamReader (is , "UTF-8" ), 512 );
546
- String theWord = null ;
547
- do {
548
- theWord = br .readLine ();
549
- if (theWord != null && !"" .equals (theWord .trim ())) {
550
- _StopWords .fillSegment (theWord .trim ().toCharArray ());
551
- }
552
- } while (theWord != null );
553
-
554
- } catch (IOException e ) {
555
- logger .error ("ik-analyzer" , e );
556
-
557
- } finally {
558
- try {
559
- if (is != null ) {
560
- is .close ();
561
- is = null ;
562
- }
563
- } catch (IOException e ) {
564
- logger .error ("ik-analyzer" , e );
565
- }
566
- }
494
+ loadDictFile (_StopWords , file , false , "Main Stopwords" );
567
495
568
496
// 加载扩展停止词典
569
497
List <String > extStopWordDictFiles = getExtStopWordDictionarys ();
570
498
if (extStopWordDictFiles != null ) {
571
- is = null ;
572
499
for (String extStopWordDictName : extStopWordDictFiles ) {
573
500
logger .info ("[Dict Loading] " + extStopWordDictName );
574
501
575
502
// 读取扩展词典文件
576
503
file = PathUtils .get (extStopWordDictName );
577
- try {
578
- is = new FileInputStream (file .toFile ());
579
- } catch (FileNotFoundException e ) {
580
- logger .error ("ik-analyzer" , e );
581
- }
582
- // 如果找不到扩展的字典,则忽略
583
- if (is == null ) {
584
- continue ;
585
- }
586
- try {
587
- BufferedReader br = new BufferedReader (new InputStreamReader (is , "UTF-8" ), 512 );
588
- String theWord = null ;
589
- do {
590
- theWord = br .readLine ();
591
- if (theWord != null && !"" .equals (theWord .trim ())) {
592
- // 加载扩展停止词典数据到内存中
593
- _StopWords .fillSegment (theWord .trim ().toCharArray ());
594
- }
595
- } while (theWord != null );
596
-
597
- } catch (IOException e ) {
598
- logger .error ("ik-analyzer" , e );
599
-
600
- } finally {
601
- try {
602
- if (is != null ) {
603
- is .close ();
604
- is = null ;
605
- }
606
- } catch (IOException e ) {
607
- logger .error ("ik-analyzer" , e );
608
- }
609
- }
504
+ loadDictFile (_StopWords , file , false , "Extra Stopwords" );
610
505
}
611
506
}
612
507
@@ -639,142 +534,25 @@ private void loadQuantifierDict() {
639
534
_QuantifierDict = new DictSegment ((char ) 0 );
640
535
// 读取量词词典文件
641
536
Path file = PathUtils .get (getDictRoot (), Dictionary .PATH_DIC_QUANTIFIER );
642
- InputStream is = null ;
643
- try {
644
- is = new FileInputStream (file .toFile ());
645
- } catch (FileNotFoundException e ) {
646
- logger .error ("ik-analyzer" , e );
647
- }
648
- try {
649
- BufferedReader br = new BufferedReader (new InputStreamReader (is , "UTF-8" ), 512 );
650
- String theWord = null ;
651
- do {
652
- theWord = br .readLine ();
653
- if (theWord != null && !"" .equals (theWord .trim ())) {
654
- _QuantifierDict .fillSegment (theWord .trim ().toCharArray ());
655
- }
656
- } while (theWord != null );
657
-
658
- } catch (IOException ioe ) {
659
- logger .error ("Quantifier Dictionary loading exception." );
660
-
661
- } finally {
662
- try {
663
- if (is != null ) {
664
- is .close ();
665
- is = null ;
666
- }
667
- } catch (IOException e ) {
668
- logger .error ("ik-analyzer" , e );
669
- }
670
- }
537
+ loadDictFile (_QuantifierDict , file , false , "Quantifier" );
671
538
}
672
539
673
540
private void loadSurnameDict () {
674
-
675
541
_SurnameDict = new DictSegment ((char ) 0 );
676
542
Path file = PathUtils .get (getDictRoot (), Dictionary .PATH_DIC_SURNAME );
677
- InputStream is = null ;
678
- try {
679
- is = new FileInputStream (file .toFile ());
680
- } catch (FileNotFoundException e ) {
681
- logger .error ("ik-analyzer" , e );
682
- }
683
- if (is == null ) {
684
- throw new RuntimeException ("Surname Dictionary not found!!!" );
685
- }
686
- try {
687
- BufferedReader br = new BufferedReader (new InputStreamReader (is , "UTF-8" ), 512 );
688
- String theWord ;
689
- do {
690
- theWord = br .readLine ();
691
- if (theWord != null && !"" .equals (theWord .trim ())) {
692
- _SurnameDict .fillSegment (theWord .trim ().toCharArray ());
693
- }
694
- } while (theWord != null );
695
- } catch (IOException e ) {
696
- logger .error ("ik-analyzer" , e );
697
- } finally {
698
- try {
699
- if (is != null ) {
700
- is .close ();
701
- is = null ;
702
- }
703
- } catch (IOException e ) {
704
- logger .error ("ik-analyzer" , e );
705
- }
706
- }
543
+ loadDictFile (_SurnameDict , file , true , "Surname" );
707
544
}
708
545
709
546
private void loadSuffixDict () {
710
-
711
547
_SuffixDict = new DictSegment ((char ) 0 );
712
548
Path file = PathUtils .get (getDictRoot (), Dictionary .PATH_DIC_SUFFIX );
713
- InputStream is = null ;
714
- try {
715
- is = new FileInputStream (file .toFile ());
716
- } catch (FileNotFoundException e ) {
717
- logger .error ("ik-analyzer" , e );
718
- }
719
- if (is == null ) {
720
- throw new RuntimeException ("Suffix Dictionary not found!!!" );
721
- }
722
- try {
723
-
724
- BufferedReader br = new BufferedReader (new InputStreamReader (is , "UTF-8" ), 512 );
725
- String theWord ;
726
- do {
727
- theWord = br .readLine ();
728
- if (theWord != null && !"" .equals (theWord .trim ())) {
729
- _SuffixDict .fillSegment (theWord .trim ().toCharArray ());
730
- }
731
- } while (theWord != null );
732
- } catch (IOException e ) {
733
- logger .error ("ik-analyzer" , e );
734
- } finally {
735
- try {
736
- is .close ();
737
- is = null ;
738
- } catch (IOException e ) {
739
- logger .error ("ik-analyzer" , e );
740
- }
741
- }
549
+ loadDictFile (_SuffixDict , file , true , "Suffix" );
742
550
}
743
551
744
552
private void loadPrepDict () {
745
-
746
553
_PrepDict = new DictSegment ((char ) 0 );
747
554
Path file = PathUtils .get (getDictRoot (), Dictionary .PATH_DIC_PREP );
748
- InputStream is = null ;
749
- try {
750
- is = new FileInputStream (file .toFile ());
751
- } catch (FileNotFoundException e ) {
752
- logger .error ("ik-analyzer" , e );
753
- }
754
- if (is == null ) {
755
- throw new RuntimeException ("Preposition Dictionary not found!!!" );
756
- }
757
- try {
758
-
759
- BufferedReader br = new BufferedReader (new InputStreamReader (is , "UTF-8" ), 512 );
760
- String theWord ;
761
- do {
762
- theWord = br .readLine ();
763
- if (theWord != null && !"" .equals (theWord .trim ())) {
764
-
765
- _PrepDict .fillSegment (theWord .trim ().toCharArray ());
766
- }
767
- } while (theWord != null );
768
- } catch (IOException e ) {
769
- logger .error ("ik-analyzer" , e );
770
- } finally {
771
- try {
772
- is .close ();
773
- is = null ;
774
- } catch (IOException e ) {
775
- logger .error ("ik-analyzer" , e );
776
- }
777
- }
555
+ loadDictFile (_PrepDict , file , true , "Preposition" );
778
556
}
779
557
780
558
public void reLoadMainDict () {
0 commit comments