1111import java .util .stream .Stream ;
1212
1313public class PinyinDict {
14- private static PatriciaTrie <Float > trie = new PatriciaTrie <>();
14+ private static PatriciaTrie <Double > trie = new PatriciaTrie <>();
1515 private static Map <String , List <WordInfo >> pinyinDict = new HashMap <>();
1616
1717 public static void buildPinyinDict (String content ) {
@@ -22,21 +22,23 @@ public static void buildPinyinDict(String content) {
2222
2323 lines .forEach (line -> {
2424 String [] arr = line .split (" 0 " );
25+ // 董 2494.97706011 0 dong
26+ // 西红柿 760.851466162 0 xi hong shi
2527
2628 if (arr .length == 2 ) {
2729 String abbr = Arrays .stream (arr [1 ].split (" " ))
2830 .map (item -> item .substring (0 , 1 ))
2931 .collect (Collectors .joining ());
3032
31- String pinyin = arr [1 ].replaceAll ( " \\ s+ " , "" );
33+ String pinyin = arr [1 ].replace ( " " , "" );
3234 String [] wordFrequency = arr [0 ].split (" " );
3335 String word = wordFrequency [0 ];
34- float frequency = Float . parseFloat (wordFrequency [1 ]);
35- WordInfo value = new WordInfo (word , frequency );
36- trie .put (word , frequency );
37- pinyinDict .computeIfAbsent (pinyin , k -> new ArrayList <>()).add (value );
36+ double frequency = Double . parseDouble (wordFrequency [1 ]);
37+ WordInfo wordInfo = new WordInfo (word , frequency );
38+ trie .put (pinyin , frequency );
39+ pinyinDict .computeIfAbsent (pinyin , k -> new ArrayList <>()).add (wordInfo );
3840 if (abbr .length () >= 1 ) {
39- pinyinDict .computeIfAbsent (abbr , k -> new ArrayList <>()).add (value );
41+ pinyinDict .computeIfAbsent (abbr , k -> new ArrayList <>()).add (wordInfo );
4042 }
4143 }
4244 });
@@ -52,28 +54,34 @@ public static List<String> getCandidates(String input) {
5254 // Full pinyin match or abbr match
5355 list = value ;
5456 } else if (input .length () >= 1 ) {
55- Map <String , Float > prefixMap = trie .prefixMap (input );
56- List <Map .Entry <String , Float >> matchingWords = new ArrayList <>(prefixMap .entrySet ());
57- if (!matchingWords .isEmpty ()) {
58- for (Map .Entry <String , Float > entry : matchingWords ) {
59- List <WordInfo > words = pinyinDict .get (entry .getKey ());
60- if (words != null ) {
61- list = words ;
62- }
63- }
64- }
57+ // pinyin prefix match
58+ list = getCandidatesFromTrie (input );
6559 }
6660
6761 // Sort candidates by word frequency
6862 candidates = list .stream ()
6963 .filter (java .util .Objects ::nonNull )
70- .sorted ((a , b ) -> Float .compare (b .getFrequency (), a .getFrequency ()))
64+ .sorted ((a , b ) -> Double .compare (b .getFrequency (), a .getFrequency ()))
7165 .map (WordInfo ::getWord )
7266 .distinct ()
7367 .collect (Collectors .toList ());
7468 }
7569
76- // Removing duplicates
70+ return candidates ;
71+ }
72+
73+ private static List <WordInfo > getCandidatesFromTrie (String prefix ) {
74+ List <WordInfo > candidates = new ArrayList <>();
75+ Map <String , Double > prefixMap = trie .prefixMap (prefix );
76+ if (!prefixMap .isEmpty ()) {
77+ List <Map .Entry <String , Double >> matchingWords = new ArrayList <>(prefixMap .entrySet ());
78+ for (Map .Entry <String , Double > entry : matchingWords ) {
79+ List <WordInfo > words = pinyinDict .get (entry .getKey ());
80+ if (words != null ) {
81+ candidates .addAll (words );
82+ }
83+ }
84+ }
7785 return candidates ;
7886 }
7987
@@ -82,14 +90,14 @@ public String getWord() {
8290 return word ;
8391 }
8492
85- public float getFrequency () {
93+ public double getFrequency () {
8694 return frequency ;
8795 }
8896
8997 String word ;
90- float frequency ;
98+ double frequency ;
9199
/**
 * Creates an entry that pairs a word with its frequency.
 *
 * @param word      the word to store
 * @param frequency the frequency value to store alongside the word
 */
WordInfo(String word, double frequency) {
    this.frequency = frequency;
    this.word = word;
}
0 commit comments