Skip to content

Commit 7e896fb

Browse files
committed
Fix bug: get pinyin candidates from trie prefix match (key the trie by pinyin instead of word, and collect all prefix matches)
1 parent 99aebf6 commit 7e896fb

File tree

2 files changed

+31
-22
lines changed

2 files changed

+31
-22
lines changed

app/src/main/java/rkr/tinykeyboard/inputmethod/PinyinDict.java

Lines changed: 30 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import java.util.stream.Stream;
1212

1313
public class PinyinDict {
14-
private static PatriciaTrie<Float> trie = new PatriciaTrie<>();
14+
private static PatriciaTrie<Double> trie = new PatriciaTrie<>();
1515
private static Map<String, List<WordInfo>> pinyinDict = new HashMap<>();
1616

1717
public static void buildPinyinDict(String content) {
@@ -22,21 +22,23 @@ public static void buildPinyinDict(String content) {
2222

2323
lines.forEach(line -> {
2424
String[] arr = line.split(" 0 ");
25+
// 董 2494.97706011 0 dong
26+
// 西红柿 760.851466162 0 xi hong shi
2527

2628
if (arr.length == 2) {
2729
String abbr = Arrays.stream(arr[1].split(" "))
2830
.map(item -> item.substring(0, 1))
2931
.collect(Collectors.joining());
3032

31-
String pinyin = arr[1].replaceAll("\\s+", "");
33+
String pinyin = arr[1].replace(" ", "");
3234
String[] wordFrequency = arr[0].split(" ");
3335
String word = wordFrequency[0];
34-
float frequency = Float.parseFloat(wordFrequency[1]);
35-
WordInfo value = new WordInfo(word, frequency);
36-
trie.put(word, frequency);
37-
pinyinDict.computeIfAbsent(pinyin, k -> new ArrayList<>()).add(value);
36+
double frequency = Double.parseDouble(wordFrequency[1]);
37+
WordInfo wordInfo = new WordInfo(word, frequency);
38+
trie.put(pinyin, frequency);
39+
pinyinDict.computeIfAbsent(pinyin, k -> new ArrayList<>()).add(wordInfo);
3840
if (abbr.length() >= 1) {
39-
pinyinDict.computeIfAbsent(abbr, k -> new ArrayList<>()).add(value);
41+
pinyinDict.computeIfAbsent(abbr, k -> new ArrayList<>()).add(wordInfo);
4042
}
4143
}
4244
});
@@ -52,28 +54,34 @@ public static List<String> getCandidates(String input) {
5254
// Full pinyin match or abbr match
5355
list = value;
5456
} else if (input.length() >= 1) {
55-
Map<String, Float> prefixMap = trie.prefixMap(input);
56-
List<Map.Entry<String, Float>> matchingWords = new ArrayList<>(prefixMap.entrySet());
57-
if (!matchingWords.isEmpty()) {
58-
for (Map.Entry<String, Float> entry : matchingWords) {
59-
List<WordInfo> words = pinyinDict.get(entry.getKey());
60-
if (words != null) {
61-
list = words;
62-
}
63-
}
64-
}
57+
// pinyin prefix match
58+
list = getCandidatesFromTrie(input);
6559
}
6660

6761
// Sort candidates by word frequency
6862
candidates = list.stream()
6963
.filter(java.util.Objects::nonNull)
70-
.sorted((a, b) -> Float.compare(b.getFrequency(), a.getFrequency()))
64+
.sorted((a, b) -> Double.compare(b.getFrequency(), a.getFrequency()))
7165
.map(WordInfo::getWord)
7266
.distinct()
7367
.collect(Collectors.toList());
7468
}
7569

76-
// Removing duplicates
70+
return candidates;
71+
}
72+
73+
private static List<WordInfo> getCandidatesFromTrie(String prefix) {
74+
List<WordInfo> candidates = new ArrayList<>();
75+
Map<String, Double> prefixMap = trie.prefixMap(prefix);
76+
if (!prefixMap.isEmpty()) {
77+
List<Map.Entry<String, Double>> matchingWords = new ArrayList<>(prefixMap.entrySet());
78+
for (Map.Entry<String, Double> entry : matchingWords) {
79+
List<WordInfo> words = pinyinDict.get(entry.getKey());
80+
if (words != null) {
81+
candidates.addAll(words);
82+
}
83+
}
84+
}
7785
return candidates;
7886
}
7987

@@ -82,14 +90,14 @@ public String getWord() {
8290
return word;
8391
}
8492

85-
public float getFrequency() {
93+
public double getFrequency() {
8694
return frequency;
8795
}
8896

8997
String word;
90-
float frequency;
98+
double frequency;
9199

92-
WordInfo(String word, float frequency) {
100+
WordInfo(String word, double frequency) {
93101
this.word = word;
94102
this.frequency = frequency;
95103
}

log.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
adb logcat | grep "hallelujah"

0 commit comments

Comments
 (0)