Skip to content

Commit 27270c8

Browse files
committed
input: Implement T9 Predictive Text Engine
This commit introduces the T9PredictiveEngine class, which provides functionality for word prediction based on numeric key sequences. The engine utilizes a compact dictionary stored in flash memory to minimize RAM usage. Key features include adding key presses, backspacing, resetting the sequence, and navigating through word candidates. Signed-off-by: Chiho Sin <[email protected]>
1 parent c4c2c1b commit 27270c8

File tree

7 files changed

+12825
-7
lines changed

7 files changed

+12825
-7
lines changed

bin/gen-t9-dict.py

Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Generate T9 Dictionary from frequency-sorted word list
4+
5+
This script reads a word list (one word per line, sorted by frequency)
6+
and generates a T9 dictionary header file for the Meshtastic firmware.
7+
8+
Usage:
9+
python3 gen-t9-dict.py <input_file> <max_words> > ../src/input/T9Dictionary.h
10+
11+
Arguments:
12+
input_file: Word list file (one word per line, most frequent first)
13+
max_words: Maximum number of words to include (e.g., 1000)
14+
15+
Example:
16+
python3 gen-t9-dict.py google-10000-english-no-swears.txt 1000 > ../src/input/T9Dictionary.h
17+
"""
18+
19+
import sys
20+
import re
21+
from collections import defaultdict
22+
23+
24+
# Per-letter lookup table built from the keypad layout (key digit -> letters).
_T9_KEY_BY_LETTER = {
    letter: digit
    for digit, letters in (
        ("2", "abc"),
        ("3", "def"),
        ("4", "ghi"),
        ("5", "jkl"),
        ("6", "mno"),
        ("7", "pqrs"),
        ("8", "tuv"),
        ("9", "wxyz"),
    )
    for letter in letters
}


def char_to_t9_key(c):
    """Convert a single character to its T9 key digit.

    Args:
        c: The character to look up; matching is case-insensitive.

    Returns:
        The key as a one-character string ("2" through "9"), or None when
        ``c`` is not exactly one of the letters a-z (digits, punctuation,
        accented letters, the empty string, multi-character strings).
    """
    # An exact dictionary lookup is used instead of `c in "abc"`: the latter
    # is a substring test, which treats "" and fragments like "ab" as letters.
    return _T9_KEY_BY_LETTER.get(c.lower())
44+
45+
46+
def word_to_t9_sequence(word):
    """Translate a word into the digit string typed for it on a T9 keypad.

    The word is lower-cased and each character is mapped through
    ``char_to_t9_key``.

    Args:
        word: The word to translate.

    Returns:
        The concatenated key digits (an empty string for an empty word), or
        None if any character has no T9 key.
    """
    digits = [char_to_t9_key(letter) for letter in word.lower()]
    # A single unmappable character invalidates the whole word.
    if any(digit is None for digit in digits):
        return None
    return "".join(digits)
55+
56+
57+
def is_valid_word(word):
    """Check whether a word is a candidate for the dictionary.

    Only empty words are rejected. Single-letter words are accepted, and
    non-alphabetic characters are not checked here: ``word_to_t9_sequence``
    returns None for any word containing a character without a T9 key, and
    the caller skips those words.

    Args:
        word: The stripped, lower-cased word read from the input list.

    Returns:
        True if the word is non-empty, False otherwise.
    """
    # An empty word (blank input line) converts to the empty key sequence
    # rather than None, so it has to be filtered out here.
    return bool(word)
66+
67+
68+
def generate_t9_dict(input_file, max_words):
    """Build the T9 word table from a frequency-sorted word list.

    Reads ``input_file`` line by line, lower-cases each word, drops blank
    lines, duplicates and words that cannot be typed on a T9 keypad, and
    stops once ``max_words`` words have been accepted. Progress messages are
    written to stderr.

    Args:
        input_file: Path to a text file with one word per line.
        max_words: Maximum number of words to accept; zero or a negative
            value yields an empty result.

    Returns:
        A tuple ``(words, by_length)``. ``words`` is a list of
        ``(t9_sequence, word)`` tuples sorted by key sequence; words sharing
        a sequence keep their input order. ``by_length`` maps each word
        length to the list of those tuples, in the same sorted order.

    Raises:
        OSError: If ``input_file`` cannot be opened or read.
    """
    words = []
    seen_words = set()

    print(f"# Reading words from {input_file}...", file=sys.stderr)

    # "utf-8-sig" reads plain UTF-8 as well, but also strips a leading BOM
    # that would otherwise stick to the first word and make it unmappable.
    with open(input_file, "r", encoding="utf-8-sig") as f:
        for line in f:
            # Check the limit before accepting a word so that a limit of
            # zero (or less) collects nothing.
            if len(words) >= max_words:
                break

            word = line.strip().lower()

            # Skip blank lines, invalid words and words already seen
            if not word or not is_valid_word(word):
                continue
            if word in seen_words:
                continue

            # Convert to T9 sequence; None means some character has no key
            t9_seq = word_to_t9_sequence(word)
            if t9_seq is None:
                continue

            words.append((t9_seq, word))
            seen_words.add(word)

    print(f"# Collected {len(words)} unique words", file=sys.stderr)

    # list.sort() is stable, so words with the same key sequence stay in
    # their original (frequency) order.
    words.sort(key=lambda entry: entry[0])

    print("# Sorted by T9 sequence", file=sys.stderr)

    # Group by word length for better organization in output
    by_length = defaultdict(list)
    for t9_seq, word in words:
        by_length[len(word)].append((t9_seq, word))

    return words, by_length
110+
111+
112+
def generate_header_file(words, by_length, max_words):
    """Write the T9Dictionary.h header to standard output.

    Args:
        words: List of ``(t9_sequence, word)`` tuples; only its length is
            used, for the counts quoted in the header comments.
        by_length: Mapping of word length to a list of
            ``(t9_sequence, word)`` tuples. Groups are emitted in ascending
            length order, each followed by a blank line.
        max_words: The requested word limit, quoted in the header comment.

    Note:
        The emitted comment mentions "max 8 digits", but nothing in this
        function limits the length of a key sequence.
    """
    total = len(words)

    def header_lines():
        # Include guard, Arduino include and the explanatory doc comment.
        yield "#pragma once"
        yield "#ifndef T9DICTIONARY_H"
        yield "#define T9DICTIONARY_H"
        yield ""
        yield "#include <Arduino.h>"
        yield ""
        yield "/**"
        yield " * @brief Compact T9 dictionary for predictive text input"
        yield " *"
        yield f" * This dictionary contains {total} most common English words, stored in flash memory"
        yield " * to minimize RAM usage. Words are organized by their T9 key sequence for fast lookup."
        yield " *"
        yield " * T9 Key Mapping:"
        yield " * 2: abc, 3: def, 4: ghi, 5: jkl, 6: mno, 7: pqrs, 8: tuv, 9: wxyz"
        yield " *"
        yield f" * Generated from frequency-sorted word list (top {max_words} words)"
        yield " */"
        yield ""

        # Entry type and the opening of the table.
        yield "// Dictionary entry: key sequence (max 8 digits) + word"
        yield "struct T9Entry {"
        yield ' const char *keySeq; // T9 key sequence (e.g., "4663" for "good")'
        yield " const char *word; // The actual word"
        yield "};"
        yield ""
        yield f"// Top {total} most common English words organized by T9 key sequence"
        yield "// Stored in PROGMEM (flash) to save RAM"
        yield "const T9Entry T9_DICTIONARY[] PROGMEM = {"

        # One commented group per word length, shortest words first.
        for length in sorted(by_length):
            yield f" // {length}-letter words"
            for key_seq, text in by_length[length]:
                yield f' {{"{key_seq}", "{text}"}},'
            yield ""

        # The trailing comma of the last entry is left in place.
        yield "};"
        yield ""
        yield "const uint16_t T9_DICTIONARY_SIZE = sizeof(T9_DICTIONARY) / sizeof(T9Entry);"
        yield ""
        yield "#endif // T9DICTIONARY_H"

    for line in header_lines():
        print(line)
164+
165+
166+
def main():
    """Command-line entry point: build the dictionary and print the header.

    Expects exactly two arguments, ``<input_file> <max_words>``. The header
    is written to stdout; progress, statistics and errors go to stderr.

    Exits with status 1 when the argument count is wrong, when
    ``max_words`` is not a positive integer, or when the input file cannot
    be read.
    """
    if len(sys.argv) != 3:
        print(__doc__, file=sys.stderr)
        sys.exit(1)

    input_file = sys.argv[1]

    # Report a bad word limit as a usage error instead of a traceback.
    try:
        max_words = int(sys.argv[2])
    except ValueError:
        print(
            f"Error: max_words must be an integer, got {sys.argv[2]!r}",
            file=sys.stderr,
        )
        sys.exit(1)
    if max_words < 1:
        print(
            f"Error: max_words must be at least 1, got {max_words}",
            file=sys.stderr,
        )
        sys.exit(1)

    print("# Generating T9 dictionary:", file=sys.stderr)
    print(f"# Input: {input_file}", file=sys.stderr)
    print(f"# Max words: {max_words}", file=sys.stderr)
    print(file=sys.stderr)

    # Generate dictionary
    try:
        words, by_length = generate_t9_dict(input_file, max_words)
    except OSError as err:
        print(f"Error: cannot read {input_file}: {err}", file=sys.stderr)
        sys.exit(1)

    # Generate header file
    generate_header_file(words, by_length, max_words)

    # Print statistics
    print(f"# Generated {len(words)} entries", file=sys.stderr)
    print("# Word length distribution:", file=sys.stderr)
    for length in sorted(by_length):
        count = len(by_length[length])
        print(f"# {length} letters: {count} words", file=sys.stderr)
191+
192+
193+
# Run the generator only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)