@@ -15,6 +15,12 @@ def lookup(engine: StenoEngine, text_to_lookup: str) -> list:
1515 """Recursively looks up a phrase by finding the longest possible dictionary match.
1616
1717 Starts from the beginning of the string and then solving for the remainder.
18+
19+ A lookup can fail (return an empty list) if any part of the tokenized input
20+ string cannot be found in Plover's dictionaries. The lookup is performed
21+ recursively, and if any segment of the phrase has no corresponding steno
22+ strokes, the entire lookup for that path will fail, and if no alternative
23+ paths are found, the overall result will be empty.
1824 """
1925 memo = {}
2026 log .debug (f"Starting lookup for: '{ text_to_lookup } '" )
@@ -27,6 +33,7 @@ def get_steno_for_phrase(phrase: str) -> list | None:
2733 """
2834 # 1. Try the phrase as-is (respecting capitalization)
2935 log .debug (f" - get_steno_for_phrase('{ phrase } ')" )
36+
3037 steno_capitalized : set = engine .reverse_lookup (phrase )
3138
3239 # If the phrase is a single non-word character (like '!'),
@@ -49,7 +56,28 @@ def get_steno_for_phrase(phrase: str) -> list | None:
4956
5057 # Prioritize direct capitalized results
5158 combined = steno_capitalized .union (steno_lowercase_modified )
59+ numeric_phrase = re .sub (r"[$,€£]" , "" , phrase .replace ("," , "" ))
60+ if numeric_phrase .isdigit ():
61+ digit_steno_list = []
62+ all_digits_found = True
63+ for digit in numeric_phrase :
64+ digit_steno = engine .reverse_lookup (digit )
65+ if not digit_steno :
66+ all_digits_found = False
67+ break
68+ digit_steno_list .append (min (digit_steno , key = len )) # Choose shortest steno for the digit
69+ if all_digits_found :
70+ combined_digit_steno = tuple (s for steno_tuple in digit_steno_list for s in steno_tuple )
71+ combined .add (combined_digit_steno )
72+
73+ # If after all attempts, we have no results, return None.
5274 if not combined :
75+ # Only issue a warning for single words that are not found, as this is the root cause of failure.
76+ is_single_word = " " not in phrase
77+ if is_single_word :
78+ log .warning (f"Failed to find steno for word: '{ phrase } '" )
79+ else :
80+ log .debug (f" - FAILED to find steno for phrase: '{ phrase } '" )
5381 return None
5482
5583 # Sort results: 1. Direct cap match, 2. Stroke count, 3. Key count
@@ -65,6 +93,8 @@ def solve(words_tuple: tuple) -> list[list[tuple]]:
6593 def get_steno_options (i ):
6694 return get_steno_for_phrase (" " .join (words_tuple [:i ]))
6795
96+ max_lookup_length = min (len (words_tuple ), engine ._dictionaries .longest_key )
97+
6898 def process_i (i , best_steno_for_prefix ):
6999 # Recursively find all solutions for the rest of the phrase
70100 prefix_phrase = " " .join (words_tuple [:i ])
@@ -76,23 +106,30 @@ def process_i(i, best_steno_for_prefix):
76106
77107 all_solutions = [
78108 solution
79- for i in range (len ( words_tuple ) , 0 , - 1 )
109+ for i in range (max_lookup_length , 0 , - 1 )
80110 if (steno_options := get_steno_options (i ))
81111 for solution in process_i (i , steno_options [0 ])
82112 ]
83113
114+ if not all_solutions :
115+ # This is the point of failure. It means for the current `words_tuple`,
116+ # no prefix could be found in the dictionary that also had a valid suffix solution.
117+ log .debug (f" <-- solve({ words_tuple } ) -> FAILED: No steno found for any prefix." )
84118 memo [words_tuple ] = all_solutions
85119 return all_solutions
86120
87121 # Tokenize the input string, separating words from punctuation.
88- # This finds sequences of word characters (\w+) or single non-word/non-space characters.
89- words = re .findall (r"\w+|[^\w\s]" , text_to_lookup )
122+ # This finds sequences of word characters (including those with internal apostrophes),
123+ # currency symbols attached to numbers, numbers with commas, or single non-word/non-space characters.
124+ token_regex = r"[$€£]?\d+(?:,\d+)*|\w+(?:['’]\w+)*|[^\w\s]" # nosec B105
125+ words = re .findall (token_regex , text_to_lookup )
90126
91127 all_possible_sequences = solve (tuple (words ))
92128
93129 log .debug (f"All possible sequences: { all_possible_sequences } " )
94130
95131 if not all_possible_sequences :
132+ log .debug (f"Lookup failed for '{ text_to_lookup } '. No valid steno sequence found." )
96133 return []
97134
98135 # Sort the collected sequences by overall efficiency
0 commit comments