Skip to content

Commit 37c69d0

Browse files
committed
Add dictation command processor for voice input
Adds a post-processing step to the voice input pipeline that converts spoken command phrases (e.g. "new line", "dollar sign", "caps on") into their corresponding characters and formatting. Covers 69 commands across 10 categories, each independently toggleable in a new Dictation Commands settings sub-page under Voice Input. Includes 108 JVM unit tests.
1 parent 5c7ed69 commit 37c69d0

File tree

9 files changed

+1902
-3
lines changed

9 files changed

+1902
-3
lines changed

build.gradle

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,10 @@ android {
328328
res.srcDirs = ['java/unstable/res', translationsWithoutEngValues('translations/devbuild')]
329329
}
330330

331+
test {
332+
java.srcDirs = ['java/test']
333+
}
334+
331335
androidTest {
332336
res.srcDirs = ['tests/res']
333337
java.srcDirs = ['tests/src']

java/res/values/strings-uix.xml

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -538,6 +538,26 @@
538538
<string name="voice_input_settings_change_models">Models</string>
539539
<string name="voice_input_settings_change_models_subtitle">To change the models, visit Languages &amp; Models menu</string>
540540

541+
<!-- Dictation commands settings -->
542+
<string name="dictation_commands_title">Dictation Commands</string>
543+
<string name="dictation_commands_subtitle">Replace spoken phrases like \"new line\" or \"dollar sign\" with symbols and formatting</string>
544+
<string name="dictation_formatting_title">Formatting</string>
545+
<string name="dictation_formatting_subtitle">New line, new paragraph, tab, numeral, no space on/off</string>
546+
<string name="dictation_capitalization_title">Capitalization</string>
547+
<string name="dictation_capitalization_subtitle">Caps on/off, all caps</string>
548+
<string name="dictation_punctuation_title">Punctuation &amp; Brackets</string>
549+
<string name="dictation_punctuation_subtitle">Quotes, brackets, parentheses, dash, ellipsis</string>
550+
<string name="dictation_symbols_title">Symbols</string>
551+
<string name="dictation_symbols_subtitle">Ampersand, asterisk, at sign, hashtag, etc.</string>
552+
<string name="dictation_math_title">Math Symbols</string>
553+
<string name="dictation_math_subtitle">Equal, plus, minus, greater than, less than</string>
554+
<string name="dictation_currency_title">Currency Symbols</string>
555+
<string name="dictation_currency_subtitle">Dollar, euro, pound, yen, cent</string>
556+
<string name="dictation_emoticons_title">Emoticons</string>
557+
<string name="dictation_emoticons_subtitle">Smiley, frowny, winky face</string>
558+
<string name="dictation_ip_marks_title">Intellectual Property Marks</string>
559+
<string name="dictation_ip_marks_subtitle">Copyright, registered, trademark</string>
560+
541561
<!-- Prediction menu -->
542562
<string name="prediction_settings_title">Text Prediction</string>
543563
<string name="prediction_settings_transformer">Transformer LM</string>

java/src/org/futo/inputmethod/latin/uix/VoiceInputSettingKeys.kt

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,4 +67,50 @@ val LANGUAGE_TOGGLES = SettingsKey(
6767
val USE_PERSONAL_DICT = SettingsKey(
6868
key = booleanPreferencesKey("use_personal_dict_voice_input"),
6969
default = true
70+
)
71+
72+
// Dictation command settings.
// Boolean preference keys, all defaulting to true. In this commit they are read into
// DictationSettings by the voice input action window and exposed as toggles on the
// Dictation Commands settings page.
73+
// Read into DictationSettings.enabled; presumably the master switch for the whole
// dictation-command step — confirm against DictationCommandProcessor.
val DICTATION_COMMANDS_ENABLED = SettingsKey(
74+
key = booleanPreferencesKey("dictation_commands_enabled"),
75+
default = true
76+
)
77+
78+
// Formatting category (settings subtitle: new line, new paragraph, tab, numeral, no space on/off).
val DICTATION_FORMATTING = SettingsKey(
79+
key = booleanPreferencesKey("dictation_formatting"),
80+
default = true
81+
)
82+
83+
// Capitalization category (settings subtitle: caps on/off, all caps).
val DICTATION_CAPITALIZATION = SettingsKey(
84+
key = booleanPreferencesKey("dictation_capitalization"),
85+
default = true
86+
)
87+
88+
// Punctuation & brackets category (settings subtitle: quotes, brackets, parentheses, dash, ellipsis).
val DICTATION_PUNCTUATION = SettingsKey(
89+
key = booleanPreferencesKey("dictation_punctuation"),
90+
default = true
91+
)
92+
93+
// Symbols category (settings subtitle: ampersand, asterisk, at sign, hashtag, etc.).
val DICTATION_SYMBOLS = SettingsKey(
94+
key = booleanPreferencesKey("dictation_symbols"),
95+
default = true
96+
)
97+
98+
// Math symbols category (settings subtitle: equal, plus, minus, greater than, less than).
val DICTATION_MATH = SettingsKey(
99+
key = booleanPreferencesKey("dictation_math"),
100+
default = true
101+
)
102+
103+
// Currency symbols category (settings subtitle: dollar, euro, pound, yen, cent).
val DICTATION_CURRENCY = SettingsKey(
104+
key = booleanPreferencesKey("dictation_currency"),
105+
default = true
106+
)
107+
108+
// Emoticons category (settings subtitle: smiley, frowny, winky face).
val DICTATION_EMOTICONS = SettingsKey(
109+
key = booleanPreferencesKey("dictation_emoticons"),
110+
default = true
111+
)
112+
113+
// Intellectual property marks category (settings subtitle: copyright, registered, trademark).
val DICTATION_IP_MARKS = SettingsKey(
114+
key = booleanPreferencesKey("dictation_ip_marks"),
115+
default = true
70116
)

java/src/org/futo/inputmethod/latin/uix/actions/VoiceInputAction.kt

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,20 @@ import kotlinx.coroutines.launch
2525
import kotlinx.coroutines.runBlocking
2626
import kotlinx.coroutines.yield
2727
import org.futo.inputmethod.latin.R
28-
import org.futo.inputmethod.latin.uix.AUDIO_FOCUS
2928
import org.futo.inputmethod.latin.uix.Action
3029
import org.futo.inputmethod.latin.uix.ActionWindow
30+
import org.futo.inputmethod.latin.uix.AUDIO_FOCUS
3131
import org.futo.inputmethod.latin.uix.CAN_EXPAND_SPACE
3232
import org.futo.inputmethod.latin.uix.CloseResult
33+
import org.futo.inputmethod.latin.uix.DICTATION_CAPITALIZATION
34+
import org.futo.inputmethod.latin.uix.DICTATION_COMMANDS_ENABLED
35+
import org.futo.inputmethod.latin.uix.DICTATION_CURRENCY
36+
import org.futo.inputmethod.latin.uix.DICTATION_EMOTICONS
37+
import org.futo.inputmethod.latin.uix.DICTATION_FORMATTING
38+
import org.futo.inputmethod.latin.uix.DICTATION_IP_MARKS
39+
import org.futo.inputmethod.latin.uix.DICTATION_MATH
40+
import org.futo.inputmethod.latin.uix.DICTATION_PUNCTUATION
41+
import org.futo.inputmethod.latin.uix.DICTATION_SYMBOLS
3342
import org.futo.inputmethod.latin.uix.DISALLOW_SYMBOLS
3443
import org.futo.inputmethod.latin.uix.ENABLE_SOUND
3544
import org.futo.inputmethod.latin.uix.KeyboardManagerForAction
@@ -42,6 +51,8 @@ import org.futo.inputmethod.latin.uix.VERBOSE_PROGRESS
4251
import org.futo.inputmethod.latin.uix.getSetting
4352
import org.futo.inputmethod.latin.uix.setSetting
4453
import org.futo.inputmethod.latin.uix.settings.SettingsActivity
54+
import org.futo.inputmethod.latin.uix.utils.DictationCommandProcessor
55+
import org.futo.inputmethod.latin.uix.utils.DictationSettings
4556
import org.futo.inputmethod.latin.uix.utils.ModelOutputSanitizer
4657
import org.futo.inputmethod.latin.xlm.UserDictionaryObserver
4758
import org.futo.inputmethod.updates.openURI
@@ -118,6 +129,7 @@ private class VoiceInputActionWindow(
118129
val context = manager.getContext()
119130

120131
private var shouldPlaySounds: Boolean = false
132+
private var dictationSettings: DictationSettings = DictationSettings()
121133
private fun loadSettings(): RecognizerViewSettings {
122134
val enableSound = context.getSetting(ENABLE_SOUND)
123135
val verboseFeedback = false//context.getSetting(VERBOSE_PROGRESS)
@@ -139,6 +151,18 @@ private class VoiceInputActionWindow(
139151

140152
shouldPlaySounds = enableSound
141153

154+
dictationSettings = DictationSettings(
155+
enabled = context.getSetting(DICTATION_COMMANDS_ENABLED),
156+
formatting = context.getSetting(DICTATION_FORMATTING),
157+
capitalization = context.getSetting(DICTATION_CAPITALIZATION),
158+
punctuation = context.getSetting(DICTATION_PUNCTUATION),
159+
symbols = context.getSetting(DICTATION_SYMBOLS),
160+
math = context.getSetting(DICTATION_MATH),
161+
currency = context.getSetting(DICTATION_CURRENCY),
162+
emoticons = context.getSetting(DICTATION_EMOTICONS),
163+
ipMarks = context.getSetting(DICTATION_IP_MARKS)
164+
)
165+
142166
return RecognizerViewSettings(
143167
shouldShowInlinePartialResult = false,
144168
shouldShowVerboseFeedback = verboseFeedback,
@@ -263,8 +287,11 @@ private class VoiceInputActionWindow(
263287
wasFinished = true
264288

265289
manager.getLifecycleScope().launch(Dispatchers.Main) {
290+
// Sanitize first (clean up Whisper quirks, add cursor-context spacing),
291+
// then process dictation commands (so \n, \t etc. aren't mangled by trim)
266292
val sanitized = ModelOutputSanitizer.sanitize(result, inputTransaction.textContext)
267-
inputTransaction.commit(sanitized)
293+
val processed = DictationCommandProcessor.process(sanitized, dictationSettings)
294+
inputTransaction.commit(processed)
268295
manager.announce(result)
269296
manager.closeActionWindow()
270297
}
@@ -273,7 +300,8 @@ private class VoiceInputActionWindow(
273300
override fun partialResult(result: String) {
274301
manager.getLifecycleScope().launch(Dispatchers.Main) {
275302
val sanitized = ModelOutputSanitizer.sanitize(result, inputTransaction.textContext)
276-
inputTransaction.updatePartial(sanitized)
303+
val processed = DictationCommandProcessor.process(sanitized, dictationSettings)
304+
inputTransaction.updatePartial(processed)
277305
}
278306
}
279307

java/src/org/futo/inputmethod/latin/uix/settings/SettingsNavigator.kt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ import org.futo.inputmethod.latin.uix.settings.pages.SelectLanguageScreen
5555
import org.futo.inputmethod.latin.uix.settings.pages.SelectLayoutsScreen
5656
import org.futo.inputmethod.latin.uix.settings.pages.themes.ThemeScreen
5757
import org.futo.inputmethod.latin.uix.settings.pages.TypingSettingsMenu
58+
import org.futo.inputmethod.latin.uix.settings.pages.DictationCommandsMenu
5859
import org.futo.inputmethod.latin.uix.settings.pages.VoiceInputMenu
5960
import org.futo.inputmethod.latin.uix.settings.pages.addModelManagerNavigation
6061
import org.futo.inputmethod.latin.uix.settings.pages.buggyeditors.BuggyTextEditVariations
@@ -86,6 +87,7 @@ val SettingsMenus = listOf(
8687
PredictiveTextMenu,
8788
BlacklistScreenLite,
8889
VoiceInputMenu,
90+
DictationCommandsMenu,
8991
ActionsScreen,
9092
HelpMenu,
9193
MiscMenu,

java/src/org/futo/inputmethod/latin/uix/settings/pages/VoiceInput.kt

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,15 @@ import androidx.compose.runtime.Composable
55
import org.futo.inputmethod.latin.R
66
import org.futo.inputmethod.latin.uix.AUDIO_FOCUS
77
import org.futo.inputmethod.latin.uix.CAN_EXPAND_SPACE
8+
import org.futo.inputmethod.latin.uix.DICTATION_CAPITALIZATION
9+
import org.futo.inputmethod.latin.uix.DICTATION_COMMANDS_ENABLED
10+
import org.futo.inputmethod.latin.uix.DICTATION_CURRENCY
11+
import org.futo.inputmethod.latin.uix.DICTATION_EMOTICONS
12+
import org.futo.inputmethod.latin.uix.DICTATION_FORMATTING
13+
import org.futo.inputmethod.latin.uix.DICTATION_IP_MARKS
14+
import org.futo.inputmethod.latin.uix.DICTATION_MATH
15+
import org.futo.inputmethod.latin.uix.DICTATION_PUNCTUATION
16+
import org.futo.inputmethod.latin.uix.DICTATION_SYMBOLS
817
import org.futo.inputmethod.latin.uix.DISALLOW_SYMBOLS
918
import org.futo.inputmethod.latin.uix.ENABLE_SOUND
1019
import org.futo.inputmethod.latin.uix.PREFER_BLUETOOTH
@@ -22,6 +31,66 @@ private val visibilityCheckNotSystemVoiceInput = @Composable {
2231
useDataStoreValue(USE_SYSTEM_VOICE_INPUT) == false
2332
}
2433

34+
// Settings sub-page for dictation commands, registered under the nav path
// "dictationCommands" (the Voice Input menu's navigation item navigates to that path).
// Holds one data-store-backed toggle per DICTATION_* setting key.
// NOTE(review): none of the category toggles carries a visibilityCheck tied to
// DICTATION_COMMANDS_ENABLED, so they stay shown when the first toggle is off —
// confirm this is intended.
val DictationCommandsMenu = UserSettingsMenu(
35+
title = R.string.dictation_commands_title,
36+
navPath = "dictationCommands", registerNavPath = true,
37+
settings = listOf(
38+
// First toggle reuses the page title/subtitle; presumably the overall on/off switch — confirm.
userSettingToggleDataStore(
39+
title = R.string.dictation_commands_title,
40+
subtitle = R.string.dictation_commands_subtitle,
41+
setting = DICTATION_COMMANDS_ENABLED
42+
),
43+
44+
// Per-category toggles follow, one per command category.
userSettingToggleDataStore(
45+
title = R.string.dictation_formatting_title,
46+
subtitle = R.string.dictation_formatting_subtitle,
47+
setting = DICTATION_FORMATTING
48+
),
49+
50+
userSettingToggleDataStore(
51+
title = R.string.dictation_capitalization_title,
52+
subtitle = R.string.dictation_capitalization_subtitle,
53+
setting = DICTATION_CAPITALIZATION
54+
),
55+
56+
userSettingToggleDataStore(
57+
title = R.string.dictation_punctuation_title,
58+
subtitle = R.string.dictation_punctuation_subtitle,
59+
setting = DICTATION_PUNCTUATION
60+
),
61+
62+
userSettingToggleDataStore(
63+
title = R.string.dictation_symbols_title,
64+
subtitle = R.string.dictation_symbols_subtitle,
65+
setting = DICTATION_SYMBOLS
66+
),
67+
68+
userSettingToggleDataStore(
69+
title = R.string.dictation_math_title,
70+
subtitle = R.string.dictation_math_subtitle,
71+
setting = DICTATION_MATH
72+
),
73+
74+
userSettingToggleDataStore(
75+
title = R.string.dictation_currency_title,
76+
subtitle = R.string.dictation_currency_subtitle,
77+
setting = DICTATION_CURRENCY
78+
),
79+
80+
userSettingToggleDataStore(
81+
title = R.string.dictation_emoticons_title,
82+
subtitle = R.string.dictation_emoticons_subtitle,
83+
setting = DICTATION_EMOTICONS
84+
),
85+
86+
userSettingToggleDataStore(
87+
title = R.string.dictation_ip_marks_title,
88+
subtitle = R.string.dictation_ip_marks_subtitle,
89+
setting = DICTATION_IP_MARKS
90+
)
91+
)
92+
)
93+
2594
val VoiceInputMenu = UserSettingsMenu(
2695
title = R.string.voice_input_settings_title,
2796
navPath = "voiceInput", registerNavPath = true,
@@ -82,6 +151,13 @@ val VoiceInputMenu = UserSettingsMenu(
82151
setting = USE_VAD_AUTOSTOP
83152
).copy(visibilityCheck = visibilityCheckNotSystemVoiceInput),
84153

154+
userSettingNavigationItem(
155+
title = R.string.dictation_commands_title,
156+
subtitle = R.string.dictation_commands_subtitle,
157+
style = NavigationItemStyle.Misc,
158+
navigateTo = "dictationCommands"
159+
).copy(visibilityCheck = visibilityCheckNotSystemVoiceInput),
160+
85161
userSettingNavigationItem(
86162
title = R.string.voice_input_settings_change_models,
87163
subtitle = R.string.voice_input_settings_change_models_subtitle,

0 commit comments

Comments
 (0)