Skip to content

Commit 263a9d1

Browse files
committed
add wake word and client side commands
1 parent 9ee948c commit 263a9d1

File tree

23 files changed

+4982
-153
lines changed

23 files changed

+4982
-153
lines changed

android/app/build.gradle

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,13 @@ flutter {
9494
dependencies {
9595
coreLibraryDesugaring 'com.android.tools:desugar_jdk_libs:1.2.2'
9696
// FFmpeg dependencies have been removed as we now use native speech recognition
97+
98+
// Wearable Data Layer API for Pixel Watch / Wear OS connectivity
99+
implementation 'com.google.android.gms:play-services-wearable:18.1.0'
100+
101+
// Kotlin coroutines for async operations
102+
implementation 'org.jetbrains.kotlinx:kotlinx-coroutines-android:1.7.3'
103+
implementation 'org.jetbrains.kotlinx:kotlinx-coroutines-play-services:1.7.3'
97104
}
98105

99106
subprojects {

android/app/src/main/kotlin/dev/agixt/agixt/MainActivity.kt

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@ class MainActivity: FlutterActivity() {
3131
private val TAG = "MainActivity"
3232
private var methodChannelInitialized = false
3333
private var pendingToken: String? = null
34+
35+
// Voice & Watch handlers
36+
private var wakeWordHandler: WakeWordHandler? = null
37+
private var watchHandler: WatchHandler? = null
3438

3539
override fun onCreate(savedInstanceState: Bundle?) {
3640
super.onCreate(savedInstanceState)
@@ -128,6 +132,9 @@ class MainActivity: FlutterActivity() {
128132

129133
/**
 * Activity teardown: destroys the wake-word and watch handlers (when they were
 * created) and then asks [BackgroundService] to stop.
 */
override fun onDestroy() {
130134
super.onDestroy()
135+
// Clean up handlers
// Both handlers are nullable fields, so the safe calls are no-ops when they were never assigned.
// NOTE(review): this cleanup runs after super.onDestroy(); confirm the handlers do not
// need the Flutter engine to still be attached when destroy() is called.
136+
wakeWordHandler?.destroy()
137+
watchHandler?.destroy()
131138
BackgroundService.stopService(this@MainActivity, null)
132139
}
133140

@@ -198,6 +205,13 @@ class MainActivity: FlutterActivity() {
198205
// Mark that the method channels are initialized
199206
methodChannelInitialized = true
200207

208+
// Initialize Voice & Watch handlers
209+
wakeWordHandler = WakeWordHandler(this, binaryMessenger)
210+
wakeWordHandler?.initialize()
211+
212+
watchHandler = WatchHandler(this, binaryMessenger)
213+
watchHandler?.initialize()
214+
201215
// Check if we have a pending token to send
202216
pendingToken?.let { token ->
203217
sendTokenToFlutter(token)
Lines changed: 250 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,250 @@
1+
package dev.agixt.agixt
2+
3+
import android.content.Context
4+
import android.content.Intent
5+
import android.os.Bundle
6+
import android.speech.RecognitionListener
7+
import android.speech.RecognizerIntent
8+
import android.speech.SpeechRecognizer
9+
import android.util.Log
10+
import io.flutter.plugin.common.MethodChannel
11+
import io.flutter.plugin.common.BinaryMessenger
12+
import java.util.Locale
13+
14+
/**
 * Wake-word detection built on Android's [SpeechRecognizer].
 *
 * The handler exposes the `dev.agixt.agixt/wake_word` method channel to Flutter. Flutter can
 * `initialize`, `start`, `stop`, `pause` and `resume` detection, change the wake word or the
 * sensitivity, and query `isAvailable`. When a recognition result contains the wake word with
 * sufficient confidence, `onWakeWordDetected` is invoked on the channel with the keys
 * `wakeWord`, `confidence` and `transcript`, and listening is paused until `resume` is called.
 *
 * Recognition is restarted after every result or recoverable error for as long as listening
 * is enabled and not paused.
 *
 * @param context used to create the recognizer and to obtain the main looper.
 * @param binaryMessenger messenger the method channel is registered on.
 */
class WakeWordHandler(
    private val context: Context,
    private val binaryMessenger: BinaryMessenger
) {
    private var speechRecognizer: SpeechRecognizer? = null
    private var isListening = false
    private var isPaused = false
    private var wakeWord = DEFAULT_WAKE_WORD
    private var sensitivity = DEFAULT_SENSITIVITY
    private lateinit var methodChannel: MethodChannel

    // One handler and one runnable for delayed restarts, so that a pending restart can be
    // de-duplicated and cancelled instead of piling up a new Handler per error callback.
    private val mainHandler = android.os.Handler(context.mainLooper)
    private val restartRunnable = Runnable {
        if (isListening && !isPaused) {
            startRecognition()
        }
    }

    /** Recognizer callbacks: logging, wake-word matching and the restart loop. */
    private val recognitionListener = object : RecognitionListener {
        override fun onReadyForSpeech(params: Bundle?) {
            Log.d(TAG, "Ready for speech")
        }

        override fun onBeginningOfSpeech() {
            Log.d(TAG, "Beginning of speech")
        }

        override fun onRmsChanged(rmsdB: Float) {
            // Audio level changed - could be used for UI feedback
        }

        override fun onBufferReceived(buffer: ByteArray?) {
            // Not typically used
        }

        override fun onEndOfSpeech() {
            Log.d(TAG, "End of speech")
        }

        override fun onError(error: Int) {
            Log.d(TAG, "Error: ${describeError(error)} ($error)")

            // Restart listening if still enabled and not paused. A permission failure is
            // not retried because it would only fail again.
            val mayRetry = error != SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS
            if (isListening && !isPaused && mayRetry) {
                scheduleRestart()
            }
        }

        override fun onResults(results: Bundle?) {
            val matches = results?.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION)
            val confidences = results?.getFloatArray(SpeechRecognizer.CONFIDENCE_SCORES)

            Log.d(TAG, "Results: $matches")

            val detection = findDetection(matches, confidences)
            if (detection != null) {
                val (transcript, confidence) = detection
                Log.i(TAG, "Wake word '$wakeWord' detected! Confidence: $confidence")

                // Notify Flutter
                methodChannel.invokeMethod(
                    "onWakeWordDetected",
                    mapOf(
                        "wakeWord" to wakeWord,
                        "confidence" to confidence.toDouble(),
                        "transcript" to transcript
                    )
                )

                // Pause listening while user is speaking
                pause()
                return
            }

            // Continue listening if wake word wasn't detected
            if (isListening && !isPaused) {
                startRecognition()
            }
        }

        override fun onPartialResults(partialResults: Bundle?) {
            partialResults
                ?.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION)
                ?.filter { it.lowercase().contains(wakeWord) }
                ?.forEach { match ->
                    Log.d(TAG, "Partial wake word detection: $match")
                    // Could send partial detection event if needed
                }
        }

        override fun onEvent(eventType: Int, params: Bundle?) {
            Log.d(TAG, "Event: $eventType")
        }
    }

    /**
     * Creates the method channel and installs the handler that serves calls from Flutter.
     * Every recognised method replies `true`, except `isAvailable`, which replies with the
     * platform's recognition availability.
     */
    fun initialize() {
        methodChannel = MethodChannel(binaryMessenger, CHANNEL)

        methodChannel.setMethodCallHandler { call, result ->
            when (call.method) {
                "initialize" -> {
                    wakeWord = normalizeWakeWord(call.argument<String>("wakeWord"))
                    // Read as Number: the Flutter codec delivers a Dart int as Integer, and
                    // casting that to Double would throw ClassCastException.
                    sensitivity = normalizeSensitivity(call.argument<Number>("sensitivity"))
                    setupSpeechRecognizer()
                    result.success(true)
                }
                "start" -> {
                    startListening()
                    result.success(true)
                }
                "stop" -> {
                    stopListening()
                    result.success(true)
                }
                "pause" -> {
                    pause()
                    result.success(true)
                }
                "resume" -> {
                    resume()
                    result.success(true)
                }
                "setWakeWord" -> {
                    wakeWord = normalizeWakeWord(call.argument<String>("wakeWord"))
                    result.success(true)
                }
                "setSensitivity" -> {
                    sensitivity = normalizeSensitivity(call.argument<Number>("sensitivity"))
                    result.success(true)
                }
                "isAvailable" -> {
                    result.success(SpeechRecognizer.isRecognitionAvailable(context))
                }
                else -> result.notImplemented()
            }
        }
    }

    /**
     * Lower-cases and trims the requested wake word. A missing or blank value falls back to
     * the default, because an empty wake word would match every transcript.
     */
    private fun normalizeWakeWord(raw: String?): String =
        raw?.trim()?.lowercase()?.takeIf { it.isNotEmpty() } ?: DEFAULT_WAKE_WORD

    /**
     * Converts the requested sensitivity to a float in `[0, 1]`. A missing or NaN value falls
     * back to the default, so the derived threshold `1 - sensitivity` stays in `[0, 1]`.
     */
    private fun normalizeSensitivity(raw: Number?): Float {
        val value = raw?.toFloat() ?: return DEFAULT_SENSITIVITY
        return if (value.isNaN()) DEFAULT_SENSITIVITY else value.coerceIn(0f, 1f)
    }

    /** Maps a [SpeechRecognizer] error code to the message used in the log. */
    private fun describeError(error: Int): String = when (error) {
        SpeechRecognizer.ERROR_AUDIO -> "Audio recording error"
        SpeechRecognizer.ERROR_CLIENT -> "Client side error"
        SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS -> "Insufficient permissions"
        SpeechRecognizer.ERROR_NETWORK -> "Network error"
        SpeechRecognizer.ERROR_NETWORK_TIMEOUT -> "Network timeout"
        SpeechRecognizer.ERROR_NO_MATCH -> "No match found"
        SpeechRecognizer.ERROR_RECOGNIZER_BUSY -> "Recognizer busy"
        SpeechRecognizer.ERROR_SERVER -> "Server error"
        SpeechRecognizer.ERROR_SPEECH_TIMEOUT -> "Speech timeout"
        else -> "Unknown error"
    }

    /**
     * Returns the first transcript that contains the wake word with a confidence of at least
     * `1 - sensitivity`, paired with that confidence, or `null` when none qualifies.
     * A transcript without a confidence score is given [DEFAULT_CONFIDENCE].
     */
    private fun findDetection(matches: List<String>?, confidences: FloatArray?): Pair<String, Float>? {
        // Adjust threshold based on sensitivity setting
        val threshold = 1.0f - sensitivity
        matches?.forEachIndexed { index, transcript ->
            val confidence = confidences?.getOrNull(index) ?: DEFAULT_CONFIDENCE
            if (transcript.lowercase().contains(wakeWord) && confidence >= threshold) {
                return transcript to confidence
            }
        }
        return null
    }

    /** Queues a single delayed restart, replacing any restart that is already pending. */
    private fun scheduleRestart() {
        mainHandler.removeCallbacks(restartRunnable)
        // Small delay before restarting to avoid rapid cycling
        mainHandler.postDelayed(restartRunnable, RESTART_DELAY_MS)
    }

    /** Replaces any existing recognizer with a fresh one wired to [recognitionListener]. */
    private fun setupSpeechRecognizer() {
        if (!SpeechRecognizer.isRecognitionAvailable(context)) {
            Log.e(TAG, "Speech recognition not available on this device")
            return
        }

        speechRecognizer?.destroy()
        speechRecognizer = SpeechRecognizer.createSpeechRecognizer(context).apply {
            setRecognitionListener(recognitionListener)
        }
    }

    /** Enables the listening loop and starts the first recognition session. */
    private fun startListening() {
        if (!SpeechRecognizer.isRecognitionAvailable(context)) {
            Log.e(TAG, "Speech recognition not available")
            return
        }

        isListening = true
        isPaused = false
        startRecognition()
    }

    /** Starts one recognition session, creating the recognizer first if necessary. */
    private fun startRecognition() {
        // An explicit start supersedes any restart that is still queued.
        mainHandler.removeCallbacks(restartRunnable)

        if (speechRecognizer == null) {
            setupSpeechRecognizer()
        }

        val intent = Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH).apply {
            putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL, RecognizerIntent.LANGUAGE_MODEL_FREE_FORM)
            // EXTRA_LANGUAGE is a BCP-47 tag string; a Locale object would be stored as a
            // Serializable and ignored by the recognizer.
            putExtra(RecognizerIntent.EXTRA_LANGUAGE, Locale.getDefault().toLanguageTag())
            putExtra(RecognizerIntent.EXTRA_PARTIAL_RESULTS, true)
            putExtra(RecognizerIntent.EXTRA_MAX_RESULTS, 5)
            // Shorter silence timeouts for wake word detection
            putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_MINIMUM_LENGTH_MILLIS, 1000)
            putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS, 1500)
            putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS, 1000)
        }

        try {
            speechRecognizer?.startListening(intent)
        } catch (e: Exception) {
            Log.e(TAG, "Error starting recognition: ${e.message}")
        }
    }

    /** Disables the listening loop and aborts the current session and any queued restart. */
    private fun stopListening() {
        isListening = false
        isPaused = false
        mainHandler.removeCallbacks(restartRunnable)
        // cancel() alone is used: it aborts the session without delivering further results.
        speechRecognizer?.cancel()
    }

    /** Suspends recognition while leaving the loop enabled so [resume] can restart it. */
    private fun pause() {
        isPaused = true
        mainHandler.removeCallbacks(restartRunnable)
        speechRecognizer?.cancel()
    }

    /** Restarts recognition after [pause]; does nothing when listening was never started. */
    private fun resume() {
        if (isListening) {
            isPaused = false
            startRecognition()
        }
    }

    /**
     * Stops listening, releases the recognizer and detaches the method-call handler so the
     * channel no longer holds a reference to this object and its [Context].
     */
    fun destroy() {
        stopListening()
        speechRecognizer?.destroy()
        speechRecognizer = null
        // destroy() may run without initialize() having been called.
        if (::methodChannel.isInitialized) {
            methodChannel.setMethodCallHandler(null)
        }
    }

    private companion object {
        const val TAG = "WakeWordHandler"
        const val CHANNEL = "dev.agixt.agixt/wake_word"
        const val DEFAULT_WAKE_WORD = "computer"
        const val DEFAULT_SENSITIVITY = 0.5f

        /** Confidence assumed for a transcript that has no score. */
        const val DEFAULT_CONFIDENCE = 0.5f

        /** Delay before recognition is restarted after an error. */
        const val RESTART_DELAY_MS = 500L
    }
}

0 commit comments

Comments
 (0)