Skip to content

Commit 80e444b

Browse files
committed
add interrupt support
1 parent f2ff92b commit 80e444b

File tree

7 files changed

+63
-25
lines changed

7 files changed

+63
-25
lines changed

firebase-ai/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
- [changed] **Breaking Change**: Removed the `candidateCount` option from `LiveGenerationConfig`
44
- [changed] Added better error messages to `ServiceConnectionHandshakeFailedException`
5+
- [changed] Added support for user interrupts for the `startAudioConversation` method in the
6+
`LiveSession` class.
57

68
# 17.3.0
79

firebase-ai/api.txt

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,9 @@ package com.google.firebase.ai.java {
148148
method public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> sendFunctionResponse(java.util.List<com.google.firebase.ai.type.FunctionResponsePart> functionList);
149149
method public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> sendMediaStream(java.util.List<com.google.firebase.ai.type.MediaData> mediaChunks);
150150
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation();
151-
method public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler);
151+
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation(boolean enableInterruptions);
152+
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler);
153+
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler, boolean enableInterruptions);
152154
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> stopAudioConversation();
153155
method public abstract void stopReceiving();
154156
field public static final com.google.firebase.ai.java.LiveSessionFutures.Companion Companion;
@@ -889,7 +891,7 @@ package com.google.firebase.ai.type {
889891
method public suspend Object? send(String text, kotlin.coroutines.Continuation<? super kotlin.Unit>);
890892
method public suspend Object? sendFunctionResponse(java.util.List<com.google.firebase.ai.type.FunctionResponsePart> functionList, kotlin.coroutines.Continuation<? super kotlin.Unit>);
891893
method public suspend Object? sendMediaStream(java.util.List<com.google.firebase.ai.type.MediaData> mediaChunks, kotlin.coroutines.Continuation<? super kotlin.Unit>);
892-
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler = null, kotlin.coroutines.Continuation<? super kotlin.Unit>);
894+
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler = null, Boolean? enableInterruptions = null, kotlin.coroutines.Continuation<? super kotlin.Unit>);
893895
method public void stopAudioConversation();
894896
method public void stopReceiving();
895897
}

firebase-ai/gradle.properties

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,5 +12,5 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
version=99.9.9
15+
version=17.4.0
1616
latestReleasedVersion=17.3.0

firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -37,22 +37,12 @@ internal val AudioRecord.minBufferSize: Int
3737
internal fun AudioRecord.readAsFlow() = flow {
3838
val buffer = ByteArray(minBufferSize)
3939

40-
var startTime = System.currentTimeMillis()
4140
while (true) {
4241
if (recordingState != AudioRecord.RECORDSTATE_RECORDING) {
4342
delay(10)
4443
yield()
4544
continue
4645
}
47-
if (System.currentTimeMillis() - startTime >= 100) {
48-
// This is the manual yield/pause point.
49-
// Using delay(1) suspends the coroutine, freeing the thread
50-
// for the dispatcher to run other tasks briefly.
51-
delay(1)
52-
yield()
53-
startTime = System.currentTimeMillis() // Reset the timer
54-
}
55-
5646
val bytesRead = read(buffer, 0, buffer.size)
5747
if (bytesRead > 0) {
5848
emit(buffer.copyOf(bytesRead))

firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ public abstract class LiveSessionFutures internal constructor() {
4747
* @param functionCallHandler A callback function that is invoked whenever the model receives a
4848
* function call.
4949
*/
50+
@RequiresPermission(RECORD_AUDIO)
5051
public abstract fun startAudioConversation(
5152
functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?
5253
): ListenableFuture<Unit>
@@ -58,6 +59,36 @@ public abstract class LiveSessionFutures internal constructor() {
5859
@RequiresPermission(RECORD_AUDIO)
5960
public abstract fun startAudioConversation(): ListenableFuture<Unit>
6061

62+
/**
63+
* Starts an audio conversation with the model, which can only be stopped using
64+
* [stopAudioConversation] or [close].
65+
*
66+
* @param enableInterruptions Boolean to enable the user to interrupt the model. Setting this to
67+
* true allows the user to talk while the model is responding.
68+
*
69+
* **WARNING**: User interruption might not work reliably across all devices.
70+
*/
71+
@RequiresPermission(RECORD_AUDIO)
72+
public abstract fun startAudioConversation(enableInterruptions: Boolean): ListenableFuture<Unit>
73+
74+
/**
75+
* Starts an audio conversation with the model, which can only be stopped using
76+
* [stopAudioConversation] or [close].
77+
*
78+
* @param functionCallHandler A callback function that is invoked whenever the model receives a
79+
* function call.
80+
*
81+
* @param enableInterruptions Boolean to enable the user to interrupt the model. Setting this to
82+
* true allows the user to talk while the model is responding.
83+
*
84+
* **WARNING**: User interruption might not work reliably across all devices.
85+
*/
86+
@RequiresPermission(RECORD_AUDIO)
87+
public abstract fun startAudioConversation(
88+
functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?,
89+
enableInterruptions: Boolean
90+
): ListenableFuture<Unit>
91+
6192
/**
6293
* Stops the audio conversation with the Gemini Server.
6394
*
@@ -169,6 +200,24 @@ public abstract class LiveSessionFutures internal constructor() {
169200
override fun startAudioConversation() =
170201
SuspendToFutureAdapter.launchFuture { session.startAudioConversation() }
171202

203+
@RequiresPermission(RECORD_AUDIO)
204+
override fun startAudioConversation(enableInterruptions: Boolean) =
205+
SuspendToFutureAdapter.launchFuture {
206+
session.startAudioConversation(enableInterruptions = enableInterruptions)
207+
}
208+
209+
@RequiresPermission(RECORD_AUDIO)
210+
override fun startAudioConversation(
211+
functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?,
212+
enableInterruptions: Boolean
213+
) =
214+
SuspendToFutureAdapter.launchFuture {
215+
session.startAudioConversation(
216+
functionCallHandler,
217+
enableInterruptions = enableInterruptions
218+
)
219+
}
220+
172221
override fun stopAudioConversation() =
173222
SuspendToFutureAdapter.launchFuture { session.stopAudioConversation() }
174223

firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioHelper.kt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,6 @@ internal class AudioHelper(
140140
* Returns an empty flow if this [AudioHelper] has been [released][release].
141141
*/
142142
fun listenToRecording(): Flow<ByteArray> {
143-
println("Released: $released")
144143
if (released) return emptyFlow()
145144
resumeRecording()
146145

firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@ import java.util.concurrent.ConcurrentLinkedQueue
3737
import java.util.concurrent.atomic.AtomicBoolean
3838
import kotlin.coroutines.CoroutineContext
3939
import kotlinx.coroutines.CoroutineScope
40-
import kotlinx.coroutines.Dispatchers
4140
import kotlinx.coroutines.cancel
4241
import kotlinx.coroutines.channels.Channel.Factory.UNLIMITED
4342
import kotlinx.coroutines.flow.Flow
@@ -93,6 +92,11 @@ internal constructor(
9392
* @param functionCallHandler A callback function that is invoked whenever the model receives a
9493
* function call. The [FunctionResponsePart] that the callback function returns will be
9594
* automatically sent to the model.
95+
*
96+
* @param enableInterruptions Boolean to enable the user to interrupt the model. Setting this to
97+
* true allows the user to talk while the model is responding.
98+
*
99+
* **WARNING**: User interruption might not work reliably across all devices.
96100
*/
97101
@RequiresPermission(RECORD_AUDIO)
98102
public suspend fun startAudioConversation(
@@ -175,11 +179,9 @@ internal constructor(
175179
response
176180
.getOrNull()
177181
?.let {
178-
val x = JSON.decodeFromString<InternalLiveServerMessage>(
182+
JSON.decodeFromString<InternalLiveServerMessage>(
179183
it.readBytes().toString(Charsets.UTF_8)
180184
)
181-
println(x)
182-
x
183185
}
184186
?.let { emit(it.toPublic()) }
185187
yield()
@@ -233,7 +235,6 @@ internal constructor(
233235
BidiGenerateContentToolResponseSetup(functionList.map { it.toInternalFunctionCall() })
234236
.toInternal()
235237
)
236-
println("Sending function response $jsonString")
237238
session.send(Frame.Text(jsonString))
238239
}
239240
}
@@ -253,7 +254,6 @@ internal constructor(
253254
Json.encodeToString(
254255
BidiGenerateContentRealtimeInputSetup(mediaChunks.map { (it.toInternal()) }).toInternal()
255256
)
256-
println("Sending $jsonString")
257257
session.send(Frame.Text(jsonString))
258258
}
259259
}
@@ -310,7 +310,7 @@ internal constructor(
310310
?.accumulateUntil(MIN_BUFFER_SIZE)
311311
?.onEach { sendMediaStream(listOf(MediaData(it, "audio/pcm"))) }
312312
?.catch { throw FirebaseAIException.from(it) }
313-
?.launchIn(CoroutineScope(Dispatchers.IO))
313+
?.launchIn(scope)
314314
}
315315

316316
/**
@@ -338,7 +338,6 @@ internal constructor(
338338
} else if (functionCallHandler != null) {
339339
// It's fine to suspend here since you can't have a function call running concurrently
340340
// with an audio response
341-
println("Model is attempting to send a function call response")
342341
sendFunctionResponse(it.functionCalls.map(functionCallHandler).toList())
343342
} else {
344343
Log.w(
@@ -354,13 +353,11 @@ internal constructor(
354353
)
355354
}
356355
is LiveServerContent -> {
357-
println("State of it's interruption: ${it.interrupted}")
358356
if (it.interrupted) {
359357
playBackQueue.clear()
360358
} else {
361359
val audioParts = it.content?.parts?.filterIsInstance<InlineDataPart>().orEmpty()
362360
for (part in audioParts) {
363-
println("Model receiving ${part.inlineData}")
364361
playBackQueue.add(part.inlineData)
365362
}
366363
}
@@ -404,7 +401,6 @@ internal constructor(
404401
if (enableInterruptions != true) {
405402
audioHelper?.pauseRecording()
406403
}
407-
println("Model playing $playbackData")
408404
audioHelper?.playAudio(playbackData)
409405
}
410406
}

0 commit comments

Comments
 (0)