From 873db81b0052fc4d2c981bf5f790fa4d485bd42d Mon Sep 17 00:00:00 2001 From: VinayGuthal Date: Thu, 25 Sep 2025 13:56:17 -0400 Subject: [PATCH 1/3] add interrupt support --- .../com/google/firebase/ai/type/LiveSession.kt | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt index a91d7e4aedf..191c9a49f8e 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt @@ -95,7 +95,8 @@ internal constructor( */ @RequiresPermission(RECORD_AUDIO) public suspend fun startAudioConversation( - functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)? = null + functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)? = null, + enableInterruptions: Boolean? = null, ) { val context = firebaseApp.applicationContext @@ -120,7 +121,7 @@ internal constructor( recordUserAudio() processModelResponses(functionCallHandler) - listenForModelPlayback() + listenForModelPlayback(enableInterruptions) } } @@ -375,14 +376,16 @@ internal constructor( * * Launched asynchronously on [scope]. */ - private fun listenForModelPlayback() { + private fun listenForModelPlayback(enableInterruptions: Boolean? = null) { scope.launch { while (isActive) { val playbackData = playBackQueue.poll() if (playbackData == null) { // The model playback queue is complete, so we can continue recording // TODO(b/408223520): Conditionally resume when param is added - audioHelper?.resumeRecording() + if (enableInterruptions != true) { + audioHelper?.resumeRecording() + } yield() } else { /** @@ -390,7 +393,9 @@ internal constructor( * no echo cancellation */ // TODO(b/408223520): Conditionally pause when param is added - audioHelper?.pauseRecording() + if (enableInterruptions != true) { + audioHelper?.pauseRecording() + } audioHelper?.playAudio(playbackData) } From f2ff92b9b2c3a65d03ba6d698e13825b74d43887 Mon Sep 17 00:00:00 2001 From: VinayGuthal Date: Mon, 6 Oct 2025 13:18:30 -0400 Subject: [PATCH 2/3] update --- firebase-ai/gradle.properties | 2 +- .../com/google/firebase/ai/common/util/android.kt | 12 ++++++++++++ .../com/google/firebase/ai/type/AudioHelper.kt | 2 +- .../com/google/firebase/ai/type/LiveSession.kt | 14 +++++++++++--- 4 files changed, 25 insertions(+), 5 deletions(-) diff --git a/firebase-ai/gradle.properties b/firebase-ai/gradle.properties index 794b7a23197..15e226a5aac 100644 --- a/firebase-ai/gradle.properties +++ b/firebase-ai/gradle.properties @@ -12,5 +12,5 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-version=17.3.1 +version=99.9.9 latestReleasedVersion=17.3.0 diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt index 4d7a1e46097..d50bf5a549c 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt @@ -17,6 +17,7 @@ package com.google.firebase.ai.common.util import android.media.AudioRecord +import kotlinx.coroutines.delay import kotlinx.coroutines.flow.flow import kotlinx.coroutines.yield @@ -36,15 +37,26 @@ internal val AudioRecord.minBufferSize: Int internal fun AudioRecord.readAsFlow() = flow { val buffer = ByteArray(minBufferSize) + var startTime = System.currentTimeMillis() while (true) { if (recordingState != AudioRecord.RECORDSTATE_RECORDING) { + delay(10) yield() continue } + if (System.currentTimeMillis() - startTime >= 100) { + // This is the manual yield/pause point. + // Using delay(1) suspends the coroutine, freeing the thread + // for the dispatcher to run other tasks briefly. + delay(1) + yield() + startTime = System.currentTimeMillis() // Reset the timer + } val bytesRead = read(buffer, 0, buffer.size) if (bytesRead > 0) { emit(buffer.copyOf(bytesRead)) } + yield() } } diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioHelper.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioHelper.kt index 4db66ae6c3e..3100f6f9ff5 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioHelper.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioHelper.kt @@ -140,8 +140,8 @@ internal class AudioHelper( * Returns an empty flow if this [AudioHelper] has been [released][release]. 
*/ fun listenToRecording(): Flow { + println("Released: $released") if (released) return emptyFlow() - resumeRecording() return recorder.readAsFlow() diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt index ecf4f8f0711..696fa1bb957 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt @@ -37,6 +37,7 @@ import java.util.concurrent.ConcurrentLinkedQueue import java.util.concurrent.atomic.AtomicBoolean import kotlin.coroutines.CoroutineContext import kotlinx.coroutines.CoroutineScope +import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.cancel import kotlinx.coroutines.channels.Channel.Factory.UNLIMITED import kotlinx.coroutines.flow.Flow @@ -174,9 +175,11 @@ internal constructor( response .getOrNull() ?.let { - JSON.decodeFromString( + val x = JSON.decodeFromString( it.readBytes().toString(Charsets.UTF_8) ) + println(x) + x } ?.let { emit(it.toPublic()) } yield() @@ -230,6 +233,7 @@ internal constructor( BidiGenerateContentToolResponseSetup(functionList.map { it.toInternalFunctionCall() }) .toInternal() ) + println("Sending function response $jsonString") session.send(Frame.Text(jsonString)) } } @@ -249,6 +253,7 @@ internal constructor( Json.encodeToString( BidiGenerateContentRealtimeInputSetup(mediaChunks.map { (it.toInternal()) }).toInternal() ) + println("Sending $jsonString") session.send(Frame.Text(jsonString)) } } @@ -305,7 +310,7 @@ internal constructor( ?.accumulateUntil(MIN_BUFFER_SIZE) ?.onEach { sendMediaStream(listOf(MediaData(it, "audio/pcm"))) } ?.catch { throw FirebaseAIException.from(it) } - ?.launchIn(scope) + ?.launchIn(CoroutineScope(Dispatchers.IO)) } /** @@ -333,6 +338,7 @@ internal constructor( } else if (functionCallHandler != null) { // It's fine to suspend here since you can't have a function call running concurrently // with an audio response + println("Model is attempting to send a function call response") sendFunctionResponse(it.functionCalls.map(functionCallHandler).toList()) } else { Log.w( @@ -348,11 +354,13 @@ internal constructor( ) } is LiveServerContent -> { + println("State of it's interruption: ${it.interrupted}") if (it.interrupted) { playBackQueue.clear() } else { val audioParts = it.content?.parts?.filterIsInstance().orEmpty() for (part in audioParts) { + println("Model receiving ${part.inlineData}") playBackQueue.add(part.inlineData) } } @@ -396,7 +404,7 @@ internal constructor( if (enableInterruptions != true) { audioHelper?.pauseRecording() } - + println("Model playing $playbackData") audioHelper?.playAudio(playbackData) } } From 80e444b1c0f232c38e68a33f70be2e9734ae53f0 Mon Sep 17 00:00:00 2001 From: VinayGuthal Date: Mon, 6 Oct 2025 15:38:01 -0400 Subject: [PATCH 3/3] add interrupt support --- firebase-ai/CHANGELOG.md | 2 + firebase-ai/api.txt | 6 ++- firebase-ai/gradle.properties | 2 +- .../google/firebase/ai/common/util/android.kt | 10 ---- .../firebase/ai/java/LiveSessionFutures.kt | 49 +++++++++++++++++++ .../google/firebase/ai/type/AudioHelper.kt | 1 - .../google/firebase/ai/type/LiveSession.kt | 18 +++---- 7 files changed, 63 insertions(+), 25 deletions(-) diff --git a/firebase-ai/CHANGELOG.md b/firebase-ai/CHANGELOG.md index 4432555a470..c6662411bd9 100644 --- a/firebase-ai/CHANGELOG.md +++ b/firebase-ai/CHANGELOG.md @@ -2,6 +2,8 @@ - [changed] **Breaking Change**: Removed the `candidateCount` option from 
`LiveGenerationConfig` - [changed] Added better error messages to `ServiceConnectionHandshakeFailedException` +- [changed] Added support for user interrupts for the `startAudioConversation` method in the + `LiveSession` class. # 17.3.0 diff --git a/firebase-ai/api.txt b/firebase-ai/api.txt index a390a14147e..576e61cf0e5 100644 --- a/firebase-ai/api.txt +++ b/firebase-ai/api.txt @@ -148,7 +148,9 @@ package com.google.firebase.ai.java { method public abstract com.google.common.util.concurrent.ListenableFuture sendFunctionResponse(java.util.List functionList); method public abstract com.google.common.util.concurrent.ListenableFuture sendMediaStream(java.util.List mediaChunks); method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(); - method public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler); + method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(boolean enableInterruptions); + method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler); + method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler, boolean enableInterruptions); method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture stopAudioConversation(); method public abstract void stopReceiving(); field public static final com.google.firebase.ai.java.LiveSessionFutures.Companion Companion; @@ -889,7 +891,7 @@ package com.google.firebase.ai.type { method public suspend Object? send(String text, kotlin.coroutines.Continuation); method public suspend Object? sendFunctionResponse(java.util.List functionList, kotlin.coroutines.Continuation); method public suspend Object? sendMediaStream(java.util.List mediaChunks, kotlin.coroutines.Continuation); - method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler = null, kotlin.coroutines.Continuation); + method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler = null, Boolean? enableInterruptions = null, kotlin.coroutines.Continuation); method public void stopAudioConversation(); method public void stopReceiving(); } diff --git a/firebase-ai/gradle.properties b/firebase-ai/gradle.properties index 15e226a5aac..a61baee5a19 100644 --- a/firebase-ai/gradle.properties +++ b/firebase-ai/gradle.properties @@ -12,5 +12,5 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-version=99.9.9 +version=17.4.0 latestReleasedVersion=17.3.0 diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt index d50bf5a549c..e299d3164f5 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt @@ -37,22 +37,12 @@ internal val AudioRecord.minBufferSize: Int internal fun AudioRecord.readAsFlow() = flow { val buffer = ByteArray(minBufferSize) - var startTime = System.currentTimeMillis() while (true) { if (recordingState != AudioRecord.RECORDSTATE_RECORDING) { delay(10) yield() continue } - if (System.currentTimeMillis() - startTime >= 100) { - // This is the manual yield/pause point. - // Using delay(1) suspends the coroutine, freeing the thread - // for the dispatcher to run other tasks briefly. - delay(1) - yield() - startTime = System.currentTimeMillis() // Reset the timer - } - val bytesRead = read(buffer, 0, buffer.size) if (bytesRead > 0) { emit(buffer.copyOf(bytesRead)) diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt index 1efa2dfedfc..a7c667834bd 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt @@ -47,6 +47,7 @@ public abstract class LiveSessionFutures internal constructor() { * @param functionCallHandler A callback function that is invoked whenever the model receives a * function call. */ + @RequiresPermission(RECORD_AUDIO) public abstract fun startAudioConversation( functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)? ): ListenableFuture @@ -58,6 +59,36 @@ public abstract class LiveSessionFutures internal constructor() { @RequiresPermission(RECORD_AUDIO) public abstract fun startAudioConversation(): ListenableFuture + /** + * Starts an audio conversation with the model, which can only be stopped using + * [stopAudioConversation] or [close]. + * + * @param enableInterruptions When `true`, the user can interrupt the model while it is responding; + * audio recording is not paused while the model's response is playing. + * + * **WARNING**: User interruption might not work reliably across all devices. + */ + @RequiresPermission(RECORD_AUDIO) + public abstract fun startAudioConversation(enableInterruptions: Boolean): ListenableFuture + + /** + * Starts an audio conversation with the model, which can only be stopped using + * [stopAudioConversation] or [close]. + * + * @param functionCallHandler A callback function that is invoked whenever the model receives a + * function call. + * + * @param enableInterruptions When `true`, the user can interrupt the model while it is responding; + * audio recording is not paused while the model's response is playing. + * + * **WARNING**: User interruption might not work reliably across all devices. + */ + @RequiresPermission(RECORD_AUDIO) + public abstract fun startAudioConversation( + functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?, + enableInterruptions: Boolean + ): ListenableFuture + /** * Stops the audio conversation with the Gemini Server.
* * @@ -169,6 +200,24 @@ public abstract class LiveSessionFutures internal constructor() { override fun startAudioConversation() = SuspendToFutureAdapter.launchFuture { session.startAudioConversation() } + @RequiresPermission(RECORD_AUDIO) + override fun startAudioConversation(enableInterruptions: Boolean) = + SuspendToFutureAdapter.launchFuture { + session.startAudioConversation(enableInterruptions = enableInterruptions) + } + + @RequiresPermission(RECORD_AUDIO) + override fun startAudioConversation( + functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?, + enableInterruptions: Boolean + ) = + SuspendToFutureAdapter.launchFuture { + session.startAudioConversation( + functionCallHandler, + enableInterruptions = enableInterruptions + ) + } + override fun stopAudioConversation() = SuspendToFutureAdapter.launchFuture { session.stopAudioConversation() } diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioHelper.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioHelper.kt index 3100f6f9ff5..08e90fc8538 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioHelper.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioHelper.kt @@ -140,7 +140,6 @@ internal class AudioHelper( * Returns an empty flow if this [AudioHelper] has been [released][release]. */ fun listenToRecording(): Flow { - println("Released: $released") if (released) return emptyFlow() resumeRecording() diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt index 696fa1bb957..616569f1026 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt @@ -37,7 +37,6 @@ import java.util.concurrent.ConcurrentLinkedQueue import java.util.concurrent.atomic.AtomicBoolean import kotlin.coroutines.CoroutineContext import kotlinx.coroutines.CoroutineScope -import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.cancel import kotlinx.coroutines.channels.Channel.Factory.UNLIMITED import kotlinx.coroutines.flow.Flow @@ -93,6 +92,11 @@ internal constructor( * @param functionCallHandler A callback function that is invoked whenever the model receives a * function call. The [FunctionResponsePart] that the callback function returns will be * automatically sent to the model. + * + * @param enableInterruptions When `true`, the user can interrupt the model while it is responding; + * audio recording is not paused while the model's response is playing. + * + * **WARNING**: User interruption might not work reliably across all devices. 
*/ @RequiresPermission(RECORD_AUDIO) public suspend fun startAudioConversation( @@ -175,11 +179,9 @@ internal constructor( response .getOrNull() ?.let { - val x = JSON.decodeFromString( + JSON.decodeFromString( it.readBytes().toString(Charsets.UTF_8) ) - println(x) - x } ?.let { emit(it.toPublic()) } yield() @@ -233,7 +235,6 @@ internal constructor( BidiGenerateContentToolResponseSetup(functionList.map { it.toInternalFunctionCall() }) .toInternal() ) - println("Sending function response $jsonString") session.send(Frame.Text(jsonString)) } } @@ -253,7 +254,6 @@ internal constructor( Json.encodeToString( BidiGenerateContentRealtimeInputSetup(mediaChunks.map { (it.toInternal()) }).toInternal() ) - println("Sending $jsonString") session.send(Frame.Text(jsonString)) } } @@ -310,7 +310,7 @@ internal constructor( ?.accumulateUntil(MIN_BUFFER_SIZE) ?.onEach { sendMediaStream(listOf(MediaData(it, "audio/pcm"))) } ?.catch { throw FirebaseAIException.from(it) } - ?.launchIn(CoroutineScope(Dispatchers.IO)) + ?.launchIn(scope) } /** @@ -338,7 +338,6 @@ internal constructor( } else if (functionCallHandler != null) { // It's fine to suspend here since you can't have a function call running concurrently // with an audio response - println("Model is attempting to send a function call response") sendFunctionResponse(it.functionCalls.map(functionCallHandler).toList()) } else { Log.w( @@ -354,13 +353,11 @@ internal constructor( ) } is LiveServerContent -> { - println("State of it's interruption: ${it.interrupted}") if (it.interrupted) { playBackQueue.clear() } else { val audioParts = it.content?.parts?.filterIsInstance().orEmpty() for (part in audioParts) { - println("Model receiving ${part.inlineData}") playBackQueue.add(part.inlineData) } } @@ -404,7 +401,6 @@ internal constructor( if (enableInterruptions != true) { audioHelper?.pauseRecording() } - println("Model playing $playbackData") audioHelper?.playAudio(playbackData) } }
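Usage sketch for the API added by this patch series: a minimal Kotlin caller that opts into interruptions. The wrapper function, the `session` parameter, and how the LiveSession is obtained are illustrative assumptions; only the `startAudioConversation(functionCallHandler, enableInterruptions)` signature and its behavior come from the diffs above.

// Hypothetical caller-side sketch; assumes `session` is an already-connected LiveSession.
import android.Manifest.permission.RECORD_AUDIO
import androidx.annotation.RequiresPermission
import com.google.firebase.ai.type.LiveSession

@RequiresPermission(RECORD_AUDIO)
suspend fun startInterruptibleConversation(session: LiveSession) {
  // With enableInterruptions = true, recording is not paused while the model's audio is
  // playing back, so the user's speech can interrupt the response (see the
  // listenForModelPlayback change in patch 1). Per the KDoc, reliability varies by device.
  session.startAudioConversation(
    functionCallHandler = null,
    enableInterruptions = true,
  )
}

Java clients get equivalent overloads on LiveSessionFutures, e.g. startAudioConversation(true) or startAudioConversation(functionCallHandler, true), each returning a ListenableFuture.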