diff --git a/firebase-ai/CHANGELOG.md b/firebase-ai/CHANGELOG.md index 05f6e455fd2..a2b6c34dd75 100644 --- a/firebase-ai/CHANGELOG.md +++ b/firebase-ai/CHANGELOG.md @@ -1,6 +1,8 @@ # Unreleased - [changed] **Breaking Change**: Removed the `candidateCount` option from `LiveGenerationConfig` +- [changed] Added support for user interruptions in the `startAudioConversation` method in the + `LiveSession` class. (#7413) - [changed] Added support for the URL context tool, which allows the model to access content from provided public web URLs to inform and enhance its responses. (#7382) - [changed] Added better error messages to `ServiceConnectionHandshakeFailedException` (#7412) diff --git a/firebase-ai/api.txt b/firebase-ai/api.txt index 0c0420eeb6f..f8df1f045bc 100644 --- a/firebase-ai/api.txt +++ b/firebase-ai/api.txt @@ -148,7 +148,9 @@ package com.google.firebase.ai.java { method public abstract com.google.common.util.concurrent.ListenableFuture sendFunctionResponse(java.util.List functionList); method public abstract com.google.common.util.concurrent.ListenableFuture sendMediaStream(java.util.List mediaChunks); method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(); - method public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler); + method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(boolean enableInterruptions); + method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function1? 
functionCallHandler); + method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler, boolean enableInterruptions); method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture stopAudioConversation(); method public abstract void stopReceiving(); field public static final com.google.firebase.ai.java.LiveSessionFutures.Companion Companion; @@ -891,6 +893,7 @@ package com.google.firebase.ai.type { method public suspend Object? send(String text, kotlin.coroutines.Continuation); method public suspend Object? sendFunctionResponse(java.util.List functionList, kotlin.coroutines.Continuation); method public suspend Object? sendMediaStream(java.util.List mediaChunks, kotlin.coroutines.Continuation); + method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler = null, boolean enableInterruptions = false, kotlin.coroutines.Continuation); method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1? 
functionCallHandler = null, kotlin.coroutines.Continuation); method public void stopAudioConversation(); method public void stopReceiving(); diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt index 4d7a1e46097..6179c8b52e9 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt @@ -17,6 +17,8 @@ package com.google.firebase.ai.common.util import android.media.AudioRecord +import kotlin.time.Duration.Companion.milliseconds +import kotlinx.coroutines.delay import kotlinx.coroutines.flow.flow import kotlinx.coroutines.yield @@ -38,13 +40,15 @@ internal fun AudioRecord.readAsFlow() = flow { while (true) { if (recordingState != AudioRecord.RECORDSTATE_RECORDING) { + // TODO(vguthal): Investigate if both yield and delay are required. + delay(10.milliseconds) yield() continue } - val bytesRead = read(buffer, 0, buffer.size) if (bytesRead > 0) { emit(buffer.copyOf(bytesRead)) } + yield() } } diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt index 1efa2dfedfc..a9615ac2afb 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt @@ -47,6 +47,7 @@ public abstract class LiveSessionFutures internal constructor() { * @param functionCallHandler A callback function that is invoked whenever the model receives a * function call. */ + @RequiresPermission(RECORD_AUDIO) public abstract fun startAudioConversation( functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)? 
): ListenableFuture @@ -58,6 +59,38 @@ public abstract class LiveSessionFutures internal constructor() { @RequiresPermission(RECORD_AUDIO) public abstract fun startAudioConversation(): ListenableFuture + /** + * Starts an audio conversation with the model, which can only be stopped using + * [stopAudioConversation] or [close]. + * + * @param enableInterruptions If enabled, allows the user to speak over or interrupt the model's + * ongoing reply. + * + * **WARNING**: The user interruption feature relies on device-specific support, and may not be + * consistently available. + */ + @RequiresPermission(RECORD_AUDIO) + public abstract fun startAudioConversation(enableInterruptions: Boolean): ListenableFuture + + /** + * Starts an audio conversation with the model, which can only be stopped using + * [stopAudioConversation] or [close]. + * + * @param functionCallHandler A callback function that is invoked whenever the model receives a + * function call. + * + * @param enableInterruptions If enabled, allows the user to speak over or interrupt the model's + * ongoing reply. + * + * **WARNING**: The user interruption feature relies on device-specific support, and may not be + * consistently available. + */ + @RequiresPermission(RECORD_AUDIO) + public abstract fun startAudioConversation( + functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?, + enableInterruptions: Boolean + ): ListenableFuture + /** * Stops the audio conversation with the Gemini Server. 
* @@ -169,6 +202,24 @@ public abstract class LiveSessionFutures internal constructor() { override fun startAudioConversation() = SuspendToFutureAdapter.launchFuture { session.startAudioConversation() } + @RequiresPermission(RECORD_AUDIO) + override fun startAudioConversation(enableInterruptions: Boolean) = + SuspendToFutureAdapter.launchFuture { + session.startAudioConversation(enableInterruptions = enableInterruptions) + } + + @RequiresPermission(RECORD_AUDIO) + override fun startAudioConversation( + functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?, + enableInterruptions: Boolean + ) = + SuspendToFutureAdapter.launchFuture { + session.startAudioConversation( + functionCallHandler, + enableInterruptions = enableInterruptions + ) + } + override fun stopAudioConversation() = SuspendToFutureAdapter.launchFuture { session.stopAudioConversation() } diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioHelper.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioHelper.kt index 4db66ae6c3e..08e90fc8538 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioHelper.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioHelper.kt @@ -141,7 +141,6 @@ internal class AudioHelper( */ fun listenToRecording(): Flow { if (released) return emptyFlow() - resumeRecording() return recorder.readAsFlow() diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt index ccdc3e7fe95..c703cd959c3 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt @@ -97,6 +97,28 @@ internal constructor( public suspend fun startAudioConversation( functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)? 
= null ) { + startAudioConversation(functionCallHandler, false) + } + + /** + * Starts an audio conversation with the model, which can only be stopped using + * [stopAudioConversation] or [close]. + * + * @param functionCallHandler A callback function that is invoked whenever the model receives a + * function call. The [FunctionResponsePart] that the callback function returns will be + * automatically sent to the model. + * + * @param enableInterruptions If enabled, allows the user to speak over or interrupt the model's + * ongoing reply. + * + * **WARNING**: The user interruption feature relies on device-specific support, and may not be + * consistently available. + */ + @RequiresPermission(RECORD_AUDIO) + public suspend fun startAudioConversation( + functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)? = null, + enableInterruptions: Boolean = false, + ) { val context = firebaseApp.applicationContext if ( @@ -120,7 +142,7 @@ internal constructor( recordUserAudio() processModelResponses(functionCallHandler) - listenForModelPlayback() + listenForModelPlayback(enableInterruptions) } } @@ -375,14 +397,16 @@ internal constructor( * * Launched asynchronously on [scope]. */ - private fun listenForModelPlayback() { + private fun listenForModelPlayback(enableInterruptions: Boolean = false) { scope.launch { while (isActive) { val playbackData = playBackQueue.poll() if (playbackData == null) { // The model playback queue is complete, so we can continue recording // TODO(b/408223520): Conditionally resume when param is added - audioHelper?.resumeRecording() + if (!enableInterruptions) { + audioHelper?.resumeRecording() + } yield() } else { /** @@ -390,8 +414,9 @@ internal constructor( * no echo cancellation */ // TODO(b/408223520): Conditionally pause when param is added - audioHelper?.pauseRecording() - + if (enableInterruptions != true) { + audioHelper?.pauseRecording() + } audioHelper?.playAudio(playbackData) } }