From 92dc3acbf76dde1d921f140d374b072e8e39595e Mon Sep 17 00:00:00 2001 From: VinayGuthal Date: Tue, 21 Oct 2025 14:24:49 -0400 Subject: [PATCH 1/8] add the user handler --- firebase-ai/api.txt | 9 + .../firebase/ai/java/LiveSessionFutures.kt | 284 ++++++++++++++++++ .../google/firebase/ai/type/AudioHelper.kt | 5 +- .../google/firebase/ai/type/LiveSession.kt | 36 ++- 4 files changed, 332 insertions(+), 2 deletions(-) diff --git a/firebase-ai/api.txt b/firebase-ai/api.txt index f73c51d7112..071349cfe9d 100644 --- a/firebase-ai/api.txt +++ b/firebase-ai/api.txt @@ -154,9 +154,17 @@ package com.google.firebase.ai.java { method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(boolean enableInterruptions); method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler); method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler, boolean enableInterruptions); + method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler, kotlin.jvm.functions.Function2? audioHandler); + method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler, kotlin.jvm.functions.Function2? 
audioHandler, boolean enableInterruptions); method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler, kotlin.jvm.functions.Function2? transcriptHandler, boolean enableInterruptions); + method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler, kotlin.jvm.functions.Function2? transcriptHandler, kotlin.jvm.functions.Function2? audioHandler); + method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler, kotlin.jvm.functions.Function2? transcriptHandler, kotlin.jvm.functions.Function2? audioHandler, boolean enableInterruptions); + method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function2? audioHandler); + method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function2? audioHandler, boolean enableInterruptions); method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function2? transcriptHandler); method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function2? 
transcriptHandler, boolean enableInterruptions); + method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function2? transcriptHandler, kotlin.jvm.functions.Function2? audioHandler); + method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function2? transcriptHandler, kotlin.jvm.functions.Function2? audioHandler, boolean enableInterruptions); method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture stopAudioConversation(); method public abstract void stopReceiving(); field public static final com.google.firebase.ai.java.LiveSessionFutures.Companion Companion; @@ -925,6 +933,7 @@ package com.google.firebase.ai.type { method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler = null, boolean enableInterruptions = false, kotlin.coroutines.Continuation); method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler = null, kotlin.coroutines.Continuation); method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler = null, kotlin.jvm.functions.Function2? transcriptHandler = null, boolean enableInterruptions = false, kotlin.coroutines.Continuation); + method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler = null, kotlin.jvm.functions.Function2? transcriptHandler = null, kotlin.jvm.functions.Function2? 
audioHandler = null, boolean enableInterruptions = false, kotlin.coroutines.Continuation); method public void stopAudioConversation(); method public void stopReceiving(); } diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt index 5a04ed9f97c..d48d63fd659 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt @@ -17,6 +17,8 @@ package com.google.firebase.ai.java import android.Manifest.permission.RECORD_AUDIO +import android.media.AudioRecord +import android.media.AudioTrack import androidx.annotation.RequiresPermission import androidx.concurrent.futures.SuspendToFutureAdapter import com.google.common.util.concurrent.ListenableFuture @@ -106,6 +108,186 @@ public abstract class LiveSessionFutures internal constructor() { enableInterruptions: Boolean ): ListenableFuture + /** + * Starts an audio conversation with the model, which can only be stopped using + * [stopAudioConversation] or [close]. + * + * @param functionCallHandler A callback function that is invoked whenever the model receives a + * function call. The [FunctionResponsePart] that the callback function returns will be + * automatically sent to the model. + * + * @param audioHandler A callback function that is invoked immediately following the successful + * initialization of the associated [AudioRecord] and [AudioTrack] objects. This offers a final + * opportunity to apply custom configurations or modifications to these objects, which will remain + * valid and effective for the duration of the current audio session. + */ + @RequiresPermission(RECORD_AUDIO) + public abstract fun startAudioConversation( + functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?, + audioHandler: ((AudioRecord, AudioTrack) -> Unit)? 
+ ): ListenableFuture + + /** + * Starts an audio conversation with the model, which can only be stopped using + * [stopAudioConversation] or [close]. + * + * @param transcriptHandler A callback function that is invoked whenever the model receives a + * transcript. The first [Transcription] object is the input transcription, and the second is the + * output transcription. + * + * @param audioHandler A callback function that is invoked immediately following the successful + * initialization of the associated [AudioRecord] and [AudioTrack] objects. This offers a final + * opportunity to apply custom configurations or modifications to these objects, which will remain + * valid and effective for the duration of the current audio session. + */ + @RequiresPermission(RECORD_AUDIO) + public abstract fun startAudioConversation( + transcriptHandler: ((Transcription?, Transcription?) -> Unit)?, + audioHandler: ((AudioRecord, AudioTrack) -> Unit)? + ): ListenableFuture + + /** + * Starts an audio conversation with the model, which can only be stopped using + * [stopAudioConversation] or [close]. + * + * @param audioHandler A callback function that is invoked immediately following the successful + * initialization of the associated [AudioRecord] and [AudioTrack] objects. This offers a final + * opportunity to apply custom configurations or modifications to these objects, which will remain + * valid and effective for the duration of the current audio session. + * + * @param enableInterruptions If enabled, allows the user to speak over or interrupt the model's + * ongoing reply. + * + * **WARNING**: The user interruption feature relies on device-specific support, and may not be + * consistently available. 
+ */ + @RequiresPermission(RECORD_AUDIO) + public abstract fun startAudioConversation( + audioHandler: ((AudioRecord, AudioTrack) -> Unit)?, + enableInterruptions: Boolean + ): ListenableFuture + + /** + * Starts an audio conversation with the model, which can only be stopped using + * [stopAudioConversation] or [close]. + * + * @param functionCallHandler A callback function that is invoked whenever the model receives a + * function call. The [FunctionResponsePart] that the callback function returns will be + * automatically sent to the model. + * + * @param transcriptHandler A callback function that is invoked whenever the model receives a + * transcript. The first [Transcription] object is the input transcription, and the second is the + * output transcription. + * + * @param audioHandler A callback function that is invoked immediately following the successful + * initialization of the associated [AudioRecord] and [AudioTrack] objects. This offers a final + * opportunity to apply custom configurations or modifications to these objects, which will remain + * valid and effective for the duration of the current audio session. + */ + @RequiresPermission(RECORD_AUDIO) + public abstract fun startAudioConversation( + functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?, + transcriptHandler: ((Transcription?, Transcription?) -> Unit)?, + audioHandler: ((AudioRecord, AudioTrack) -> Unit)? + ): ListenableFuture + + /** + * Starts an audio conversation with the model, which can only be stopped using + * [stopAudioConversation] or [close]. + * + * @param functionCallHandler A callback function that is invoked whenever the model receives a + * function call. The [FunctionResponsePart] that the callback function returns will be + * automatically sent to the model. + * + * @param transcriptHandler A callback function that is invoked whenever the model receives a + * transcript. 
The first [Transcription] object is the input transcription, and the second is the + * output transcription. + * + * @param audioHandler A callback function that is invoked immediately following the successful + * initialization of the associated [AudioRecord] and [AudioTrack] objects. This offers a final + * opportunity to apply custom configurations or modifications to these objects, which will remain + * valid and effective for the duration of the current audio session. + * + * @param enableInterruptions If enabled, allows the user to speak over or interrupt the model's + * ongoing reply. + * + * **WARNING**: The user interruption feature relies on device-specific support, and may not be + * consistently available. + */ + @RequiresPermission(RECORD_AUDIO) + public abstract fun startAudioConversation( + functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?, + transcriptHandler: ((Transcription?, Transcription?) -> Unit)?, + audioHandler: ((AudioRecord, AudioTrack) -> Unit)?, + enableInterruptions: Boolean + ): ListenableFuture + + /** + * Starts an audio conversation with the model, which can only be stopped using + * [stopAudioConversation] or [close]. + * + * @param functionCallHandler A callback function that is invoked whenever the model receives a + * function call. The [FunctionResponsePart] that the callback function returns will be + * automatically sent to the model. + * + * @param audioHandler A callback function that is invoked immediately following the successful + * initialization of the associated [AudioRecord] and [AudioTrack] objects. This offers a final + * opportunity to apply custom configurations or modifications to these objects, which will remain + * valid and effective for the duration of the current audio session. + * + * @param enableInterruptions If enabled, allows the user to speak over or interrupt the model's + * ongoing reply. 
+ * + * **WARNING**: The user interruption feature relies on device-specific support, and may not be + * consistently available. + */ + @RequiresPermission(RECORD_AUDIO) + public abstract fun startAudioConversation( + functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?, + audioHandler: ((AudioRecord, AudioTrack) -> Unit)?, + enableInterruptions: Boolean + ): ListenableFuture + + /** + * Starts an audio conversation with the model, which can only be stopped using + * [stopAudioConversation] or [close]. + * + * @param transcriptHandler A callback function that is invoked whenever the model receives a + * transcript. The first [Transcription] object is the input transcription, and the second is the + * output transcription. + * + * @param audioHandler A callback function that is invoked immediately following the successful + * initialization of the associated [AudioRecord] and [AudioTrack] objects. This offers a final + * opportunity to apply custom configurations or modifications to these objects, which will remain + * valid and effective for the duration of the current audio session. + * + * @param enableInterruptions If enabled, allows the user to speak over or interrupt the model's + * ongoing reply. + * + * **WARNING**: The user interruption feature relies on device-specific support, and may not be + * consistently available. + */ + @RequiresPermission(RECORD_AUDIO) + public abstract fun startAudioConversation( + transcriptHandler: ((Transcription?, Transcription?) -> Unit)?, + audioHandler: ((AudioRecord, AudioTrack) -> Unit)?, + enableInterruptions: Boolean + ): ListenableFuture + + /** + * Starts an audio conversation with the model, which can only be stopped using + * [stopAudioConversation] or [close]. + * + * @param audioHandler A callback function that is invoked immediately following the successful + * initialization of the associated [AudioRecord] and [AudioTrack] objects. 
This offers a final + * opportunity to apply custom configurations or modifications to these objects, which will remain + * valid and effective for the duration of the current audio session. + */ + @RequiresPermission(RECORD_AUDIO) + public abstract fun startAudioConversation( + audioHandler: ((AudioRecord, AudioTrack) -> Unit)? + ): ListenableFuture + /** * Starts an audio conversation with the model, which can only be stopped using * [stopAudioConversation] or [close]. @@ -320,6 +502,108 @@ public abstract class LiveSessionFutures internal constructor() { ) } + @RequiresPermission(RECORD_AUDIO) + override fun startAudioConversation( + functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?, + audioHandler: ((AudioRecord, AudioTrack) -> Unit)? + ) = + SuspendToFutureAdapter.launchFuture { + session.startAudioConversation( + functionCallHandler = functionCallHandler, + audioHandler = audioHandler + ) + } + + @RequiresPermission(RECORD_AUDIO) + override fun startAudioConversation( + transcriptHandler: ((Transcription?, Transcription?) -> Unit)?, + audioHandler: ((AudioRecord, AudioTrack) -> Unit)? + ) = + SuspendToFutureAdapter.launchFuture { + session.startAudioConversation( + transcriptHandler = transcriptHandler, + audioHandler = audioHandler + ) + } + + @RequiresPermission(RECORD_AUDIO) + override fun startAudioConversation( + audioHandler: ((AudioRecord, AudioTrack) -> Unit)?, + enableInterruptions: Boolean + ) = + SuspendToFutureAdapter.launchFuture { + session.startAudioConversation( + audioHandler = audioHandler, + enableInterruptions = enableInterruptions + ) + } + + @RequiresPermission(RECORD_AUDIO) + override fun startAudioConversation( + functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?, + transcriptHandler: ((Transcription?, Transcription?) -> Unit)?, + audioHandler: ((AudioRecord, AudioTrack) -> Unit)? 
+ ) = + SuspendToFutureAdapter.launchFuture { + session.startAudioConversation( + functionCallHandler = functionCallHandler, + transcriptHandler = transcriptHandler, + audioHandler = audioHandler, + ) + } + + @RequiresPermission(RECORD_AUDIO) + override fun startAudioConversation( + functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?, + transcriptHandler: ((Transcription?, Transcription?) -> Unit)?, + audioHandler: ((AudioRecord, AudioTrack) -> Unit)?, + enableInterruptions: Boolean + ) = + SuspendToFutureAdapter.launchFuture { + session.startAudioConversation( + functionCallHandler = functionCallHandler, + transcriptHandler = transcriptHandler, + audioHandler = audioHandler, + enableInterruptions = enableInterruptions + ) + } + + @RequiresPermission(RECORD_AUDIO) + override fun startAudioConversation( + functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?, + audioHandler: ((AudioRecord, AudioTrack) -> Unit)?, + enableInterruptions: Boolean + ) = + SuspendToFutureAdapter.launchFuture { + session.startAudioConversation( + functionCallHandler = functionCallHandler, + audioHandler = audioHandler, + enableInterruptions = enableInterruptions, + ) + } + + @RequiresPermission(RECORD_AUDIO) + override fun startAudioConversation( + transcriptHandler: ((Transcription?, Transcription?) -> Unit)?, + audioHandler: ((AudioRecord, AudioTrack) -> Unit)?, + enableInterruptions: Boolean + ) = + SuspendToFutureAdapter.launchFuture { + session.startAudioConversation( + transcriptHandler = transcriptHandler, + audioHandler = audioHandler, + enableInterruptions = enableInterruptions, + ) + } + + @RequiresPermission(RECORD_AUDIO) + override fun startAudioConversation(audioHandler: ((AudioRecord, AudioTrack) -> Unit)?) 
= + SuspendToFutureAdapter.launchFuture { + session.startAudioConversation( + audioHandler = audioHandler, + ) + } + @RequiresPermission(RECORD_AUDIO) override fun startAudioConversation( functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?, diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioHelper.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioHelper.kt index 06b4a3efe25..ade1f1e26d5 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioHelper.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioHelper.kt @@ -159,7 +159,7 @@ internal class AudioHelper( * constructor. */ @RequiresPermission(Manifest.permission.RECORD_AUDIO) - fun build(): AudioHelper { + fun build(audioHandler: ((AudioRecord, AudioTrack) -> Unit)? = null): AudioHelper { val playbackTrack = AudioTrack( AudioAttributes.Builder() @@ -209,6 +209,9 @@ internal class AudioHelper( AcousticEchoCanceler.create(recorder.audioSessionId)?.enabled = true } + if (audioHandler != null) { + audioHandler(recorder, playbackTrack) + } return AudioHelper(recorder, playbackTrack) } } diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt index 37d6f5011cb..c187626a5c7 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt @@ -20,6 +20,7 @@ import android.Manifest.permission.RECORD_AUDIO import android.annotation.SuppressLint import android.content.pm.PackageManager import android.media.AudioFormat +import android.media.AudioRecord import android.media.AudioTrack import android.os.Process import android.os.StrictMode @@ -171,6 +172,39 @@ internal constructor( transcriptHandler: ((Transcription?, Transcription?) -> Unit)? 
= null, enableInterruptions: Boolean = false, ) { + startAudioConversation(functionCallHandler, transcriptHandler, null, enableInterruptions) + } + + /** + * Starts an audio conversation with the model, which can only be stopped using + * [stopAudioConversation] or [close]. + * + * @param functionCallHandler A callback function that is invoked whenever the model receives a + * function call. The [FunctionResponsePart] that the callback function returns will be + * automatically sent to the model. + * + * @param transcriptHandler A callback function that is invoked whenever the model receives a + * transcript. The first [Transcription] object is the input transcription, and the second is the + * output transcription. + * + * @param audioHandler A callback function that is invoked immediately following the successful + * initialization of the associated [AudioRecord] and [AudioTrack] objects. This offers a final + * opportunity to apply custom configurations or modifications to these objects, which will remain + * valid and effective for the duration of the current audio session. + * + * @param enableInterruptions If enabled, allows the user to speak over or interrupt the model's + * ongoing reply. + * + * **WARNING**: The user interruption feature relies on device-specific support, and may not be + * consistently available. + */ + @RequiresPermission(RECORD_AUDIO) + public suspend fun startAudioConversation( + functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)? = null, + transcriptHandler: ((Transcription?, Transcription?) -> Unit)? = null, + audioHandler: ((AudioRecord, AudioTrack) -> Unit)? 
= null, + enableInterruptions: Boolean = false, + ) { val context = firebaseApp.applicationContext if ( @@ -191,7 +225,7 @@ internal constructor( networkScope = CoroutineScope(blockingDispatcher + childJob() + CoroutineName("LiveSession Network")) audioScope = CoroutineScope(audioDispatcher + childJob() + CoroutineName("LiveSession Audio")) - audioHelper = AudioHelper.build() + audioHelper = AudioHelper.build(audioHandler) recordUserAudio() processModelResponses(functionCallHandler, transcriptHandler) From e51cadd3fc13ae95bb6503c4870d29710daa4542 Mon Sep 17 00:00:00 2001 From: VinayGuthal Date: Tue, 21 Oct 2025 17:13:04 -0400 Subject: [PATCH 2/8] update config --- .../firebase/ai/java/LiveSessionFutures.kt | 295 +----------------- .../firebase/ai/type/ConversationConfig.kt | 129 ++++++++ .../google/firebase/ai/type/LiveSession.kt | 43 +-- 3 files changed, 157 insertions(+), 310 deletions(-) create mode 100644 firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ConversationConfig.kt diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt index d48d63fd659..67eeb22b849 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt @@ -17,12 +17,11 @@ package com.google.firebase.ai.java import android.Manifest.permission.RECORD_AUDIO -import android.media.AudioRecord -import android.media.AudioTrack import androidx.annotation.RequiresPermission import androidx.concurrent.futures.SuspendToFutureAdapter import com.google.common.util.concurrent.ListenableFuture import com.google.firebase.ai.type.Content +import com.google.firebase.ai.type.ConversationConfig import com.google.firebase.ai.type.FunctionCallPart import com.google.firebase.ai.type.FunctionResponsePart import com.google.firebase.ai.type.InlineData @@ -51,61 +50,16 @@ public abstract 
class LiveSessionFutures internal constructor() { @RequiresPermission(RECORD_AUDIO) public abstract fun startAudioConversation(): ListenableFuture - /** - * Starts an audio conversation with the model, which can only be stopped using - * [stopAudioConversation] or [close]. - * - * @param functionCallHandler A callback function that is invoked whenever the model receives a - * function call. - */ - @RequiresPermission(RECORD_AUDIO) - public abstract fun startAudioConversation( - functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)? - ): ListenableFuture - /** * Starts an audio conversation with the model, which can only be stopped using * [stopAudioConversation]. - * @param transcriptHandler A callback function that is invoked whenever the model receives a - * transcript. The first [Transcription] object is the input transcription, and the second is the - * output transcription - */ - @RequiresPermission(RECORD_AUDIO) - public abstract fun startAudioConversation( - transcriptHandler: ((Transcription?, Transcription?) -> Unit)?, - ): ListenableFuture - - /** - * Starts an audio conversation with the model, which can only be stopped using - * [stopAudioConversation] or [close]. * - * @param enableInterruptions If enabled, allows the user to speak over or interrupt the model's - * ongoing reply. - * - * **WARNING**: The user interruption feature relies on device-specific support, and may not be - * consistently available. - */ - @RequiresPermission(RECORD_AUDIO) - public abstract fun startAudioConversation(enableInterruptions: Boolean): ListenableFuture - - /** - * Starts an audio conversation with the model, which can only be stopped using - * [stopAudioConversation] or [close]. - * - * @param transcriptHandler A callback function that is invoked whenever the model receives a - * transcript. 
The first [Transcription] object is the input transcription, and the second is the - * output transcription - * - * @param enableInterruptions If enabled, allows the user to speak over or interrupt the model's - * ongoing reply. - * - * **WARNING**: The user interruption feature relies on device-specific support, and may not be - * consistently available. + * @param conversationConfig A [ConversationConfig] provided by the user to control the various + * aspects of the conversation. */ @RequiresPermission(RECORD_AUDIO) public abstract fun startAudioConversation( - transcriptHandler: ((Transcription?, Transcription?) -> Unit)?, - enableInterruptions: Boolean + conversationConfig: ConversationConfig ): ListenableFuture /** @@ -113,128 +67,29 @@ public abstract class LiveSessionFutures internal constructor() { * [stopAudioConversation] or [close]. * * @param functionCallHandler A callback function that is invoked whenever the model receives a - * function call. The [FunctionResponsePart] that the callback function returns will be - * automatically sent to the model. - * - * @param audioHandler A callback function that is invoked immediately following the successful - * initialization of the associated [AudioRecord] and [AudioTrack] objects. This offers a final - * opportunity to apply custom configurations or modifications to these objects, which will remain - * valid and effective for the duration of the current audio session. - */ - @RequiresPermission(RECORD_AUDIO) - public abstract fun startAudioConversation( - functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?, - audioHandler: ((AudioRecord, AudioTrack) -> Unit)? - ): ListenableFuture - - /** - * Starts an audio conversation with the model, which can only be stopped using - * [stopAudioConversation] or [close]. - * - * @param transcriptHandler A callback function that is invoked whenever the model receives a - * transcript. 
The first [Transcription] object is the input transcription, and the second is the - * output transcription. - * - * @param audioHandler A callback function that is invoked immediately following the successful - * initialization of the associated [AudioRecord] and [AudioTrack] objects. This offers a final - * opportunity to apply custom configurations or modifications to these objects, which will remain - * valid and effective for the duration of the current audio session. - */ - @RequiresPermission(RECORD_AUDIO) - public abstract fun startAudioConversation( - transcriptHandler: ((Transcription?, Transcription?) -> Unit)?, - audioHandler: ((AudioRecord, AudioTrack) -> Unit)? - ): ListenableFuture - - /** - * Starts an audio conversation with the model, which can only be stopped using - * [stopAudioConversation] or [close]. - * - * @param audioHandler A callback function that is invoked immediately following the successful - * initialization of the associated [AudioRecord] and [AudioTrack] objects. This offers a final - * opportunity to apply custom configurations or modifications to these objects, which will remain - * valid and effective for the duration of the current audio session. - * - * @param enableInterruptions If enabled, allows the user to speak over or interrupt the model's - * ongoing reply. - * - * **WARNING**: The user interruption feature relies on device-specific support, and may not be - * consistently available. - */ - @RequiresPermission(RECORD_AUDIO) - public abstract fun startAudioConversation( - audioHandler: ((AudioRecord, AudioTrack) -> Unit)?, - enableInterruptions: Boolean - ): ListenableFuture - - /** - * Starts an audio conversation with the model, which can only be stopped using - * [stopAudioConversation] or [close]. - * - * @param functionCallHandler A callback function that is invoked whenever the model receives a - * function call. 
The [FunctionResponsePart] that the callback function returns will be - * automatically sent to the model. - * - * @param transcriptHandler A callback function that is invoked whenever the model receives a - * transcript. The first [Transcription] object is the input transcription, and the second is the - * output transcription. - * - * @param audioHandler A callback function that is invoked immediately following the successful - * initialization of the associated [AudioRecord] and [AudioTrack] objects. This offers a final - * opportunity to apply custom configurations or modifications to these objects, which will remain - * valid and effective for the duration of the current audio session. + * function call. */ @RequiresPermission(RECORD_AUDIO) public abstract fun startAudioConversation( - functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?, - transcriptHandler: ((Transcription?, Transcription?) -> Unit)?, - audioHandler: ((AudioRecord, AudioTrack) -> Unit)? + functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)? ): ListenableFuture /** * Starts an audio conversation with the model, which can only be stopped using - * [stopAudioConversation] or [close]. - * - * @param functionCallHandler A callback function that is invoked whenever the model receives a - * function call. The [FunctionResponsePart] that the callback function returns will be - * automatically sent to the model. - * + * [stopAudioConversation]. * @param transcriptHandler A callback function that is invoked whenever the model receives a * transcript. The first [Transcription] object is the input transcription, and the second is the - * output transcription. - * - * @param audioHandler A callback function that is invoked immediately following the successful - * initialization of the associated [AudioRecord] and [AudioTrack] objects. 
This offers a final - * opportunity to apply custom configurations or modifications to these objects, which will remain - * valid and effective for the duration of the current audio session. - * - * @param enableInterruptions If enabled, allows the user to speak over or interrupt the model's - * ongoing reply. - * - * **WARNING**: The user interruption feature relies on device-specific support, and may not be - * consistently available. + * output transcription */ @RequiresPermission(RECORD_AUDIO) public abstract fun startAudioConversation( - functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?, transcriptHandler: ((Transcription?, Transcription?) -> Unit)?, - audioHandler: ((AudioRecord, AudioTrack) -> Unit)?, - enableInterruptions: Boolean ): ListenableFuture /** * Starts an audio conversation with the model, which can only be stopped using * [stopAudioConversation] or [close]. * - * @param functionCallHandler A callback function that is invoked whenever the model receives a - * function call. The [FunctionResponsePart] that the callback function returns will be - * automatically sent to the model. - * - * @param audioHandler A callback function that is invoked immediately following the successful - * initialization of the associated [AudioRecord] and [AudioTrack] objects. This offers a final - * opportunity to apply custom configurations or modifications to these objects, which will remain - * valid and effective for the duration of the current audio session. - * * @param enableInterruptions If enabled, allows the user to speak over or interrupt the model's * ongoing reply. * @@ -242,11 +97,7 @@ public abstract class LiveSessionFutures internal constructor() { * consistently available. 
*/ @RequiresPermission(RECORD_AUDIO) - public abstract fun startAudioConversation( - functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?, - audioHandler: ((AudioRecord, AudioTrack) -> Unit)?, - enableInterruptions: Boolean - ): ListenableFuture + public abstract fun startAudioConversation(enableInterruptions: Boolean): ListenableFuture /** * Starts an audio conversation with the model, which can only be stopped using @@ -254,12 +105,7 @@ public abstract class LiveSessionFutures internal constructor() { * * @param transcriptHandler A callback function that is invoked whenever the model receives a * transcript. The first [Transcription] object is the input transcription, and the second is the - * output transcription. - * - * @param audioHandler A callback function that is invoked immediately following the successful - * initialization of the associated [AudioRecord] and [AudioTrack] objects. This offers a final - * opportunity to apply custom configurations or modifications to these objects, which will remain - * valid and effective for the duration of the current audio session. + * output transcription * * @param enableInterruptions If enabled, allows the user to speak over or interrupt the model's * ongoing reply. @@ -270,24 +116,9 @@ public abstract class LiveSessionFutures internal constructor() { @RequiresPermission(RECORD_AUDIO) public abstract fun startAudioConversation( transcriptHandler: ((Transcription?, Transcription?) -> Unit)?, - audioHandler: ((AudioRecord, AudioTrack) -> Unit)?, enableInterruptions: Boolean ): ListenableFuture - /** - * Starts an audio conversation with the model, which can only be stopped using - * [stopAudioConversation] or [close]. - * - * @param audioHandler A callback function that is invoked immediately following the successful - * initialization of the associated [AudioRecord] and [AudioTrack] objects. 
This offers a final - * opportunity to apply custom configurations or modifications to these objects, which will remain - * valid and effective for the duration of the current audio session. - */ - @RequiresPermission(RECORD_AUDIO) - public abstract fun startAudioConversation( - audioHandler: ((AudioRecord, AudioTrack) -> Unit)? - ): ListenableFuture - /** * Starts an audio conversation with the model, which can only be stopped using * [stopAudioConversation] or [close]. @@ -480,6 +311,10 @@ public abstract class LiveSessionFutures internal constructor() { session.startAudioConversation(transcriptHandler = transcriptHandler) } + @RequiresPermission(RECORD_AUDIO) + override fun startAudioConversation(conversationConfig: ConversationConfig) = + SuspendToFutureAdapter.launchFuture { session.startAudioConversation(conversationConfig) } + @RequiresPermission(RECORD_AUDIO) override fun startAudioConversation() = SuspendToFutureAdapter.launchFuture { session.startAudioConversation() } @@ -502,108 +337,6 @@ public abstract class LiveSessionFutures internal constructor() { ) } - @RequiresPermission(RECORD_AUDIO) - override fun startAudioConversation( - functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?, - audioHandler: ((AudioRecord, AudioTrack) -> Unit)? - ) = - SuspendToFutureAdapter.launchFuture { - session.startAudioConversation( - functionCallHandler = functionCallHandler, - audioHandler = audioHandler - ) - } - - @RequiresPermission(RECORD_AUDIO) - override fun startAudioConversation( - transcriptHandler: ((Transcription?, Transcription?) -> Unit)?, - audioHandler: ((AudioRecord, AudioTrack) -> Unit)? 
- ) = - SuspendToFutureAdapter.launchFuture { - session.startAudioConversation( - transcriptHandler = transcriptHandler, - audioHandler = audioHandler - ) - } - - @RequiresPermission(RECORD_AUDIO) - override fun startAudioConversation( - audioHandler: ((AudioRecord, AudioTrack) -> Unit)?, - enableInterruptions: Boolean - ) = - SuspendToFutureAdapter.launchFuture { - session.startAudioConversation( - audioHandler = audioHandler, - enableInterruptions = enableInterruptions - ) - } - - @RequiresPermission(RECORD_AUDIO) - override fun startAudioConversation( - functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?, - transcriptHandler: ((Transcription?, Transcription?) -> Unit)?, - audioHandler: ((AudioRecord, AudioTrack) -> Unit)? - ) = - SuspendToFutureAdapter.launchFuture { - session.startAudioConversation( - functionCallHandler = functionCallHandler, - transcriptHandler = transcriptHandler, - audioHandler = audioHandler, - ) - } - - @RequiresPermission(RECORD_AUDIO) - override fun startAudioConversation( - functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?, - transcriptHandler: ((Transcription?, Transcription?) 
-> Unit)?, - audioHandler: ((AudioRecord, AudioTrack) -> Unit)?, - enableInterruptions: Boolean - ) = - SuspendToFutureAdapter.launchFuture { - session.startAudioConversation( - functionCallHandler = functionCallHandler, - transcriptHandler = transcriptHandler, - audioHandler = audioHandler, - enableInterruptions = enableInterruptions - ) - } - - @RequiresPermission(RECORD_AUDIO) - override fun startAudioConversation( - functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?, - audioHandler: ((AudioRecord, AudioTrack) -> Unit)?, - enableInterruptions: Boolean - ) = - SuspendToFutureAdapter.launchFuture { - session.startAudioConversation( - functionCallHandler = functionCallHandler, - audioHandler = audioHandler, - enableInterruptions = enableInterruptions, - ) - } - - @RequiresPermission(RECORD_AUDIO) - override fun startAudioConversation( - transcriptHandler: ((Transcription?, Transcription?) -> Unit)?, - audioHandler: ((AudioRecord, AudioTrack) -> Unit)?, - enableInterruptions: Boolean - ) = - SuspendToFutureAdapter.launchFuture { - session.startAudioConversation( - transcriptHandler = transcriptHandler, - audioHandler = audioHandler, - enableInterruptions = enableInterruptions, - ) - } - - @RequiresPermission(RECORD_AUDIO) - override fun startAudioConversation(audioHandler: ((AudioRecord, AudioTrack) -> Unit)?) 
= - SuspendToFutureAdapter.launchFuture { - session.startAudioConversation( - audioHandler = audioHandler, - ) - } - @RequiresPermission(RECORD_AUDIO) override fun startAudioConversation( functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?, diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ConversationConfig.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ConversationConfig.kt new file mode 100644 index 00000000000..d2f15fb06de --- /dev/null +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ConversationConfig.kt @@ -0,0 +1,129 @@ +/* + * Copyright 2025 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.google.firebase.ai.type + +import android.media.AudioRecord +import android.media.AudioTrack + +/** + * Configuration parameters to use for conversation config. + * + * @param functionCallHandler A callback function that is invoked whenever the model receives a + * function call. The [FunctionResponsePart] that the callback function returns will be + * automatically sent to the model. + * + * @param transcriptHandler A callback function that is invoked whenever the model receives a + * transcript. The first [Transcription] object is the input transcription, and the second is the + * output transcription. 
+ * + * @param audioHandler A callback function that is invoked immediately following the successful + * initialization of the associated [AudioRecord] and [AudioTrack] objects. This offers a final + * opportunity to apply custom configurations or modifications to these objects, which will remain + * valid and effective for the duration of the current audio session. + * + * @param enableInterruptions If enabled, allows the user to speak over or interrupt the model's + * ongoing reply. + * + * **WARNING**: The user interruption feature relies on device-specific support, and may not be + * consistently available. + */ +@PublicPreviewAPI +public class ConversationConfig +private constructor( + internal var functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?, + internal var audioHandler: ((AudioRecord, AudioTrack) -> Unit)?, + internal var transcriptHandler: ((Transcription?, Transcription?) -> Unit)?, + internal var enableInterruptions: Boolean +) { + + /** + * Builder for creating a [ConversationConfig]. + * + * Mainly intended for Java interop. Kotlin consumers should use [conversationConfig] for a more + * idiomatic experience. + * + * @property functionCallHandler See [ConversationConfig.functionCallHandler]. + * + * @property audioHandler See [ConversationConfig.audioHandler]. + * + * @property transcriptHandler See [ConversationConfig.transcriptHandler]. + * + * @property enableInterruptions See [ConversationConfig.enableInterruptions]. + */ + public class Builder { + @JvmField public var functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)? = null + @JvmField public var audioHandler: ((AudioRecord, AudioTrack) -> Unit)? = null + @JvmField public var transcriptHandler: ((Transcription?, Transcription?) -> Unit)? = null + @JvmField public var enableInterruptions: Boolean = false + + public fun setFunctionCallHandler( + functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)? 
+ ): Builder = apply { this.functionCallHandler = functionCallHandler } + + public fun setAudioHandler(audioHandler: ((AudioRecord, AudioTrack) -> Unit)?): Builder = + apply { + this.audioHandler = audioHandler + } + + public fun setTranscriptHandler( + transcriptHandler: ((Transcription?, Transcription?) -> Unit)? + ): Builder = apply { this.transcriptHandler = transcriptHandler } + + public fun setEnableInterruptions(enableInterruptions: Boolean): Builder = apply { + this.enableInterruptions = enableInterruptions + } + + /** Create a new [ConversationConfig] with the attached arguments. */ + public fun build(): ConversationConfig = + ConversationConfig( + functionCallHandler = functionCallHandler, + audioHandler = audioHandler, + transcriptHandler = transcriptHandler, + enableInterruptions = enableInterruptions + ) + } + + public companion object { + + /** + * Alternative casing for [ConversationConfig.Builder]: + * ``` + * val config = ConversationConfig.builder() + * ``` + */ + public fun builder(): Builder = Builder() + } +} + +/** + * Helper method to construct a [ConversationConfig] in a DSL-like manner. + * + * Example Usage: + * ``` + * conversationConfig { + * functionCallHandler = ... + * audioHandler = ... + * ... 
+ * } + * ``` + */ +@OptIn(PublicPreviewAPI::class) +public fun conversationConfig(init: ConversationConfig.Builder.() -> Unit): ConversationConfig { + val builder = ConversationConfig.builder() + builder.init() + return builder.build() +} diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt index c187626a5c7..3a6c9743e8f 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt @@ -20,7 +20,6 @@ import android.Manifest.permission.RECORD_AUDIO import android.annotation.SuppressLint import android.content.pm.PackageManager import android.media.AudioFormat -import android.media.AudioRecord import android.media.AudioTrack import android.os.Process import android.os.StrictMode @@ -172,39 +171,22 @@ internal constructor( transcriptHandler: ((Transcription?, Transcription?) -> Unit)? = null, enableInterruptions: Boolean = false, ) { - startAudioConversation(functionCallHandler, transcriptHandler, null, enableInterruptions) + val config = ConversationConfig.builder() + config.functionCallHandler = functionCallHandler + config.transcriptHandler = transcriptHandler + config.enableInterruptions = enableInterruptions + startAudioConversation(config.build()) } /** * Starts an audio conversation with the model, which can only be stopped using * [stopAudioConversation] or [close]. * - * @param functionCallHandler A callback function that is invoked whenever the model receives a - * function call. The [FunctionResponsePart] that the callback function returns will be - * automatically sent to the model. - * - * @param transcriptHandler A callback function that is invoked whenever the model receives a - * transcript. The first [Transcription] object is the input transcription, and the second is the - * output transcription. 
- * - * @param audioHandler A callback function that is invoked immediately following the successful - * initialization of the associated [AudioRecord] and [AudioTrack] objects. This offers a final - * opportunity to apply custom configurations or modifications to these objects, which will remain - * valid and effective for the duration of the current audio session. - * - * @param enableInterruptions If enabled, allows the user to speak over or interrupt the model's - * ongoing reply. - * - * **WARNING**: The user interruption feature relies on device-specific support, and may not be - * consistently available. + * @param conversationConfig A [ConversationConfig] provided by the user to control the various + * aspects of the conversation. */ @RequiresPermission(RECORD_AUDIO) - public suspend fun startAudioConversation( - functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)? = null, - transcriptHandler: ((Transcription?, Transcription?) -> Unit)? = null, - audioHandler: ((AudioRecord, AudioTrack) -> Unit)? 
= null, - enableInterruptions: Boolean = false, - ) { + public suspend fun startAudioConversation(conversationConfig: ConversationConfig) { val context = firebaseApp.applicationContext if ( @@ -225,11 +207,14 @@ internal constructor( networkScope = CoroutineScope(blockingDispatcher + childJob() + CoroutineName("LiveSession Network")) audioScope = CoroutineScope(audioDispatcher + childJob() + CoroutineName("LiveSession Audio")) - audioHelper = AudioHelper.build(audioHandler) + audioHelper = AudioHelper.build(conversationConfig.audioHandler) recordUserAudio() - processModelResponses(functionCallHandler, transcriptHandler) - listenForModelPlayback(enableInterruptions) + processModelResponses( + conversationConfig.functionCallHandler, + conversationConfig.transcriptHandler + ) + listenForModelPlayback(conversationConfig.enableInterruptions) } } From ec1c56cf993b9a468b411fb2c46d0b1c50f43e1b Mon Sep 17 00:00:00 2001 From: VinayGuthal Date: Tue, 21 Oct 2025 17:14:44 -0400 Subject: [PATCH 3/8] update api text file --- firebase-ai/api.txt | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/firebase-ai/api.txt b/firebase-ai/api.txt index 071349cfe9d..e9ea5abe68b 100644 --- a/firebase-ai/api.txt +++ b/firebase-ai/api.txt @@ -152,19 +152,12 @@ package com.google.firebase.ai.java { method public abstract com.google.common.util.concurrent.ListenableFuture sendVideoRealtime(com.google.firebase.ai.type.InlineData video); method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(); method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(boolean enableInterruptions); + method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture 
startAudioConversation(com.google.firebase.ai.type.ConversationConfig conversationConfig); method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler); method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler, boolean enableInterruptions); - method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler, kotlin.jvm.functions.Function2? audioHandler); - method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler, kotlin.jvm.functions.Function2? audioHandler, boolean enableInterruptions); method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler, kotlin.jvm.functions.Function2? transcriptHandler, boolean enableInterruptions); - method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler, kotlin.jvm.functions.Function2? transcriptHandler, kotlin.jvm.functions.Function2? audioHandler); - method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler, kotlin.jvm.functions.Function2? transcriptHandler, kotlin.jvm.functions.Function2? 
audioHandler, boolean enableInterruptions); - method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function2? audioHandler); - method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function2? audioHandler, boolean enableInterruptions); method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function2? transcriptHandler); method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function2? transcriptHandler, boolean enableInterruptions); - method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function2? transcriptHandler, kotlin.jvm.functions.Function2? audioHandler); - method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function2? transcriptHandler, kotlin.jvm.functions.Function2? 
audioHandler, boolean enableInterruptions); method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture stopAudioConversation(); method public abstract void stopReceiving(); field public static final com.google.firebase.ai.java.LiveSessionFutures.Companion Companion; @@ -297,6 +290,31 @@ package com.google.firebase.ai.type { public static final class ContentModality.Companion { } + @com.google.firebase.ai.type.PublicPreviewAPI public final class ConversationConfig { + field public static final com.google.firebase.ai.type.ConversationConfig.Companion Companion; + } + + public static final class ConversationConfig.Builder { + ctor public ConversationConfig.Builder(); + method public com.google.firebase.ai.type.ConversationConfig build(); + method public com.google.firebase.ai.type.ConversationConfig.Builder setAudioHandler(kotlin.jvm.functions.Function2? audioHandler); + method public com.google.firebase.ai.type.ConversationConfig.Builder setEnableInterruptions(boolean enableInterruptions); + method public com.google.firebase.ai.type.ConversationConfig.Builder setFunctionCallHandler(kotlin.jvm.functions.Function1? functionCallHandler); + method public com.google.firebase.ai.type.ConversationConfig.Builder setTranscriptHandler(kotlin.jvm.functions.Function2? transcriptHandler); + field public kotlin.jvm.functions.Function2? audioHandler; + field public boolean enableInterruptions; + field public kotlin.jvm.functions.Function1? functionCallHandler; + field public kotlin.jvm.functions.Function2? 
transcriptHandler; + } + + public static final class ConversationConfig.Companion { + method public com.google.firebase.ai.type.ConversationConfig.Builder builder(); + } + + public final class ConversationConfigKt { + method public static com.google.firebase.ai.type.ConversationConfig conversationConfig(kotlin.jvm.functions.Function1 init); + } + public final class CountTokensResponse { ctor public CountTokensResponse(int totalTokens, @Deprecated Integer? totalBillableCharacters = null, java.util.List promptTokensDetails = emptyList()); method public operator int component1(); @@ -930,10 +948,10 @@ package com.google.firebase.ai.type { method @Deprecated public suspend Object? sendMediaStream(java.util.List mediaChunks, kotlin.coroutines.Continuation); method public suspend Object? sendTextRealtime(String text, kotlin.coroutines.Continuation); method public suspend Object? sendVideoRealtime(com.google.firebase.ai.type.InlineData video, kotlin.coroutines.Continuation); + method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(com.google.firebase.ai.type.ConversationConfig conversationConfig, kotlin.coroutines.Continuation); method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler = null, boolean enableInterruptions = false, kotlin.coroutines.Continuation); method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler = null, kotlin.coroutines.Continuation); method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler = null, kotlin.jvm.functions.Function2? 
transcriptHandler = null, boolean enableInterruptions = false, kotlin.coroutines.Continuation); - method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler = null, kotlin.jvm.functions.Function2? transcriptHandler = null, kotlin.jvm.functions.Function2? audioHandler = null, boolean enableInterruptions = false, kotlin.coroutines.Continuation); method public void stopAudioConversation(); method public void stopReceiving(); } From 8d4a98d20aaa0dfd742aadec20bb004e8ef70595 Mon Sep 17 00:00:00 2001 From: VinayGuthal Date: Wed, 22 Oct 2025 11:59:05 -0400 Subject: [PATCH 4/8] fix kotlin docs --- .../com/google/firebase/ai/type/ConversationConfig.kt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ConversationConfig.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ConversationConfig.kt index d2f15fb06de..e22df3cef97 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ConversationConfig.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ConversationConfig.kt @@ -22,20 +22,20 @@ import android.media.AudioTrack /** * Configuration parameters to use for conversation config. * - * @param functionCallHandler A callback function that is invoked whenever the model receives a + * @property functionCallHandler A callback function that is invoked whenever the model receives a * function call. The [FunctionResponsePart] that the callback function returns will be * automatically sent to the model. * - * @param transcriptHandler A callback function that is invoked whenever the model receives a + * @property transcriptHandler A callback function that is invoked whenever the model receives a * transcript. The first [Transcription] object is the input transcription, and the second is the * output transcription. 
 * - * @param audioHandler A callback function that is invoked immediately following the successful + * @property audioHandler A callback function that is invoked immediately following the successful * initialization of the associated [AudioRecord] and [AudioTrack] objects. This offers a final * opportunity to apply custom configurations or modifications to these objects, which will remain * valid and effective for the duration of the current audio session. * - * @param enableInterruptions If enabled, allows the user to speak over or interrupt the model's + * @property enableInterruptions If enabled, allows the user to speak over or interrupt the model's * ongoing reply. * * **WARNING**: The user interruption feature relies on device-specific support, and may not be * consistently available. From f2afcfd437bc91980c86dadf25483c7ded078042 Mon Sep 17 00:00:00 2001 From: VinayGuthal Date: Wed, 22 Oct 2025 13:07:42 -0400 Subject: [PATCH 5/8] Rename audio conversation config and address some comments --- ...onfig.kt => LiveAudioConversationConfig.kt} | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) rename firebase-ai/src/main/kotlin/com/google/firebase/ai/type/{ConversationConfig.kt => LiveAudioConversationConfig.kt} (86%) diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ConversationConfig.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveAudioConversationConfig.kt similarity index 86% rename from firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ConversationConfig.kt rename to firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveAudioConversationConfig.kt index e22df3cef97..e072f625146 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ConversationConfig.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveAudioConversationConfig.kt @@ -22,17 +22,17 @@ import android.media.AudioTrack /** * Configuration parameters to use for conversation config. 
* - * @property functionCallHandler A callback function that is invoked whenever the model receives a + * @property functionCallHandler A callback that is invoked whenever the model receives a * function call. The [FunctionResponsePart] that the callback function returns will be * automatically sent to the model. * - * @property transcriptHandler A callback function that is invoked whenever the model receives a + * @property transcriptHandler A callback that is invoked whenever the model receives a * transcript. The first [Transcription] object is the input transcription, and the second is the * output transcription. * - * @property audioHandler A callback function that is invoked immediately following the successful + * @property audioHandler A callback that is invoked immediately following the successful * initialization of the associated [AudioRecord] and [AudioTrack] objects. This offers a final - * opportunity to apply custom configurations or modifications to these objects, which will remain + * opportunity to configure these objects, which will remain * valid and effective for the duration of the current audio session. * * @property enableInterruptions If enabled, allows the user to speak over or interrupt the model's @@ -42,12 +42,12 @@ import android.media.AudioTrack * consistently available. */ @PublicPreviewAPI -public class ConversationConfig +public class LiveConversationConfig private constructor( - internal var functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?, - internal var audioHandler: ((AudioRecord, AudioTrack) -> Unit)?, - internal var transcriptHandler: ((Transcription?, Transcription?) -> Unit)?, - internal var enableInterruptions: Boolean + internal val functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?, + internal val audioHandler: ((AudioRecord, AudioTrack) -> Unit)?, + internal val transcriptHandler: ((Transcription?, Transcription?) 
-> Unit)?, + internal val enableInterruptions: Boolean ) { /** From 723077a9b968aa2f8e3dfb44e5857a44550efa2c Mon Sep 17 00:00:00 2001 From: VinayGuthal Date: Wed, 22 Oct 2025 13:11:24 -0400 Subject: [PATCH 6/8] update --- firebase-ai/api.txt | 54 +++++++++---------- .../firebase/ai/java/LiveSessionFutures.kt | 14 ++--- .../ai/type/LiveAudioConversationConfig.kt | 52 +++++++++--------- .../google/firebase/ai/type/LiveSession.kt | 28 +++++----- 4 files changed, 78 insertions(+), 70 deletions(-) diff --git a/firebase-ai/api.txt b/firebase-ai/api.txt index e9ea5abe68b..ed3d7135138 100644 --- a/firebase-ai/api.txt +++ b/firebase-ai/api.txt @@ -152,7 +152,7 @@ package com.google.firebase.ai.java { method public abstract com.google.common.util.concurrent.ListenableFuture sendVideoRealtime(com.google.firebase.ai.type.InlineData video); method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(); method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(boolean enableInterruptions); - method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(com.google.firebase.ai.type.ConversationConfig conversationConfig); + method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(com.google.firebase.ai.type.LiveAudioConversationConfig liveAudioConversationConfig); method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function1? 
functionCallHandler); method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler, boolean enableInterruptions); method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler, kotlin.jvm.functions.Function2? transcriptHandler, boolean enableInterruptions); @@ -290,31 +290,6 @@ package com.google.firebase.ai.type { public static final class ContentModality.Companion { } - @com.google.firebase.ai.type.PublicPreviewAPI public final class ConversationConfig { - field public static final com.google.firebase.ai.type.ConversationConfig.Companion Companion; - } - - public static final class ConversationConfig.Builder { - ctor public ConversationConfig.Builder(); - method public com.google.firebase.ai.type.ConversationConfig build(); - method public com.google.firebase.ai.type.ConversationConfig.Builder setAudioHandler(kotlin.jvm.functions.Function2? audioHandler); - method public com.google.firebase.ai.type.ConversationConfig.Builder setEnableInterruptions(boolean enableInterruptions); - method public com.google.firebase.ai.type.ConversationConfig.Builder setFunctionCallHandler(kotlin.jvm.functions.Function1? functionCallHandler); - method public com.google.firebase.ai.type.ConversationConfig.Builder setTranscriptHandler(kotlin.jvm.functions.Function2? transcriptHandler); - field public kotlin.jvm.functions.Function2? audioHandler; - field public boolean enableInterruptions; - field public kotlin.jvm.functions.Function1? functionCallHandler; - field public kotlin.jvm.functions.Function2? 
transcriptHandler; - } - - public static final class ConversationConfig.Companion { - method public com.google.firebase.ai.type.ConversationConfig.Builder builder(); - } - - public final class ConversationConfigKt { - method public static com.google.firebase.ai.type.ConversationConfig conversationConfig(kotlin.jvm.functions.Function1 init); - } - public final class CountTokensResponse { ctor public CountTokensResponse(int totalTokens, @Deprecated Integer? totalBillableCharacters = null, java.util.List promptTokensDetails = emptyList()); method public operator int component1(); @@ -864,6 +839,31 @@ package com.google.firebase.ai.type { public final class InvalidStateException extends com.google.firebase.ai.type.FirebaseAIException { } + @com.google.firebase.ai.type.PublicPreviewAPI public final class LiveAudioConversationConfig { + field public static final com.google.firebase.ai.type.LiveAudioConversationConfig.Companion Companion; + } + + public static final class LiveAudioConversationConfig.Builder { + ctor public LiveAudioConversationConfig.Builder(); + method public com.google.firebase.ai.type.LiveAudioConversationConfig build(); + method public com.google.firebase.ai.type.LiveAudioConversationConfig.Builder setAudioHandler(kotlin.jvm.functions.Function2? audioHandler); + method public com.google.firebase.ai.type.LiveAudioConversationConfig.Builder setEnableInterruptions(boolean enableInterruptions); + method public com.google.firebase.ai.type.LiveAudioConversationConfig.Builder setFunctionCallHandler(kotlin.jvm.functions.Function1? functionCallHandler); + method public com.google.firebase.ai.type.LiveAudioConversationConfig.Builder setTranscriptHandler(kotlin.jvm.functions.Function2? transcriptHandler); + field public kotlin.jvm.functions.Function2? audioHandler; + field public boolean enableInterruptions; + field public kotlin.jvm.functions.Function1? functionCallHandler; + field public kotlin.jvm.functions.Function2? 
transcriptHandler; + } + + public static final class LiveAudioConversationConfig.Companion { + method public com.google.firebase.ai.type.LiveAudioConversationConfig.Builder builder(); + } + + public final class LiveAudioConversationConfigKt { + method public static com.google.firebase.ai.type.LiveAudioConversationConfig liveAudioConversationConfig(kotlin.jvm.functions.Function1 init); + } + @com.google.firebase.ai.type.PublicPreviewAPI public final class LiveGenerationConfig { field public static final com.google.firebase.ai.type.LiveGenerationConfig.Companion Companion; } @@ -948,7 +948,7 @@ package com.google.firebase.ai.type { method @Deprecated public suspend Object? sendMediaStream(java.util.List mediaChunks, kotlin.coroutines.Continuation); method public suspend Object? sendTextRealtime(String text, kotlin.coroutines.Continuation); method public suspend Object? sendVideoRealtime(com.google.firebase.ai.type.InlineData video, kotlin.coroutines.Continuation); - method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(com.google.firebase.ai.type.ConversationConfig conversationConfig, kotlin.coroutines.Continuation); + method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(com.google.firebase.ai.type.LiveAudioConversationConfig liveAudioConversationConfig, kotlin.coroutines.Continuation); method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler = null, boolean enableInterruptions = false, kotlin.coroutines.Continuation); method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler = null, kotlin.coroutines.Continuation); method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? 
startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler = null, kotlin.jvm.functions.Function2? transcriptHandler = null, boolean enableInterruptions = false, kotlin.coroutines.Continuation); diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt index 67eeb22b849..f84b1e7f6d0 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt @@ -21,10 +21,10 @@ import androidx.annotation.RequiresPermission import androidx.concurrent.futures.SuspendToFutureAdapter import com.google.common.util.concurrent.ListenableFuture import com.google.firebase.ai.type.Content -import com.google.firebase.ai.type.ConversationConfig import com.google.firebase.ai.type.FunctionCallPart import com.google.firebase.ai.type.FunctionResponsePart import com.google.firebase.ai.type.InlineData +import com.google.firebase.ai.type.LiveAudioConversationConfig import com.google.firebase.ai.type.LiveServerMessage import com.google.firebase.ai.type.LiveSession import com.google.firebase.ai.type.MediaData @@ -54,12 +54,12 @@ public abstract class LiveSessionFutures internal constructor() { * Starts an audio conversation with the model, which can only be stopped using * [stopAudioConversation]. * - * @param conversationConfig A [ConversationConfig] provided by the user to control the various - * aspects of the conversation. + * @param liveAudioConversationConfig A [LiveAudioConversationConfig] provided by the user to + * control the various aspects of the conversation. 
*/ @RequiresPermission(RECORD_AUDIO) public abstract fun startAudioConversation( - conversationConfig: ConversationConfig + liveAudioConversationConfig: LiveAudioConversationConfig ): ListenableFuture /** @@ -312,8 +312,10 @@ public abstract class LiveSessionFutures internal constructor() { } @RequiresPermission(RECORD_AUDIO) - override fun startAudioConversation(conversationConfig: ConversationConfig) = - SuspendToFutureAdapter.launchFuture { session.startAudioConversation(conversationConfig) } + override fun startAudioConversation(liveAudioConversationConfig: LiveAudioConversationConfig) = + SuspendToFutureAdapter.launchFuture { + session.startAudioConversation(liveAudioConversationConfig) + } @RequiresPermission(RECORD_AUDIO) override fun startAudioConversation() = diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveAudioConversationConfig.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveAudioConversationConfig.kt index e072f625146..4fd66446e19 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveAudioConversationConfig.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveAudioConversationConfig.kt @@ -22,18 +22,18 @@ import android.media.AudioTrack /** * Configuration parameters to use for conversation config. * - * @property functionCallHandler A callback that is invoked whenever the model receives a - * function call. The [FunctionResponsePart] that the callback function returns will be - * automatically sent to the model. + * @property functionCallHandler A callback that is invoked whenever the model receives a function + * call. The [FunctionResponsePart] that the callback function returns will be automatically sent to + * the model. * - * @property transcriptHandler A callback that is invoked whenever the model receives a - * transcript. The first [Transcription] object is the input transcription, and the second is the - * output transcription. 
+ * @property transcriptHandler A callback that is invoked whenever the model receives a transcript. + * The first [Transcription] object is the input transcription, and the second is the output + * transcription. * * @property audioHandler A callback that is invoked immediately following the successful * initialization of the associated [AudioRecord] and [AudioTrack] objects. This offers a final - * opportunity to configure these objects, which will remain - * valid and effective for the duration of the current audio session. + * opportunity to configure these objects, which will remain valid and effective for the duration of + * the current audio session. * * @property enableInterruptions If enabled, allows the user to speak over or interrupt the model's * ongoing reply. @@ -42,7 +42,7 @@ import android.media.AudioTrack * consistently available. */ @PublicPreviewAPI -public class LiveConversationConfig +public class LiveAudioConversationConfig private constructor( internal val functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?, internal val audioHandler: ((AudioRecord, AudioTrack) -> Unit)?, @@ -51,18 +51,18 @@ private constructor( ) { /** - * Builder for creating a [ConversationConfig]. + * Builder for creating a [LiveAudioConversationConfig]. * - * Mainly intended for Java interop. Kotlin consumers should use [conversationConfig] for a more - * idiomatic experience. + * Mainly intended for Java interop. Kotlin consumers should use [liveAudioConversationConfig] for + * a more idiomatic experience. * - * @property functionCallHandler See [ConversationConfig.functionCallHandler]. + * @property functionCallHandler See [LiveAudioConversationConfig.functionCallHandler]. * - * @property audioHandler See [ConversationConfig.audioHandler]. + * @property audioHandler See [LiveAudioConversationConfig.audioHandler]. * - * @property transcriptHandler See [ConversationConfig.transcriptHandler]. 
+ * @property transcriptHandler See [LiveAudioConversationConfig.transcriptHandler]. * - * @property enableInterruptions See [ConversationConfig.enableInterruptions]. + * @property enableInterruptions See [LiveAudioConversationConfig.enableInterruptions]. */ public class Builder { @JvmField public var functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)? = null @@ -87,9 +87,9 @@ private constructor( this.enableInterruptions = enableInterruptions } - /** Create a new [ConversationConfig] with the attached arguments. */ - public fun build(): ConversationConfig = - ConversationConfig( + /** Create a new [LiveAudioConversationConfig] with the attached arguments. */ + public fun build(): LiveAudioConversationConfig = + LiveAudioConversationConfig( functionCallHandler = functionCallHandler, audioHandler = audioHandler, transcriptHandler = transcriptHandler, @@ -100,9 +100,9 @@ private constructor( public companion object { /** - * Alternative casing for [ConversationConfig.Builder]: + * Alternative casing for [LiveAudioConversationConfig.Builder]: * ``` - * val config = ConversationConfig.builder() + * val config = LiveAudioConversationConfig.builder() * ``` */ public fun builder(): Builder = Builder() @@ -110,11 +110,11 @@ private constructor( } /** - * Helper method to construct a [ConversationConfig] in a DSL-like manner. + * Helper method to construct a [LiveAudioConversationConfig] in a DSL-like manner. * * Example Usage: * ``` - * conversationConfig { + * liveAudioConversationConfig { * functionCallHandler = ... * audioHandler = ... * ... 
@@ -122,8 +122,10 @@ private constructor( * ``` */ @OptIn(PublicPreviewAPI::class) -public fun conversationConfig(init: ConversationConfig.Builder.() -> Unit): ConversationConfig { - val builder = ConversationConfig.builder() +public fun liveAudioConversationConfig( + init: LiveAudioConversationConfig.Builder.() -> Unit +): LiveAudioConversationConfig { + val builder = LiveAudioConversationConfig.builder() builder.init() return builder.build() } diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt index 3a6c9743e8f..a497a01b224 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt @@ -171,22 +171,26 @@ internal constructor( transcriptHandler: ((Transcription?, Transcription?) -> Unit)? = null, enableInterruptions: Boolean = false, ) { - val config = ConversationConfig.builder() - config.functionCallHandler = functionCallHandler - config.transcriptHandler = transcriptHandler - config.enableInterruptions = enableInterruptions - startAudioConversation(config.build()) + startAudioConversation( + liveAudioConversationConfig { + this.functionCallHandler = functionCallHandler + this.transcriptHandler = transcriptHandler + this.enableInterruptions = enableInterruptions + } + ) } /** * Starts an audio conversation with the model, which can only be stopped using * [stopAudioConversation] or [close]. * - * @param conversationConfig A [ConversationConfig] provided by the user to control the various - * aspects of the conversation. + * @param liveAudioConversationConfig A [LiveAudioConversationConfig] provided by the user to + * control the various aspects of the conversation. 
*/ @RequiresPermission(RECORD_AUDIO) - public suspend fun startAudioConversation(conversationConfig: ConversationConfig) { + public suspend fun startAudioConversation( + liveAudioConversationConfig: LiveAudioConversationConfig + ) { val context = firebaseApp.applicationContext if ( @@ -207,14 +211,14 @@ internal constructor( networkScope = CoroutineScope(blockingDispatcher + childJob() + CoroutineName("LiveSession Network")) audioScope = CoroutineScope(audioDispatcher + childJob() + CoroutineName("LiveSession Audio")) - audioHelper = AudioHelper.build(conversationConfig.audioHandler) + audioHelper = AudioHelper.build(liveAudioConversationConfig.audioHandler) recordUserAudio() processModelResponses( - conversationConfig.functionCallHandler, - conversationConfig.transcriptHandler + liveAudioConversationConfig.functionCallHandler, + liveAudioConversationConfig.transcriptHandler ) - listenForModelPlayback(conversationConfig.enableInterruptions) + listenForModelPlayback(liveAudioConversationConfig.enableInterruptions) } } From 6d4d67cb3705e19244d7c2287b864e532f1a2689 Mon Sep 17 00:00:00 2001 From: VinayGuthal Date: Fri, 24 Oct 2025 13:17:17 -0400 Subject: [PATCH 7/8] use builder configuration instead --- firebase-ai/CHANGELOG.md | 2 + firebase-ai/gradle.properties | 2 +- .../google/firebase/ai/type/AudioHelper.kt | 60 ++++++++++++------- .../ai/type/LiveAudioConversationConfig.kt | 8 +-- 4 files changed, 44 insertions(+), 28 deletions(-) diff --git a/firebase-ai/CHANGELOG.md b/firebase-ai/CHANGELOG.md index abf0bf55c68..dee8bfb1563 100644 --- a/firebase-ai/CHANGELOG.md +++ b/firebase-ai/CHANGELOG.md @@ -1,5 +1,7 @@ # Unreleased +- [changed] Added `LiveAudioConversationConfig` to control different aspects of the conversation + while using the `startAudioConversation` function. - [changed] Added better scheduling and louder output for Live API. - [changed] Added support for input and output transcription. 
(#7482) - [feature] Added support for sending realtime audio and video in a `LiveSession`. diff --git a/firebase-ai/gradle.properties b/firebase-ai/gradle.properties index c4acd5b3aae..388149e856a 100644 --- a/firebase-ai/gradle.properties +++ b/firebase-ai/gradle.properties @@ -12,5 +12,5 @@ # See the License for the specific language governing permissions and # limitations under the License. -version=17.5.0 +version=99.9.9 latestReleasedVersion=17.4.0 diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioHelper.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioHelper.kt index ade1f1e26d5..e907315fb0e 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioHelper.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioHelper.kt @@ -19,7 +19,6 @@ package com.google.firebase.ai.type import android.Manifest import android.media.AudioAttributes import android.media.AudioFormat -import android.media.AudioManager import android.media.AudioRecord import android.media.AudioTrack import android.media.MediaRecorder @@ -157,28 +156,39 @@ internal class AudioHelper( * * It also makes it easier to read, since the long initialization is separate from the * constructor. + * + * @param audioHandler A callback that is invoked immediately following the successful + * initialization of the associated [AudioRecord] and [AudioTrack] objects. This offers a final + * opportunity to configure these objects, which will remain valid and effective for the + * duration of the current audio session. */ @RequiresPermission(Manifest.permission.RECORD_AUDIO) - fun build(audioHandler: ((AudioRecord, AudioTrack) -> Unit)? = null): AudioHelper { - val playbackTrack = - AudioTrack( - AudioAttributes.Builder() - .setUsage(AudioAttributes.USAGE_MEDIA) - .setContentType(AudioAttributes.CONTENT_TYPE_SPEECH) - .build(), + fun build( + audioHandler: ((AudioRecord.Builder, AudioTrack.Builder) -> Unit)? 
= null + ): AudioHelper { + val playTrackBuilder = AudioTrack.Builder() + playTrackBuilder + .setAudioFormat( AudioFormat.Builder() .setSampleRate(24000) .setChannelMask(AudioFormat.CHANNEL_OUT_MONO) .setEncoding(AudioFormat.ENCODING_PCM_16BIT) - .build(), + .build() + ) + .setAudioAttributes( + AudioAttributes.Builder() + .setUsage(AudioAttributes.USAGE_MEDIA) + .setContentType(AudioAttributes.CONTENT_TYPE_SPEECH) + .build() + ) + .setBufferSizeInBytes( AudioTrack.getMinBufferSize( 24000, AudioFormat.CHANNEL_OUT_MONO, AudioFormat.ENCODING_PCM_16BIT ), - AudioTrack.MODE_STREAM, - AudioManager.AUDIO_SESSION_ID_GENERATE ) + .setTransferMode(AudioTrack.MODE_STREAM) val bufferSize = AudioRecord.getMinBufferSize( @@ -191,15 +201,22 @@ internal class AudioHelper( throw AudioRecordInitializationFailedException( "Audio Record buffer size is invalid ($bufferSize)" ) - - val recorder = - AudioRecord( - MediaRecorder.AudioSource.VOICE_COMMUNICATION, - 16000, - AudioFormat.CHANNEL_IN_MONO, - AudioFormat.ENCODING_PCM_16BIT, - bufferSize - ) + val recorderBuilder = + AudioRecord.Builder() + .setAudioSource(MediaRecorder.AudioSource.VOICE_COMMUNICATION) + .setAudioFormat( + AudioFormat.Builder() + .setEncoding(AudioFormat.ENCODING_PCM_16BIT) + .setSampleRate(16000) + .setChannelMask(AudioFormat.CHANNEL_IN_MONO) + .build() + ) + .setBufferSizeInBytes(bufferSize) + if (audioHandler != null) { + audioHandler(recorderBuilder, playTrackBuilder) + } + val recorder = recorderBuilder.build() + val playbackTrack = playTrackBuilder.build() if (recorder.state != AudioRecord.STATE_INITIALIZED) throw AudioRecordInitializationFailedException( "Audio Record initialization has failed. 
State: ${recorder.state}" @@ -209,9 +226,6 @@ internal class AudioHelper( AcousticEchoCanceler.create(recorder.audioSessionId)?.enabled = true } - if (audioHandler != null) { - audioHandler(recorder, playbackTrack) - } return AudioHelper(recorder, playbackTrack) } } diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveAudioConversationConfig.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveAudioConversationConfig.kt index 4fd66446e19..f106c4de0f3 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveAudioConversationConfig.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveAudioConversationConfig.kt @@ -31,9 +31,9 @@ import android.media.AudioTrack * transcription. * * @property audioHandler A callback that is invoked immediately following the successful - * initialization of the associated [AudioRecord] and [AudioTrack] objects. This offers a final - * opportunity to configure these objects, which will remain valid and effective for the duration of - * the current audio session. + * initialization of the associated [AudioRecord.Builder] and [AudioTrack.Builder] objects. This + * offers a final opportunity to configure these objects, which will remain valid and effective for + * the duration of the current audio session. * * @property enableInterruptions If enabled, allows the user to speak over or interrupt the model's * ongoing reply. @@ -45,7 +45,7 @@ import android.media.AudioTrack public class LiveAudioConversationConfig private constructor( internal val functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?, - internal val audioHandler: ((AudioRecord, AudioTrack) -> Unit)?, + internal val audioHandler: ((AudioRecord.Builder, AudioTrack.Builder) -> Unit)?, internal val transcriptHandler: ((Transcription?, Transcription?) 
-> Unit)?, internal val enableInterruptions: Boolean ) { From d01a970743d8473a6e5ae10e24b04ee10558b20c Mon Sep 17 00:00:00 2001 From: VinayGuthal Date: Fri, 24 Oct 2025 13:21:16 -0400 Subject: [PATCH 8/8] update --- .../firebase/ai/type/LiveAudioConversationConfig.kt | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveAudioConversationConfig.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveAudioConversationConfig.kt index f106c4de0f3..3ac00eca76b 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveAudioConversationConfig.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveAudioConversationConfig.kt @@ -66,7 +66,7 @@ private constructor( */ public class Builder { @JvmField public var functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)? = null - @JvmField public var audioHandler: ((AudioRecord, AudioTrack) -> Unit)? = null + @JvmField public var audioHandler: ((AudioRecord.Builder, AudioTrack.Builder) -> Unit)? = null @JvmField public var transcriptHandler: ((Transcription?, Transcription?) -> Unit)? = null @JvmField public var enableInterruptions: Boolean = false @@ -74,10 +74,9 @@ private constructor( functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)? ): Builder = apply { this.functionCallHandler = functionCallHandler } - public fun setAudioHandler(audioHandler: ((AudioRecord, AudioTrack) -> Unit)?): Builder = - apply { - this.audioHandler = audioHandler - } + public fun setAudioHandler( + audioHandler: ((AudioRecord.Builder, AudioTrack.Builder) -> Unit)? + ): Builder = apply { this.audioHandler = audioHandler } public fun setTranscriptHandler( transcriptHandler: ((Transcription?, Transcription?) -> Unit)?