diff --git a/firebase-ai/api.txt b/firebase-ai/api.txt
index ba27e5682d8..f73c51d7112 100644
--- a/firebase-ai/api.txt
+++ b/firebase-ai/api.txt
@@ -154,6 +154,9 @@ package com.google.firebase.ai.java {
     method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(boolean enableInterruptions);
     method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler);
     method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler, boolean enableInterruptions);
+    method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler, kotlin.jvm.functions.Function2? transcriptHandler, boolean enableInterruptions);
+    method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function2? transcriptHandler);
+    method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function2? transcriptHandler, boolean enableInterruptions);
     method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture stopAudioConversation();
     method public abstract void stopReceiving();
     field public static final com.google.firebase.ai.java.LiveSessionFutures.Companion Companion;
@@ -174,6 +177,10 @@ package com.google.firebase.ai.type {
     ctor public AudioRecordInitializationFailedException(String message);
   }
 
+  public final class AudioTranscriptionConfig {
+    ctor public AudioTranscriptionConfig();
+  }
+
   public final class BlockReason {
     method public String getName();
     method public int getOrdinal();
@@ -839,7 +846,9 @@ package com.google.firebase.ai.type {
     ctor public LiveGenerationConfig.Builder();
     method public com.google.firebase.ai.type.LiveGenerationConfig build();
     method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setFrequencyPenalty(Float? frequencyPenalty);
+    method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setInputAudioTranscription(com.google.firebase.ai.type.AudioTranscriptionConfig? config);
     method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setMaxOutputTokens(Integer? maxOutputTokens);
+    method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setOutputAudioTranscription(com.google.firebase.ai.type.AudioTranscriptionConfig? config);
     method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setPresencePenalty(Float? presencePenalty);
     method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setResponseModality(com.google.firebase.ai.type.ResponseModality? responseModality);
     method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setSpeechConfig(com.google.firebase.ai.type.SpeechConfig? speechConfig);
@@ -847,7 +856,9 @@ package com.google.firebase.ai.type {
     method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setTopK(Integer? topK);
     method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setTopP(Float? topP);
     field public Float? frequencyPenalty;
+    field public com.google.firebase.ai.type.AudioTranscriptionConfig? inputAudioTranscription;
     field public Integer? maxOutputTokens;
+    field public com.google.firebase.ai.type.AudioTranscriptionConfig? outputAudioTranscription;
     field public Float? presencePenalty;
     field public com.google.firebase.ai.type.ResponseModality? responseModality;
     field public com.google.firebase.ai.type.SpeechConfig? speechConfig;
@@ -865,14 +876,18 @@ package com.google.firebase.ai.type {
   }
 
   @com.google.firebase.ai.type.PublicPreviewAPI public final class LiveServerContent implements com.google.firebase.ai.type.LiveServerMessage {
-    ctor public LiveServerContent(com.google.firebase.ai.type.Content? content, boolean interrupted, boolean turnComplete, boolean generationComplete);
+    ctor @Deprecated public LiveServerContent(com.google.firebase.ai.type.Content? content, boolean interrupted, boolean turnComplete, boolean generationComplete, com.google.firebase.ai.type.Transcription? inputTranscription, com.google.firebase.ai.type.Transcription? outputTranscription);
     method public com.google.firebase.ai.type.Content? getContent();
     method public boolean getGenerationComplete();
+    method public com.google.firebase.ai.type.Transcription? getInputTranscription();
     method public boolean getInterrupted();
+    method public com.google.firebase.ai.type.Transcription? getOutputTranscription();
     method public boolean getTurnComplete();
     property public final com.google.firebase.ai.type.Content? content;
     property public final boolean generationComplete;
+    property public final com.google.firebase.ai.type.Transcription? inputTranscription;
     property public final boolean interrupted;
+    property public final com.google.firebase.ai.type.Transcription? outputTranscription;
     property public final boolean turnComplete;
   }
 
@@ -909,6 +924,7 @@ package com.google.firebase.ai.type {
     method public suspend Object? sendVideoRealtime(com.google.firebase.ai.type.InlineData video, kotlin.coroutines.Continuation);
     method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler = null, boolean enableInterruptions = false, kotlin.coroutines.Continuation);
     method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler = null, kotlin.coroutines.Continuation);
+    method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler = null, kotlin.jvm.functions.Function2? transcriptHandler = null, boolean enableInterruptions = false, kotlin.coroutines.Continuation);
     method public void stopAudioConversation();
     method public void stopReceiving();
   }
@@ -1235,6 +1251,11 @@ package com.google.firebase.ai.type {
     ctor public ToolConfig(com.google.firebase.ai.type.FunctionCallingConfig? functionCallingConfig);
   }
 
+  public final class Transcription {
+    method public String? getText();
+    property public final String? text;
+  }
+
   public final class UnknownException extends com.google.firebase.ai.type.FirebaseAIException {
   }
 
diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/LiveGenerativeModel.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/LiveGenerativeModel.kt
index d5afca6b960..b0a1b541c6b 100644
--- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/LiveGenerativeModel.kt
+++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/LiveGenerativeModel.kt
@@ -111,7 +111,9 @@ internal constructor(
         modelName,
         config?.toInternal(),
         tools?.map { it.toInternal() },
-        systemInstruction?.toInternal()
+        systemInstruction?.toInternal(),
+        config?.inputAudioTranscription?.toInternal(),
+        config?.outputAudioTranscription?.toInternal()
       )
       .toInternal()
     val data: String = Json.encodeToString(clientMessage)
@@ -135,7 +137,7 @@ internal constructor(
     } catch (e: ClosedReceiveChannelException) {
       val reason = webSession?.closeReason?.await()
       val message =
-        "Channel was closed by the server.${if(reason!=null) " Details: ${reason.message}" else "" }"
+        "Channel was closed by the server.${if (reason != null) " Details: ${reason.message}" else ""}"
       throw ServiceConnectionHandshakeFailedException(message, e)
     }
   }
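
Taken together, the api.txt surface and the LiveGenerativeModel change above make transcription an opt-in, per-direction setting that rides along in the setup message. A minimal sketch of the intended call pattern; the entry point, backend, and model name are illustrative assumptions, not taken from this diff:

```kotlin
import com.google.firebase.Firebase
import com.google.firebase.ai.ai
import com.google.firebase.ai.type.AudioTranscriptionConfig
import com.google.firebase.ai.type.GenerativeBackend
import com.google.firebase.ai.type.LiveGenerationConfig
import com.google.firebase.ai.type.ResponseModality

suspend fun connectWithTranscription() {
  // The presence of an AudioTranscriptionConfig is what enables transcription;
  // the class carries no options of its own.
  val config =
    LiveGenerationConfig.Builder()
      .setResponseModality(ResponseModality.AUDIO)
      .setInputAudioTranscription(AudioTranscriptionConfig())
      .setOutputAudioTranscription(AudioTranscriptionConfig())
      .build()

  // Entry point and model name are assumptions for the sketch.
  val model =
    Firebase.ai(backend = GenerativeBackend.googleAI())
      .liveModel("gemini-live-model-name", generationConfig = config)
  val session = model.connect() // sends the LiveClientSetupMessage built above
}
```
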
diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt
index 2fb74689643..5a04ed9f97c 100644
--- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt
+++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt
@@ -29,6 +29,7 @@ import com.google.firebase.ai.type.LiveSession
 import com.google.firebase.ai.type.MediaData
 import com.google.firebase.ai.type.PublicPreviewAPI
 import com.google.firebase.ai.type.SessionAlreadyReceivingException
+import com.google.firebase.ai.type.Transcription
 import io.ktor.websocket.close
 import kotlinx.coroutines.reactive.asPublisher
 import org.reactivestreams.Publisher
@@ -41,6 +42,13 @@ import org.reactivestreams.Publisher
 @PublicPreviewAPI
 public abstract class LiveSessionFutures internal constructor() {
 
+  /**
+   * Starts an audio conversation with the model, which can only be stopped using
+   * [stopAudioConversation].
+   */
+  @RequiresPermission(RECORD_AUDIO)
+  public abstract fun startAudioConversation(): ListenableFuture<Unit>
+
   /**
    * Starts an audio conversation with the model, which can only be stopped using
    * [stopAudioConversation] or [close].
@@ -56,9 +64,14 @@ public abstract class LiveSessionFutures internal constructor() {
   /**
    * Starts an audio conversation with the model, which can only be stopped using
    * [stopAudioConversation].
+   *
+   * @param transcriptHandler A callback function that is invoked whenever the model receives a
+   * transcript. The first [Transcription] object is the input transcription, and the second is
+   * the output transcription.
    */
   @RequiresPermission(RECORD_AUDIO)
-  public abstract fun startAudioConversation(): ListenableFuture<Unit>
+  public abstract fun startAudioConversation(
+    transcriptHandler: ((Transcription?, Transcription?) -> Unit)?,
+  ): ListenableFuture<Unit>
 
   /**
    * Starts an audio conversation with the model, which can only be stopped using
@@ -73,6 +86,26 @@ public abstract class LiveSessionFutures internal constructor() {
   @RequiresPermission(RECORD_AUDIO)
   public abstract fun startAudioConversation(enableInterruptions: Boolean): ListenableFuture<Unit>
 
+  /**
+   * Starts an audio conversation with the model, which can only be stopped using
+   * [stopAudioConversation] or [close].
+   *
+   * @param transcriptHandler A callback function that is invoked whenever the model receives a
+   * transcript. The first [Transcription] object is the input transcription, and the second is
+   * the output transcription.
+   *
+   * @param enableInterruptions If enabled, allows the user to speak over or interrupt the model's
+   * ongoing reply.
+   *
+   * **WARNING**: The user interruption feature relies on device-specific support, and may not be
+   * consistently available.
+   */
+  @RequiresPermission(RECORD_AUDIO)
+  public abstract fun startAudioConversation(
+    transcriptHandler: ((Transcription?, Transcription?) -> Unit)?,
+    enableInterruptions: Boolean
+  ): ListenableFuture<Unit>
+
   /**
    * Starts an audio conversation with the model, which can only be stopped using
    * [stopAudioConversation] or [close].
@@ -92,6 +125,30 @@ public abstract class LiveSessionFutures internal constructor() {
     enableInterruptions: Boolean
   ): ListenableFuture<Unit>
 
+  /**
+   * Starts an audio conversation with the model, which can only be stopped using
+   * [stopAudioConversation] or [close].
+   *
+   * @param functionCallHandler A callback function that is invoked whenever the model receives a
+   * function call.
+   *
+   * @param transcriptHandler A callback function that is invoked whenever the model receives a
+   * transcript. The first [Transcription] object is the input transcription, and the second is
+   * the output transcription.
+   *
+   * @param enableInterruptions If enabled, allows the user to speak over or interrupt the model's
+   * ongoing reply.
+   *
+   * **WARNING**: The user interruption feature relies on device-specific support, and may not be
+   * consistently available.
+   */
+  @RequiresPermission(RECORD_AUDIO)
+  public abstract fun startAudioConversation(
+    functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?,
+    transcriptHandler: ((Transcription?, Transcription?) -> Unit)?,
+    enableInterruptions: Boolean
+  ): ListenableFuture<Unit>
+
   /**
    * Stops the audio conversation with the Gemini Server.
    *
@@ -233,6 +290,14 @@ public abstract class LiveSessionFutures internal constructor() {
       functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?
     ) = SuspendToFutureAdapter.launchFuture { session.startAudioConversation(functionCallHandler) }
 
+    @RequiresPermission(RECORD_AUDIO)
+    override fun startAudioConversation(
+      transcriptHandler: ((Transcription?, Transcription?) -> Unit)?
+    ) =
+      SuspendToFutureAdapter.launchFuture {
+        session.startAudioConversation(transcriptHandler = transcriptHandler)
+      }
+
     @RequiresPermission(RECORD_AUDIO)
     override fun startAudioConversation() =
       SuspendToFutureAdapter.launchFuture { session.startAudioConversation() }
@@ -243,6 +308,32 @@ public abstract class LiveSessionFutures internal constructor() {
         session.startAudioConversation(enableInterruptions = enableInterruptions)
       }
 
+    @RequiresPermission(RECORD_AUDIO)
+    override fun startAudioConversation(
+      transcriptHandler: ((Transcription?, Transcription?) -> Unit)?,
+      enableInterruptions: Boolean
+    ) =
+      SuspendToFutureAdapter.launchFuture {
+        session.startAudioConversation(
+          transcriptHandler = transcriptHandler,
+          enableInterruptions = enableInterruptions
+        )
+      }
+
+    @RequiresPermission(RECORD_AUDIO)
+    override fun startAudioConversation(
+      functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?,
+      transcriptHandler: ((Transcription?, Transcription?) -> Unit)?,
+      enableInterruptions: Boolean
+    ) =
+      SuspendToFutureAdapter.launchFuture {
+        session.startAudioConversation(
+          functionCallHandler = functionCallHandler,
+          transcriptHandler = transcriptHandler,
+          enableInterruptions = enableInterruptions
+        )
+      }
+
     @RequiresPermission(RECORD_AUDIO)
     override fun startAudioConversation(
       functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?,
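
For Java callers the new overloads slot into the existing futures adapter. A Kotlin sketch of that flow (Kotlin is used for all examples here); LiveSessionFutures.from is assumed to be the existing factory on the companion:

```kotlin
import android.Manifest
import android.util.Log
import androidx.annotation.RequiresPermission
import com.google.firebase.ai.java.LiveSessionFutures
import com.google.firebase.ai.type.LiveSession
import com.google.firebase.ai.type.Transcription

@RequiresPermission(Manifest.permission.RECORD_AUDIO)
fun startWithTranscripts(session: LiveSession) {
  val futures = LiveSessionFutures.from(session) // assumed existing factory
  // The typed (Transcription?, Transcription?) lambda selects the transcriptHandler overload.
  futures.startAudioConversation(
    { input: Transcription?, output: Transcription? ->
      input?.text?.let { Log.d("Live", "user: $it") }
      output?.text?.let { Log.d("Live", "model: $it") }
    },
    /* enableInterruptions = */ false
  )
}
```
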
diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioTranscriptionConfig.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioTranscriptionConfig.kt
new file mode 100644
index 00000000000..406af4d4c6f
--- /dev/null
+++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioTranscriptionConfig.kt
@@ -0,0 +1,27 @@
+/*
+ * Copyright 2025 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.firebase.ai.type
+
+import kotlinx.serialization.Serializable
+
+/** The audio transcription configuration. Its presence enables audio transcription. */
+public class AudioTranscriptionConfig {
+
+  @Serializable internal object Internal
+
+  internal fun toInternal() = Internal
+}
diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveClientSetupMessage.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveClientSetupMessage.kt
index 36e06b184e8..856eebbdde5 100644
--- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveClientSetupMessage.kt
+++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveClientSetupMessage.kt
@@ -32,7 +32,9 @@ internal class LiveClientSetupMessage(
   // needs its own config class
   val generationConfig: LiveGenerationConfig.Internal?,
   val tools: List<Tool.Internal>?,
-  val systemInstruction: Content.Internal?
+  val systemInstruction: Content.Internal?,
+  val inputAudioTranscription: AudioTranscriptionConfig.Internal?,
+  val outputAudioTranscription: AudioTranscriptionConfig.Internal?,
 ) {
   @Serializable
   internal class Internal(val setup: LiveClientSetup) {
@@ -41,10 +43,21 @@ internal class LiveClientSetupMessage(
       val model: String,
       val generationConfig: LiveGenerationConfig.Internal?,
       val tools: List<Tool.Internal>?,
-      val systemInstruction: Content.Internal?
+      val systemInstruction: Content.Internal?,
+      val inputAudioTranscription: AudioTranscriptionConfig.Internal?,
+      val outputAudioTranscription: AudioTranscriptionConfig.Internal?,
     )
   }
 
   fun toInternal() =
-    Internal(Internal.LiveClientSetup(model, generationConfig, tools, systemInstruction))
+    Internal(
+      Internal.LiveClientSetup(
+        model,
+        generationConfig,
+        tools,
+        systemInstruction,
+        inputAudioTranscription,
+        outputAudioTranscription
+      )
+    )
 }
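
With the two new nullable fields on LiveClientSetup, an enabled direction serializes as an empty JSON object, since AudioTranscriptionConfig.Internal is a Kotlin object with no properties. A hand-written illustration of the resulting setup frame (not captured from a real session; key spelling simply follows the property names):

```kotlin
// Hand-written illustration of the wire format; not captured from a session.
val exampleSetupFrame =
  """
  {
    "setup": {
      "model": "<model resource name>",
      "inputAudioTranscription": {},
      "outputAudioTranscription": {}
    }
  }
  """.trimIndent()
```
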
diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveGenerationConfig.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveGenerationConfig.kt
index eb9dcc716c9..3e014d43162 100644
--- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveGenerationConfig.kt
+++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveGenerationConfig.kt
@@ -53,6 +53,11 @@ import kotlinx.serialization.Serializable
  *
  * @property speechConfig Specifies the voice configuration of the audio response from the server.
  *
+ * @property inputAudioTranscription Specifies the configuration for transcribing input audio.
+ *
+ * @property outputAudioTranscription Specifies the configuration for transcribing output audio from
+ * the model.
+ *
  * Refer to the
  * [Control generated output](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/control-generated-output)
  * guide for more details.
@@ -67,7 +72,9 @@ private constructor(
   internal val presencePenalty: Float?,
   internal val frequencyPenalty: Float?,
   internal val responseModality: ResponseModality?,
-  internal val speechConfig: SpeechConfig?
+  internal val speechConfig: SpeechConfig?,
+  internal val inputAudioTranscription: AudioTranscriptionConfig?,
+  internal val outputAudioTranscription: AudioTranscriptionConfig?,
 ) {
 
   /**
@@ -91,6 +98,10 @@ private constructor(
    * @property responseModality See [LiveGenerationConfig.responseModality]
    *
    * @property speechConfig See [LiveGenerationConfig.speechConfig]
+   *
+   * @property inputAudioTranscription See [LiveGenerationConfig.inputAudioTranscription]
+   *
+   * @property outputAudioTranscription See [LiveGenerationConfig.outputAudioTranscription]
    */
   public class Builder {
     @JvmField public var temperature: Float? = null
@@ -101,6 +112,8 @@ private constructor(
     @JvmField public var frequencyPenalty: Float? = null
     @JvmField public var responseModality: ResponseModality? = null
     @JvmField public var speechConfig: SpeechConfig? = null
+    @JvmField public var inputAudioTranscription: AudioTranscriptionConfig? = null
+    @JvmField public var outputAudioTranscription: AudioTranscriptionConfig? = null
 
     public fun setTemperature(temperature: Float?): Builder = apply {
       this.temperature = temperature
@@ -123,6 +136,14 @@ private constructor(
       this.speechConfig = speechConfig
     }
 
+    public fun setInputAudioTranscription(config: AudioTranscriptionConfig?): Builder = apply {
+      this.inputAudioTranscription = config
+    }
+
+    public fun setOutputAudioTranscription(config: AudioTranscriptionConfig?): Builder = apply {
+      this.outputAudioTranscription = config
+    }
+
     /** Create a new [LiveGenerationConfig] with the attached arguments. */
     public fun build(): LiveGenerationConfig =
       LiveGenerationConfig(
@@ -133,7 +154,9 @@ private constructor(
         presencePenalty = presencePenalty,
         frequencyPenalty = frequencyPenalty,
         speechConfig = speechConfig,
-        responseModality = responseModality
+        responseModality = responseModality,
+        inputAudioTranscription = inputAudioTranscription,
+        outputAudioTranscription = outputAudioTranscription,
       )
   }
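
Because the two settings are independent, either direction can be enabled on its own. For example, transcribing only the model's audio:

```kotlin
import com.google.firebase.ai.type.AudioTranscriptionConfig
import com.google.firebase.ai.type.LiveGenerationConfig

// Only the model's speech is transcribed; user audio is left untranscribed.
val outputOnlyConfig: LiveGenerationConfig =
  LiveGenerationConfig.Builder()
    .setOutputAudioTranscription(AudioTranscriptionConfig())
    .build()
```
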
diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveServerMessage.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveServerMessage.kt
index 5cabe593bd6..a250f4a13c9 100644
--- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveServerMessage.kt
+++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveServerMessage.kt
@@ -42,7 +42,9 @@ import kotlinx.serialization.json.jsonObject
  * play it out in realtime.
  */
 @PublicPreviewAPI
-public class LiveServerContent(
+public class LiveServerContent
+@Deprecated("This class should not be constructed, only received from the Server")
+public constructor(
   /**
    * The content that the model has generated as part of the current conversation with the user.
    *
@@ -82,25 +84,43 @@ public class LiveServerContent(
    * [interrupted] -> [turnComplete].
    */
   public val generationComplete: Boolean,
+
+  /**
+   * The input transcription. The transcription is independent of the model turn and does not
+   * imply any ordering between the transcription and the model turn.
+   */
+  public val inputTranscription: Transcription?,
+
+  /**
+   * The output transcription. The transcription is independent of the model turn and does not
+   * imply any ordering between the transcription and the model turn.
+   */
+  public val outputTranscription: Transcription?
 ) : LiveServerMessage {
   @OptIn(ExperimentalSerializationApi::class)
   @Serializable
   internal data class Internal(
-    val modelTurn: Content.Internal? = null,
-    val interrupted: Boolean = false,
-    val turnComplete: Boolean = false,
-    val generationComplete: Boolean = false
+    val modelTurn: Content.Internal?,
+    val interrupted: Boolean?,
+    val turnComplete: Boolean?,
+    val generationComplete: Boolean?,
+    val inputTranscription: Transcription.Internal?,
+    val outputTranscription: Transcription.Internal?
   )
 
   @Serializable
   internal data class InternalWrapper(val serverContent: Internal) : InternalLiveServerMessage {
     @OptIn(ExperimentalSerializationApi::class)
-    override fun toPublic() =
-      LiveServerContent(
+    override fun toPublic(): LiveServerContent {
+      // WhenMajor(Revisit the decision to make these have default values)
+      return LiveServerContent(
         serverContent.modelTurn?.toPublic(),
-        serverContent.interrupted,
-        serverContent.turnComplete,
-        serverContent.generationComplete
+        serverContent.interrupted ?: false,
+        serverContent.turnComplete ?: false,
+        serverContent.generationComplete ?: false,
+        serverContent.inputTranscription?.toPublic(),
+        serverContent.outputTranscription?.toPublic()
       )
+    }
   }
 }
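
Clients that drive the session by hand rather than through startAudioConversation can read the same fields off each LiveServerContent message. A sketch over the existing LiveSession.receive() flow; note that either transcription may be null on any given message:

```kotlin
import com.google.firebase.ai.type.LiveServerContent
import com.google.firebase.ai.type.LiveSession
import kotlinx.coroutines.flow.collect

suspend fun logTranscripts(session: LiveSession) {
  session.receive().collect { message ->
    if (message is LiveServerContent) {
      // Transcriptions arrive independently of the model turn.
      message.inputTranscription?.text?.let { println("user: $it") }
      message.outputTranscription?.text?.let { println("model: $it") }
    }
  }
}
```
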
diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt
index 0e6796ab01b..9e8b7d7f683 100644
--- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt
+++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt
@@ -28,7 +28,6 @@ import com.google.firebase.ai.common.JSON
 import com.google.firebase.ai.common.util.CancelledCoroutineScope
 import com.google.firebase.ai.common.util.accumulateUntil
 import com.google.firebase.ai.common.util.childJob
-import com.google.firebase.ai.type.MediaData.Internal
 import com.google.firebase.annotations.concurrent.Blocking
 import io.ktor.client.plugins.websocket.DefaultClientWebSocketSession
 import io.ktor.websocket.Frame
@@ -120,6 +119,37 @@ internal constructor(
     functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)? = null,
     enableInterruptions: Boolean = false,
   ) {
+    startAudioConversation(
+      functionCallHandler = functionCallHandler,
+      transcriptHandler = null,
+      enableInterruptions = enableInterruptions
+    )
+  }
+
+  /**
+   * Starts an audio conversation with the model, which can only be stopped using
+   * [stopAudioConversation] or [close].
+   *
+   * @param functionCallHandler A callback function that is invoked whenever the model receives a
+   * function call. The [FunctionResponsePart] that the callback function returns will be
+   * automatically sent to the model.
+   *
+   * @param transcriptHandler A callback function that is invoked whenever the model receives a
+   * transcript. The first [Transcription] object is the input transcription, and the second is
+   * the output transcription.
+   *
+   * @param enableInterruptions If enabled, allows the user to speak over or interrupt the model's
+   * ongoing reply.
+   *
+   * **WARNING**: The user interruption feature relies on device-specific support, and may not be
+   * consistently available.
+   */
+  @RequiresPermission(RECORD_AUDIO)
+  public suspend fun startAudioConversation(
+    functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)? = null,
+    transcriptHandler: ((Transcription?, Transcription?) -> Unit)? = null,
+    enableInterruptions: Boolean = false,
+  ) {
     val context = firebaseApp.applicationContext
 
     if (
@@ -142,7 +172,7 @@ internal constructor(
       audioHelper = AudioHelper.build()
 
       recordUserAudio()
-      processModelResponses(functionCallHandler)
+      processModelResponses(functionCallHandler, transcriptHandler)
       listenForModelPlayback(enableInterruptions)
     }
   }
@@ -390,7 +420,8 @@ internal constructor(
    * function call.
    */
   private fun processModelResponses(
    functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?
+    functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?,
+    transcriptHandler: ((Transcription?, Transcription?) -> Unit)?
   ) {
     receive()
       .onEach {
@@ -419,6 +450,9 @@ internal constructor(
             )
           }
           is LiveServerContent -> {
+            if (it.inputTranscription != null || it.outputTranscription != null) {
+              transcriptHandler?.invoke(it.inputTranscription, it.outputTranscription)
+            }
             if (it.interrupted) {
               playBackQueue.clear()
             } else {
diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/Transcription.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/Transcription.kt
new file mode 100644
index 00000000000..6dc65e5abdb
--- /dev/null
+++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/Transcription.kt
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2025 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.firebase.ai.type
+
+import kotlinx.serialization.Serializable
+
+/**
+ * Audio transcription message.
+ * @property text The transcription text.
+ */
+public class Transcription internal constructor(public val text: String?) {
+
+  @Serializable
+  internal data class Internal(val text: String?) {
+    fun toPublic(): Transcription {
+      return Transcription(text)
+    }
+  }
+}
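
End to end, the new suspend overload makes live captions a small addition on top of the audio loop. A sketch, assuming the session's config enabled both transcription directions (the UI hook is hypothetical):

```kotlin
import android.Manifest
import androidx.annotation.RequiresPermission
import com.google.firebase.ai.type.LiveSession
import com.google.firebase.ai.type.Transcription

// Hypothetical UI hook, not part of the SDK.
fun showCaption(speaker: String, text: String) = println("$speaker: $text")

@RequiresPermission(Manifest.permission.RECORD_AUDIO)
suspend fun talkWithCaptions(session: LiveSession) {
  session.startAudioConversation(
    transcriptHandler = { input: Transcription?, output: Transcription? ->
      // Invoked only when at least one side is present (see processModelResponses above).
      input?.text?.let { showCaption("You", it) }
      output?.text?.let { showCaption("Model", it) }
    },
    enableInterruptions = false,
  )
}
```
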