Merged · Changes from 8 commits
24 changes: 23 additions & 1 deletion firebase-ai/api.txt
@@ -154,6 +154,9 @@ package com.google.firebase.ai.java {
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation(boolean enableInterruptions);
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler);
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler, boolean enableInterruptions);
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler, kotlin.jvm.functions.Function2<? super com.google.firebase.ai.type.Transcription?,? super com.google.firebase.ai.type.Transcription?,kotlin.Unit>? transcriptHandler = null, boolean enableInterruptions);
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation(kotlin.jvm.functions.Function2<? super com.google.firebase.ai.type.Transcription?,? super com.google.firebase.ai.type.Transcription?,kotlin.Unit>? transcriptHandler = null);
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation(kotlin.jvm.functions.Function2<? super com.google.firebase.ai.type.Transcription?,? super com.google.firebase.ai.type.Transcription?,kotlin.Unit>? transcriptHandler = null, boolean enableInterruptions);
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> stopAudioConversation();
method public abstract void stopReceiving();
field public static final com.google.firebase.ai.java.LiveSessionFutures.Companion Companion;
@@ -174,6 +177,10 @@ package com.google.firebase.ai.type {
ctor public AudioRecordInitializationFailedException(String message);
}

public final class AudioTranscriptionConfig {
ctor public AudioTranscriptionConfig();
}

public final class BlockReason {
method public String getName();
method public int getOrdinal();
@@ -839,15 +846,19 @@ package com.google.firebase.ai.type {
ctor public LiveGenerationConfig.Builder();
method public com.google.firebase.ai.type.LiveGenerationConfig build();
method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setFrequencyPenalty(Float? frequencyPenalty);
method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setInputAudioTranscript(com.google.firebase.ai.type.AudioTranscriptionConfig? config);
method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setMaxOutputTokens(Integer? maxOutputTokens);
method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setOutputAudioTranscript(com.google.firebase.ai.type.AudioTranscriptionConfig? config);
method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setPresencePenalty(Float? presencePenalty);
method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setResponseModality(com.google.firebase.ai.type.ResponseModality? responseModality);
method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setSpeechConfig(com.google.firebase.ai.type.SpeechConfig? speechConfig);
method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setTemperature(Float? temperature);
method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setTopK(Integer? topK);
method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setTopP(Float? topP);
field public Float? frequencyPenalty;
field public com.google.firebase.ai.type.AudioTranscriptionConfig? inputAudioTranscription;
field public Integer? maxOutputTokens;
field public com.google.firebase.ai.type.AudioTranscriptionConfig? outputAudioTranscription;
field public Float? presencePenalty;
field public com.google.firebase.ai.type.ResponseModality? responseModality;
field public com.google.firebase.ai.type.SpeechConfig? speechConfig;
@@ -865,14 +876,18 @@ package com.google.firebase.ai.type {
}

@com.google.firebase.ai.type.PublicPreviewAPI public final class LiveServerContent implements com.google.firebase.ai.type.LiveServerMessage {
ctor public LiveServerContent(com.google.firebase.ai.type.Content? content, boolean interrupted, boolean turnComplete, boolean generationComplete);
ctor public LiveServerContent(com.google.firebase.ai.type.Content? content, boolean interrupted, boolean turnComplete, boolean generationComplete, com.google.firebase.ai.type.Transcription? inputTranscription, com.google.firebase.ai.type.Transcription? outputTranscription);
method public com.google.firebase.ai.type.Content? getContent();
method public boolean getGenerationComplete();
method public com.google.firebase.ai.type.Transcription? getInputTranscription();
method public boolean getInterrupted();
method public com.google.firebase.ai.type.Transcription? getOutputTranscription();
method public boolean getTurnComplete();
property public final com.google.firebase.ai.type.Content? content;
property public final boolean generationComplete;
property public final com.google.firebase.ai.type.Transcription? inputTranscription;
property public final boolean interrupted;
property public final com.google.firebase.ai.type.Transcription? outputTranscription;
property public final boolean turnComplete;
}

@@ -909,6 +924,7 @@ package com.google.firebase.ai.type {
method public suspend Object? sendVideoRealtime(com.google.firebase.ai.type.InlineData video, kotlin.coroutines.Continuation<? super kotlin.Unit>);
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler = null, boolean enableInterruptions = false, kotlin.coroutines.Continuation<? super kotlin.Unit>);
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler = null, kotlin.coroutines.Continuation<? super kotlin.Unit>);
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler = null, kotlin.jvm.functions.Function2<? super com.google.firebase.ai.type.Transcription?,? super com.google.firebase.ai.type.Transcription?,kotlin.Unit>? transcriptHandler = null, boolean enableInterruptions = false, kotlin.coroutines.Continuation<? super kotlin.Unit>);
method public void stopAudioConversation();
method public void stopReceiving();
}
@@ -1235,6 +1251,12 @@ package com.google.firebase.ai.type {
ctor public ToolConfig(com.google.firebase.ai.type.FunctionCallingConfig? functionCallingConfig);
}

public final class Transcription {
ctor public Transcription(String? text);
method public String? getText();
property public final String? text;
}

public final class UnknownException extends com.google.firebase.ai.type.FirebaseAIException {
}

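Taken together, the api.txt changes add transcription plumbing in several places: an AudioTranscriptionConfig marker type, input/output transcription slots on LiveGenerationConfig, transcription fields on LiveServerContent, and transcriptHandler overloads on the session APIs. A minimal sketch of how they compose; the entry point, backend, and model name follow the SDK's documented pattern but are assumptions here, not part of this diff:

```kotlin
import com.google.firebase.Firebase
import com.google.firebase.ai.ai
import com.google.firebase.ai.type.AudioTranscriptionConfig
import com.google.firebase.ai.type.GenerativeBackend
import com.google.firebase.ai.type.LiveGenerationConfig
import com.google.firebase.ai.type.PublicPreviewAPI
import com.google.firebase.ai.type.ResponseModality

@OptIn(PublicPreviewAPI::class)
suspend fun startTranscribedConversation() {
    // Attaching an AudioTranscriptionConfig to a side enables transcription for that side.
    val config = LiveGenerationConfig.Builder()
        .setResponseModality(ResponseModality.AUDIO)
        .setInputAudioTranscript(AudioTranscriptionConfig())
        .setOutputAudioTranscript(AudioTranscriptionConfig())
        .build()

    // Illustrative model name and backend; any Live API model works the same way.
    val session = Firebase.ai(backend = GenerativeBackend.vertexAI())
        .liveModel("gemini-2.0-flash-live-preview-04-09", generationConfig = config)
        .connect()

    // Requires the RECORD_AUDIO runtime permission.
    session.startAudioConversation(
        transcriptHandler = { input, output ->
            // Either argument may be null, as may the text inside it.
            input?.text?.let { println("user: $it") }
            output?.text?.let { println("model: $it") }
        }
    )
}
```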
@@ -111,7 +111,9 @@ internal constructor(
modelName,
config?.toInternal(),
tools?.map { it.toInternal() },
systemInstruction?.toInternal()
systemInstruction?.toInternal(),
config?.inputAudioTranscription?.toInternal(),
config?.outputAudioTranscription?.toInternal()
)
.toInternal()
val data: String = Json.encodeToString(clientMessage)
@@ -135,7 +137,7 @@
} catch (e: ClosedReceiveChannelException) {
val reason = webSession?.closeReason?.await()
val message =
"Channel was closed by the server.${if(reason!=null) " Details: ${reason.message}" else "" }"
"Channel was closed by the server.${if (reason != null) " Details: ${reason.message}" else ""}"
throw ServiceConnectionHandshakeFailedException(message, e)
}
}
LiveSessionFutures.kt
@@ -29,6 +29,7 @@ import com.google.firebase.ai.type.LiveSession
import com.google.firebase.ai.type.MediaData
import com.google.firebase.ai.type.PublicPreviewAPI
import com.google.firebase.ai.type.SessionAlreadyReceivingException
import com.google.firebase.ai.type.Transcription
import io.ktor.websocket.close
import kotlinx.coroutines.reactive.asPublisher
import org.reactivestreams.Publisher
@@ -53,6 +54,18 @@ public abstract class LiveSessionFutures internal constructor() {
functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?
): ListenableFuture<Unit>

/**
* Starts an audio conversation with the model, which can only be stopped using
* [stopAudioConversation].
* @param transcriptHandler A callback function that is invoked whenever a transcription is
* received from the model. The first [Transcription] object is the transcription of the user's
* input audio, and the second is the transcription of the model's output audio.
*/
@RequiresPermission(RECORD_AUDIO)
public abstract fun startAudioConversation(
transcriptHandler: ((Transcription?, Transcription?) -> Unit)? = null,
): ListenableFuture<Unit>

/**
* Starts an audio conversation with the model, which can only be stopped using
* [stopAudioConversation].
@@ -73,6 +86,50 @@
@RequiresPermission(RECORD_AUDIO)
public abstract fun startAudioConversation(enableInterruptions: Boolean): ListenableFuture<Unit>

/**
* Starts an audio conversation with the model, which can only be stopped using
* [stopAudioConversation] or [close].
*
* @param transcriptHandler A callback function that is invoked whenever a transcription is
* received from the model. The first [Transcription] object is the transcription of the user's
* input audio, and the second is the transcription of the model's output audio.
*
* @param enableInterruptions If enabled, allows the user to speak over or interrupt the model's
* ongoing reply.
*
* **WARNING**: The user interruption feature relies on device-specific support, and may not be
* consistently available.
*/
@RequiresPermission(RECORD_AUDIO)
public abstract fun startAudioConversation(
transcriptHandler: ((Transcription?, Transcription?) -> Unit)? = null,
enableInterruptions: Boolean
): ListenableFuture<Unit>

/**
* Starts an audio conversation with the model, which can only be stopped using
* [stopAudioConversation] or [close].
*
* @param functionCallHandler A callback function that is invoked whenever the model receives a
* function call.
*
* @param transcriptHandler A callback function that is invoked whenever a transcription is
* received from the model. The first [Transcription] object is the transcription of the user's
* input audio, and the second is the transcription of the model's output audio.
*
* @param enableInterruptions If enabled, allows the user to speak over or interrupt the model's
* ongoing reply.
*
* **WARNING**: The user interruption feature relies on device-specific support, and may not be
* consistently available.
*/
@RequiresPermission(RECORD_AUDIO)
public abstract fun startAudioConversation(
functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?,
transcriptHandler: ((Transcription?, Transcription?) -> Unit)? = null,
enableInterruptions: Boolean
): ListenableFuture<Unit>

/**
* Starts an audio conversation with the model, which can only be stopped using
* [stopAudioConversation] or [close].
@@ -233,6 +290,14 @@ public abstract class LiveSessionFutures internal constructor() {
functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?
) = SuspendToFutureAdapter.launchFuture { session.startAudioConversation(functionCallHandler) }

@RequiresPermission(RECORD_AUDIO)
override fun startAudioConversation(
transcriptHandler: ((Transcription?, Transcription?) -> Unit)?
) =
SuspendToFutureAdapter.launchFuture {
session.startAudioConversation(transcriptHandler = transcriptHandler)
}

@RequiresPermission(RECORD_AUDIO)
override fun startAudioConversation() =
SuspendToFutureAdapter.launchFuture { session.startAudioConversation() }
@@ -243,6 +308,32 @@
session.startAudioConversation(enableInterruptions = enableInterruptions)
}

@RequiresPermission(RECORD_AUDIO)
override fun startAudioConversation(
transcriptHandler: ((Transcription?, Transcription?) -> Unit)?,
enableInterruptions: Boolean
) =
SuspendToFutureAdapter.launchFuture {
session.startAudioConversation(
transcriptHandler = transcriptHandler,
enableInterruptions = enableInterruptions
)
}

@RequiresPermission(RECORD_AUDIO)
override fun startAudioConversation(
functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?,
transcriptHandler: ((Transcription?, Transcription?) -> Unit)?,
enableInterruptions: Boolean
) =
SuspendToFutureAdapter.launchFuture {
session.startAudioConversation(
functionCallHandler = functionCallHandler,
transcriptHandler = transcriptHandler,
enableInterruptions = enableInterruptions
)
}

@RequiresPermission(RECORD_AUDIO)
override fun startAudioConversation(
functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?,
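The same capability reaches Java callers through the LiveSessionFutures overloads above. A sketch of the transcript-handler variant, assuming the companion's existing from(LiveSession) factory; obtaining the underlying session is out of scope here:

```kotlin
import com.google.common.util.concurrent.ListenableFuture
import com.google.firebase.ai.java.LiveSessionFutures
import com.google.firebase.ai.type.LiveSession
import com.google.firebase.ai.type.PublicPreviewAPI

@OptIn(PublicPreviewAPI::class)
fun listenWithTranscripts(session: LiveSession): ListenableFuture<Unit> {
    val futures = LiveSessionFutures.from(session)
    // Requires the RECORD_AUDIO runtime permission; the named argument selects the
    // single-parameter overload, since enableInterruptions has no default there.
    return futures.startAudioConversation(
        transcriptHandler = { input, output ->
            println("user: ${input?.text} / model: ${output?.text}")
        }
    )
}
```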
AudioTranscriptionConfig.kt (new file)
@@ -0,0 +1,27 @@
/*
* Copyright 2025 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.google.firebase.ai.type

import kotlinx.serialization.Serializable

/** The audio transcription configuration. Its presence enables audio transcription. */
public class AudioTranscriptionConfig() {

@Serializable internal object Internal

internal fun toInternal() = Internal
}
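AudioTranscriptionConfig carries no fields: it acts purely as a presence flag, and its @Serializable Internal object encodes as an empty JSON object. A standalone sketch of that kotlinx.serialization behavior, using a stand-in object since Internal is module-private:

```kotlin
import kotlinx.serialization.Serializable
import kotlinx.serialization.encodeToString
import kotlinx.serialization.json.Json

// A @Serializable object carries no state and encodes as an empty JSON object.
@Serializable
object TranscriptionMarker

fun main() {
    println(Json.encodeToString(TranscriptionMarker)) // prints: {}
}
```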
LiveClientSetupMessage.kt
@@ -32,7 +32,9 @@ internal class LiveClientSetupMessage(
// needs its own config class
val generationConfig: LiveGenerationConfig.Internal?,
val tools: List<Tool.Internal>?,
val systemInstruction: Content.Internal?
val systemInstruction: Content.Internal?,
val inputAudioTranscription: AudioTranscriptionConfig.Internal?,
val outputAudioTranscription: AudioTranscriptionConfig.Internal?,
) {
@Serializable
internal class Internal(val setup: LiveClientSetup) {
@@ -41,10 +43,21 @@
val model: String,
val generationConfig: LiveGenerationConfig.Internal?,
val tools: List<Tool.Internal>?,
val systemInstruction: Content.Internal?
val systemInstruction: Content.Internal?,
val inputAudioTranscription: AudioTranscriptionConfig.Internal?,
val outputAudioTranscription: AudioTranscriptionConfig.Internal?,
)
}

fun toInternal() =
Internal(Internal.LiveClientSetup(model, generationConfig, tools, systemInstruction))
Internal(
Internal.LiveClientSetup(
model,
generationConfig,
tools,
systemInstruction,
inputAudioTranscription,
outputAudioTranscription
)
)
}
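With the two new fields threaded through LiveClientSetupMessage, the serialized setup envelope gains inputAudioTranscription / outputAudioTranscription entries whenever the corresponding configs are set. A standalone mimic of that shape; the real classes are internal, and the exact wire format is inferred rather than shown in this diff:

```kotlin
import kotlinx.serialization.Serializable
import kotlinx.serialization.encodeToString
import kotlinx.serialization.json.Json

// Stand-in types. With the default Json configuration, null fields equal their
// defaults and are omitted, so transcription stays off unless a marker is attached.
@Serializable
object Marker

@Serializable
data class Setup(
    val model: String,
    val inputAudioTranscription: Marker? = null,
    val outputAudioTranscription: Marker? = null,
)

@Serializable
data class SetupMessage(val setup: Setup)

fun main() {
    val msg = SetupMessage(Setup("models/example", Marker, Marker))
    println(Json.encodeToString(msg))
    // {"setup":{"model":"models/example","inputAudioTranscription":{},"outputAudioTranscription":{}}}
}
```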
LiveGenerationConfig.kt
@@ -53,6 +53,11 @@ import kotlinx.serialization.Serializable
*
* @property speechConfig Specifies the voice configuration of the audio response from the server.
*
* @property inputAudioTranscription Specifies the configuration for transcribing input audio.
*
* @property outputAudioTranscription Specifies the configuration for transcribing output audio from
* the model.
*
* Refer to the
* [Control generated output](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/control-generated-output)
* guide for more details.
@@ -67,7 +72,9 @@ private constructor(
internal val presencePenalty: Float?,
internal val frequencyPenalty: Float?,
internal val responseModality: ResponseModality?,
internal val speechConfig: SpeechConfig?
internal val speechConfig: SpeechConfig?,
internal val inputAudioTranscription: AudioTranscriptionConfig?,
internal val outputAudioTranscription: AudioTranscriptionConfig?,
) {

/**
@@ -91,6 +98,10 @@
* @property responseModality See [LiveGenerationConfig.responseModality]
*
* @property speechConfig See [LiveGenerationConfig.speechConfig]
*
* @property inputAudioTranscription See [LiveGenerationConfig.inputAudioTranscription]
*
* @property outputAudioTranscription See [LiveGenerationConfig.outputAudioTranscription]
*/
public class Builder {
@JvmField public var temperature: Float? = null
@@ -101,6 +112,8 @@ private constructor(
@JvmField public var frequencyPenalty: Float? = null
@JvmField public var responseModality: ResponseModality? = null
@JvmField public var speechConfig: SpeechConfig? = null
@JvmField public var inputAudioTranscription: AudioTranscriptionConfig? = null
@JvmField public var outputAudioTranscription: AudioTranscriptionConfig? = null

public fun setTemperature(temperature: Float?): Builder = apply {
this.temperature = temperature
@@ -123,6 +136,14 @@ private constructor(
this.speechConfig = speechConfig
}

public fun setInputAudioTranscript(config: AudioTranscriptionConfig?): Builder = apply {
this.inputAudioTranscription = config
}

public fun setOutputAudioTranscript(config: AudioTranscriptionConfig?): Builder = apply {
this.outputAudioTranscription = config
}

/** Create a new [LiveGenerationConfig] with the attached arguments. */
public fun build(): LiveGenerationConfig =
LiveGenerationConfig(
Expand All @@ -133,7 +154,9 @@ private constructor(
presencePenalty = presencePenalty,
frequencyPenalty = frequencyPenalty,
speechConfig = speechConfig,
responseModality = responseModality
responseModality = responseModality,
inputAudioTranscription = inputAudioTranscription,
outputAudioTranscription = outputAudioTranscription,
)
}

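Transcriptions also surface on the raw message stream: LiveServerContent now exposes inputTranscription and outputTranscription, as the api.txt diff shows. For callers driving the session manually instead of using startAudioConversation, a sketch; it assumes the session's existing receive() flow of LiveServerMessage:

```kotlin
import com.google.firebase.ai.type.LiveServerContent
import com.google.firebase.ai.type.LiveSession
import com.google.firebase.ai.type.PublicPreviewAPI

@OptIn(PublicPreviewAPI::class)
suspend fun collectTranscripts(session: LiveSession) {
    session.receive().collect { message ->
        if (message is LiveServerContent) {
            // Either field may be null, depending on which transcription configs were set.
            message.inputTranscription?.text?.let { println("user: $it") }
            message.outputTranscription?.text?.let { println("model: $it") }
        }
    }
}
```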