24 changes: 23 additions & 1 deletion firebase-ai/api.txt
@@ -154,6 +154,9 @@ package com.google.firebase.ai.java {
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation(boolean enableInterruptions);
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler);
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler, boolean enableInterruptions);
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler, kotlin.jvm.functions.Function2<? super com.google.firebase.ai.type.Transcription?,? super com.google.firebase.ai.type.Transcription?,kotlin.Unit>? transcriptHandler, boolean enableInterruptions);
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation(kotlin.jvm.functions.Function2<? super com.google.firebase.ai.type.Transcription?,? super com.google.firebase.ai.type.Transcription?,kotlin.Unit>? transcriptHandler);
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation(kotlin.jvm.functions.Function2<? super com.google.firebase.ai.type.Transcription?,? super com.google.firebase.ai.type.Transcription?,kotlin.Unit>? transcriptHandler, boolean enableInterruptions);
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> stopAudioConversation();
method public abstract void stopReceiving();
field public static final com.google.firebase.ai.java.LiveSessionFutures.Companion Companion;
@@ -174,6 +177,10 @@ package com.google.firebase.ai.type {
ctor public AudioRecordInitializationFailedException(String message);
}

public final class AudioTranscriptionConfig {
ctor public AudioTranscriptionConfig();
}

public final class BlockReason {
method public String getName();
method public int getOrdinal();
@@ -839,15 +846,19 @@ package com.google.firebase.ai.type {
ctor public LiveGenerationConfig.Builder();
method public com.google.firebase.ai.type.LiveGenerationConfig build();
method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setFrequencyPenalty(Float? frequencyPenalty);
method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setInputAudioTranscription(com.google.firebase.ai.type.AudioTranscriptionConfig? config);
method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setMaxOutputTokens(Integer? maxOutputTokens);
method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setOutputAudioTranscription(com.google.firebase.ai.type.AudioTranscriptionConfig? config);
method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setPresencePenalty(Float? presencePenalty);
method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setResponseModality(com.google.firebase.ai.type.ResponseModality? responseModality);
method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setSpeechConfig(com.google.firebase.ai.type.SpeechConfig? speechConfig);
method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setTemperature(Float? temperature);
method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setTopK(Integer? topK);
method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setTopP(Float? topP);
field public Float? frequencyPenalty;
field public com.google.firebase.ai.type.AudioTranscriptionConfig? inputAudioTranscription;
field public Integer? maxOutputTokens;
field public com.google.firebase.ai.type.AudioTranscriptionConfig? outputAudioTranscription;
field public Float? presencePenalty;
field public com.google.firebase.ai.type.ResponseModality? responseModality;
field public com.google.firebase.ai.type.SpeechConfig? speechConfig;
@@ -865,14 +876,18 @@ package com.google.firebase.ai.type {
}

@com.google.firebase.ai.type.PublicPreviewAPI public final class LiveServerContent implements com.google.firebase.ai.type.LiveServerMessage {
ctor public LiveServerContent(com.google.firebase.ai.type.Content? content, boolean interrupted, boolean turnComplete, boolean generationComplete);
ctor public LiveServerContent(com.google.firebase.ai.type.Content? content, boolean interrupted, boolean turnComplete, boolean generationComplete, com.google.firebase.ai.type.Transcription? inputTranscription, com.google.firebase.ai.type.Transcription? outputTranscription);
method public com.google.firebase.ai.type.Content? getContent();
method public boolean getGenerationComplete();
method public com.google.firebase.ai.type.Transcription? getInputTranscription();
method public boolean getInterrupted();
method public com.google.firebase.ai.type.Transcription? getOutputTranscription();
method public boolean getTurnComplete();
property public final com.google.firebase.ai.type.Content? content;
property public final boolean generationComplete;
property public final com.google.firebase.ai.type.Transcription? inputTranscription;
property public final boolean interrupted;
property public final com.google.firebase.ai.type.Transcription? outputTranscription;
property public final boolean turnComplete;
}

@@ -909,6 +924,7 @@ package com.google.firebase.ai.type {
method public suspend Object? sendVideoRealtime(com.google.firebase.ai.type.InlineData video, kotlin.coroutines.Continuation<? super kotlin.Unit>);
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler = null, boolean enableInterruptions = false, kotlin.coroutines.Continuation<? super kotlin.Unit>);
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler = null, kotlin.coroutines.Continuation<? super kotlin.Unit>);
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler = null, kotlin.jvm.functions.Function2<? super com.google.firebase.ai.type.Transcription?,? super com.google.firebase.ai.type.Transcription?,kotlin.Unit>? transcriptHandler = null, boolean enableInterruptions = false, kotlin.coroutines.Continuation<? super kotlin.Unit>);
method public void stopAudioConversation();
method public void stopReceiving();
}
@@ -1235,6 +1251,12 @@ package com.google.firebase.ai.type {
ctor public ToolConfig(com.google.firebase.ai.type.FunctionCallingConfig? functionCallingConfig);
}

public final class Transcription {
ctor public Transcription(String? text);
method public String? getText();
property public final String? text;
}

public final class UnknownException extends com.google.firebase.ai.type.FirebaseAIException {
}

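Putting the new surface together: the sketch below wires up the two `AudioTranscriptionConfig` setters and the `transcriptHandler` overload from the API above. The builder setters, `Transcription`, and the `startAudioConversation` signature come from this diff; `ResponseModality.AUDIO`, the `@OptIn` marker, the log tags, and how `session` is obtained are illustrative assumptions, not part of the change.

```kotlin
import android.Manifest
import android.util.Log
import androidx.annotation.RequiresPermission
import com.google.firebase.ai.type.AudioTranscriptionConfig
import com.google.firebase.ai.type.LiveGenerationConfig
import com.google.firebase.ai.type.LiveSession
import com.google.firebase.ai.type.PublicPreviewAPI
import com.google.firebase.ai.type.ResponseModality
import com.google.firebase.ai.type.Transcription

// Setting either transcription config enables that direction of transcription;
// the config object itself carries no options. The built config is passed when
// creating the live model (model creation is outside this diff).
@OptIn(PublicPreviewAPI::class)
fun transcribingLiveConfig(): LiveGenerationConfig =
  LiveGenerationConfig.Builder()
    .setResponseModality(ResponseModality.AUDIO)
    .setInputAudioTranscription(AudioTranscriptionConfig())
    .setOutputAudioTranscription(AudioTranscriptionConfig())
    .build()

// Either side of the transcript pair may be null on any given server message.
@OptIn(PublicPreviewAPI::class)
@RequiresPermission(Manifest.permission.RECORD_AUDIO)
suspend fun startTranscribedConversation(session: LiveSession) {
  session.startAudioConversation(
    transcriptHandler = { input: Transcription?, output: Transcription? ->
      input?.text?.let { Log.d("Live", "user: $it") }
      output?.text?.let { Log.d("Live", "model: $it") }
    },
    enableInterruptions = false,
  )
}
```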
@@ -111,7 +111,9 @@ internal constructor(
modelName,
config?.toInternal(),
tools?.map { it.toInternal() },
systemInstruction?.toInternal()
systemInstruction?.toInternal(),
config?.inputAudioTranscription?.toInternal(),
config?.outputAudioTranscription?.toInternal()
)
.toInternal()
val data: String = Json.encodeToString(clientMessage)
@@ -135,7 +137,7 @@
} catch (e: ClosedReceiveChannelException) {
val reason = webSession?.closeReason?.await()
val message =
"Channel was closed by the server.${if(reason!=null) " Details: ${reason.message}" else "" }"
"Channel was closed by the server.${if (reason != null) " Details: ${reason.message}" else ""}"
throw ServiceConnectionHandshakeFailedException(message, e)
}
}
@@ -29,6 +29,7 @@ import com.google.firebase.ai.type.LiveSession
import com.google.firebase.ai.type.MediaData
import com.google.firebase.ai.type.PublicPreviewAPI
import com.google.firebase.ai.type.SessionAlreadyReceivingException
import com.google.firebase.ai.type.Transcription
import io.ktor.websocket.close
import kotlinx.coroutines.reactive.asPublisher
import org.reactivestreams.Publisher
@@ -41,6 +42,13 @@ import org.reactivestreams.Publisher
@PublicPreviewAPI
public abstract class LiveSessionFutures internal constructor() {

/**
* Starts an audio conversation with the model, which can only be stopped using
* [stopAudioConversation] or [close].
*/
@RequiresPermission(RECORD_AUDIO)
public abstract fun startAudioConversation(): ListenableFuture<Unit>

/**
* Starts an audio conversation with the model, which can only be stopped using
* [stopAudioConversation] or [close].
@@ -56,9 +64,14 @@ public abstract class LiveSessionFutures internal constructor() {
/**
* Starts an audio conversation with the model, which can only be stopped using
* [stopAudioConversation] or [close].
*
* @param transcriptHandler A callback function that is invoked whenever a transcription is
* received. The first [Transcription] object is the input transcription, and the second is the
* output transcription.
*/
@RequiresPermission(RECORD_AUDIO)
public abstract fun startAudioConversation(): ListenableFuture<Unit>
public abstract fun startAudioConversation(
transcriptHandler: ((Transcription?, Transcription?) -> Unit)?,
): ListenableFuture<Unit>

/**
* Starts an audio conversation with the model, which can only be stopped using
@@ -73,6 +86,26 @@
@RequiresPermission(RECORD_AUDIO)
public abstract fun startAudioConversation(enableInterruptions: Boolean): ListenableFuture<Unit>

/**
* Starts an audio conversation with the model, which can only be stopped using
* [stopAudioConversation] or [close].
*
* @param enableInterruptions If enabled, allows the user to speak over or interrupt the model's
* ongoing reply.
*
* @param transcriptHandler A callback function that is invoked whenever a transcription is
* received. The first [Transcription] object is the input transcription, and the second is the
* output transcription.
*
* **WARNING**: The user interruption feature relies on device-specific support, and may not be
* consistently available.
*/
@RequiresPermission(RECORD_AUDIO)
public abstract fun startAudioConversation(
transcriptHandler: ((Transcription?, Transcription?) -> Unit)?,
enableInterruptions: Boolean
): ListenableFuture<Unit>

/**
* Starts an audio conversation with the model, which can only be stopped using
* [stopAudioConversation] or [close].
@@ -92,6 +125,30 @@
enableInterruptions: Boolean
): ListenableFuture<Unit>

/**
* Starts an audio conversation with the model, which can only be stopped using
* [stopAudioConversation] or [close].
*
* @param functionCallHandler A callback function that is invoked whenever the model receives a
* function call.
*
* @param transcriptHandler A callback function that is invoked whenever a transcription is
* received. The first [Transcription] object is the input transcription, and the second is the
* output transcription.
*
* @param enableInterruptions If enabled, allows the user to speak over or interrupt the model's
* ongoing reply.
*
* **WARNING**: The user interruption feature relies on device-specific support, and may not be
* consistently available.
*/
@RequiresPermission(RECORD_AUDIO)
public abstract fun startAudioConversation(
functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?,
transcriptHandler: ((Transcription?, Transcription?) -> Unit)?,
enableInterruptions: Boolean
): ListenableFuture<Unit>

/**
* Stops the audio conversation with the Gemini Server.
*
@@ -233,6 +290,14 @@ public abstract class LiveSessionFutures internal constructor() {
functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?
) = SuspendToFutureAdapter.launchFuture { session.startAudioConversation(functionCallHandler) }

@RequiresPermission(RECORD_AUDIO)
override fun startAudioConversation(
transcriptHandler: ((Transcription?, Transcription?) -> Unit)?
) =
SuspendToFutureAdapter.launchFuture {
session.startAudioConversation(transcriptHandler = transcriptHandler)
}

@RequiresPermission(RECORD_AUDIO)
override fun startAudioConversation() =
SuspendToFutureAdapter.launchFuture { session.startAudioConversation() }
@@ -243,6 +308,32 @@
session.startAudioConversation(enableInterruptions = enableInterruptions)
}

@RequiresPermission(RECORD_AUDIO)
override fun startAudioConversation(
transcriptHandler: ((Transcription?, Transcription?) -> Unit)?,
enableInterruptions: Boolean
) =
SuspendToFutureAdapter.launchFuture {
session.startAudioConversation(
transcriptHandler = transcriptHandler,
enableInterruptions = enableInterruptions
)
}

@RequiresPermission(RECORD_AUDIO)
override fun startAudioConversation(
functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?,
transcriptHandler: ((Transcription?, Transcription?) -> Unit)?,
enableInterruptions: Boolean
) =
SuspendToFutureAdapter.launchFuture {
session.startAudioConversation(
functionCallHandler = functionCallHandler,
transcriptHandler = transcriptHandler,
enableInterruptions = enableInterruptions
)
}

@RequiresPermission(RECORD_AUDIO)
override fun startAudioConversation(
functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?,
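For non-coroutine callers, the same overloads are mirrored through `ListenableFuture`. A minimal sketch, assuming the conventional `LiveSessionFutures.from(session)` factory implied by the `Companion` field in api.txt (the factory itself is not shown in this diff):

```kotlin
import android.Manifest
import androidx.annotation.RequiresPermission
import com.google.common.util.concurrent.ListenableFuture
import com.google.firebase.ai.java.LiveSessionFutures
import com.google.firebase.ai.type.LiveSession
import com.google.firebase.ai.type.PublicPreviewAPI
import com.google.firebase.ai.type.Transcription

// Uses the two-argument (transcriptHandler, enableInterruptions) overload
// added above; the two-parameter lambda disambiguates it from the
// functionCallHandler overload.
@OptIn(PublicPreviewAPI::class)
@RequiresPermission(Manifest.permission.RECORD_AUDIO)
fun startWithTranscripts(session: LiveSession): ListenableFuture<Unit> =
  LiveSessionFutures.from(session) // assumed factory; not part of this diff
    .startAudioConversation(
      { input: Transcription?, output: Transcription? ->
        println("user=${input?.text} model=${output?.text}")
      },
      false, // enableInterruptions
    )
```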
@@ -0,0 +1,27 @@
/*
* Copyright 2025 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.google.firebase.ai.type

import kotlinx.serialization.Serializable

/** The audio transcription configuration. Its presence enables audio transcription. */
public class AudioTranscriptionConfig {

@Serializable internal object Internal

internal fun toInternal() = Internal
}
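`toInternal()` returns an empty `@Serializable` singleton, so the wire signal is the field's presence, not its contents. A standalone sketch of the same pattern (the names here are illustrative, not the SDK's internals):

```kotlin
import kotlinx.serialization.Serializable
import kotlinx.serialization.encodeToString
import kotlinx.serialization.json.Json

// An empty @Serializable object encodes as "{}". Sending the field at all is
// what switches transcription on; omitting it leaves transcription off.
@Serializable
object EmptyMarker

fun main() {
  println(Json.encodeToString(EmptyMarker)) // prints: {}
}
```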
@@ -32,7 +32,9 @@ internal class LiveClientSetupMessage(
// needs its own config class
val generationConfig: LiveGenerationConfig.Internal?,
val tools: List<Tool.Internal>?,
val systemInstruction: Content.Internal?
val systemInstruction: Content.Internal?,
val inputAudioTranscription: AudioTranscriptionConfig.Internal?,
val outputAudioTranscription: AudioTranscriptionConfig.Internal?,
) {
@Serializable
internal class Internal(val setup: LiveClientSetup) {
@@ -41,10 +43,21 @@
val model: String,
val generationConfig: LiveGenerationConfig.Internal?,
val tools: List<Tool.Internal>?,
val systemInstruction: Content.Internal?
val systemInstruction: Content.Internal?,
val inputAudioTranscription: AudioTranscriptionConfig.Internal?,
val outputAudioTranscription: AudioTranscriptionConfig.Internal?,
)
}

fun toInternal() =
Internal(Internal.LiveClientSetup(model, generationConfig, tools, systemInstruction))
Internal(
Internal.LiveClientSetup(
model,
generationConfig,
tools,
systemInstruction,
inputAudioTranscription,
outputAudioTranscription
)
)
}
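With both configs set, the encoded setup frame gains two empty-object fields. The stand-ins below mirror just enough of the internal message to show the shape on the wire; the real classes are internal, carry more fields, and the SDK's `Json` configuration for null and default handling is not shown in this diff.

```kotlin
import kotlinx.serialization.Serializable
import kotlinx.serialization.encodeToString
import kotlinx.serialization.json.Json

// Minimal stand-ins mirroring the internal setup message (names illustrative).
@Serializable object Marker

@Serializable
class Setup(
  val model: String,
  val inputAudioTranscription: Marker?,
  val outputAudioTranscription: Marker?,
)

@Serializable class SetupMessage(val setup: Setup)

fun main() {
  val frame = SetupMessage(Setup("models/my-live-model", Marker, Marker))
  // {"setup":{"model":"models/my-live-model","inputAudioTranscription":{},"outputAudioTranscription":{}}}
  println(Json.encodeToString(frame))
}
```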