firebase-ai/api.txt: 27 additions & 0 deletions

@@ -152,6 +152,7 @@ package com.google.firebase.ai.java {
method public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> sendVideoRealtime(com.google.firebase.ai.type.InlineData video);
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation();
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation(boolean enableInterruptions);
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation(com.google.firebase.ai.type.ConversationConfig conversationConfig);
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler);
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler, boolean enableInterruptions);
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler, kotlin.jvm.functions.Function2<? super com.google.firebase.ai.type.Transcription?,? super com.google.firebase.ai.type.Transcription?,kotlin.Unit>? transcriptHandler, boolean enableInterruptions);
@@ -289,6 +290,31 @@ package com.google.firebase.ai.type {
public static final class ContentModality.Companion {
}

@com.google.firebase.ai.type.PublicPreviewAPI public final class ConversationConfig {
field public static final com.google.firebase.ai.type.ConversationConfig.Companion Companion;
}

public static final class ConversationConfig.Builder {
ctor public ConversationConfig.Builder();
method public com.google.firebase.ai.type.ConversationConfig build();
method public com.google.firebase.ai.type.ConversationConfig.Builder setAudioHandler(kotlin.jvm.functions.Function2<? super android.media.AudioRecord,? super android.media.AudioTrack,kotlin.Unit>? audioHandler);
method public com.google.firebase.ai.type.ConversationConfig.Builder setEnableInterruptions(boolean enableInterruptions);
method public com.google.firebase.ai.type.ConversationConfig.Builder setFunctionCallHandler(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler);
method public com.google.firebase.ai.type.ConversationConfig.Builder setTranscriptHandler(kotlin.jvm.functions.Function2<? super com.google.firebase.ai.type.Transcription?,? super com.google.firebase.ai.type.Transcription?,kotlin.Unit>? transcriptHandler);
field public kotlin.jvm.functions.Function2<? super android.media.AudioRecord,? super android.media.AudioTrack,kotlin.Unit>? audioHandler;
field public boolean enableInterruptions;
field public kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler;
field public kotlin.jvm.functions.Function2<? super com.google.firebase.ai.type.Transcription?,? super com.google.firebase.ai.type.Transcription?,kotlin.Unit>? transcriptHandler;
}

public static final class ConversationConfig.Companion {
method public com.google.firebase.ai.type.ConversationConfig.Builder builder();
}

public final class ConversationConfigKt {
method public static com.google.firebase.ai.type.ConversationConfig conversationConfig(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.ConversationConfig.Builder,kotlin.Unit> init);
}

public final class CountTokensResponse {
ctor public CountTokensResponse(int totalTokens, @Deprecated Integer? totalBillableCharacters = null, java.util.List<com.google.firebase.ai.type.ModalityTokenCount> promptTokensDetails = emptyList());
method public operator int component1();
@@ -922,6 +948,7 @@ package com.google.firebase.ai.type {
method @Deprecated public suspend Object? sendMediaStream(java.util.List<com.google.firebase.ai.type.MediaData> mediaChunks, kotlin.coroutines.Continuation<? super kotlin.Unit>);
method public suspend Object? sendTextRealtime(String text, kotlin.coroutines.Continuation<? super kotlin.Unit>);
method public suspend Object? sendVideoRealtime(com.google.firebase.ai.type.InlineData video, kotlin.coroutines.Continuation<? super kotlin.Unit>);
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(com.google.firebase.ai.type.ConversationConfig conversationConfig, kotlin.coroutines.Continuation<? super kotlin.Unit>);
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler = null, boolean enableInterruptions = false, kotlin.coroutines.Continuation<? super kotlin.Unit>);
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler = null, kotlin.coroutines.Continuation<? super kotlin.Unit>);
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler = null, kotlin.jvm.functions.Function2<? super com.google.firebase.ai.type.Transcription?,? super com.google.firebase.ai.type.Transcription?,kotlin.Unit>? transcriptHandler = null, boolean enableInterruptions = false, kotlin.coroutines.Continuation<? super kotlin.Unit>);
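Taken together, these api.txt changes add a single new entry point, startAudioConversation(ConversationConfig), on both the suspending and ListenableFuture surfaces. A minimal sketch of the suspending call path, assuming `session` is an already-connected LiveSession (the wrapper function name is illustrative):

```kotlin
import android.Manifest
import androidx.annotation.RequiresPermission
import com.google.firebase.ai.type.LiveSession
import com.google.firebase.ai.type.PublicPreviewAPI
import com.google.firebase.ai.type.conversationConfig

@OptIn(PublicPreviewAPI::class)
@RequiresPermission(Manifest.permission.RECORD_AUDIO)
suspend fun startConversation(session: LiveSession) {
  // Bundle all conversation options into a single object via the new DSL.
  val config = conversationConfig { enableInterruptions = true }
  // New overload introduced in this PR: one config object instead of loose parameters.
  session.startAudioConversation(config)
}
```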
LiveSessionFutures.kt:

@@ -21,6 +21,7 @@ import androidx.annotation.RequiresPermission
import androidx.concurrent.futures.SuspendToFutureAdapter
import com.google.common.util.concurrent.ListenableFuture
import com.google.firebase.ai.type.Content
import com.google.firebase.ai.type.ConversationConfig
import com.google.firebase.ai.type.FunctionCallPart
import com.google.firebase.ai.type.FunctionResponsePart
import com.google.firebase.ai.type.InlineData
@@ -49,6 +50,18 @@ public abstract class LiveSessionFutures internal constructor() {
@RequiresPermission(RECORD_AUDIO)
public abstract fun startAudioConversation(): ListenableFuture<Unit>

/**
* Starts an audio conversation with the model, which can only be stopped using
* [stopAudioConversation] or [close].
*
* @param conversationConfig A [ConversationConfig] provided by the user to control the various
* aspects of the conversation.
*/
@RequiresPermission(RECORD_AUDIO)
public abstract fun startAudioConversation(
conversationConfig: ConversationConfig
): ListenableFuture<Unit>

/**
* Starts an audio conversation with the model, which can only be stopped using
* [stopAudioConversation] or [close].
@@ -298,6 +311,10 @@ public abstract class LiveSessionFutures internal constructor() {
session.startAudioConversation(transcriptHandler = transcriptHandler)
}

@RequiresPermission(RECORD_AUDIO)
override fun startAudioConversation(conversationConfig: ConversationConfig) =
SuspendToFutureAdapter.launchFuture { session.startAudioConversation(conversationConfig) }

@RequiresPermission(RECORD_AUDIO)
override fun startAudioConversation() =
SuspendToFutureAdapter.launchFuture { session.startAudioConversation() }
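For Java-oriented callers on the ListenableFuture surface, ConversationConfig.Builder mirrors the DSL. A sketch, assuming a LiveSessionFutures wrapper is already in hand (the helper name is illustrative):

```kotlin
import android.Manifest
import androidx.annotation.RequiresPermission
import com.google.common.util.concurrent.ListenableFuture
import com.google.firebase.ai.java.LiveSessionFutures
import com.google.firebase.ai.type.ConversationConfig
import com.google.firebase.ai.type.PublicPreviewAPI

@OptIn(PublicPreviewAPI::class)
@RequiresPermission(Manifest.permission.RECORD_AUDIO)
fun startFromFutures(futures: LiveSessionFutures): ListenableFuture<Unit> {
  // The Builder setters chain, so this reads much like the Kotlin DSL.
  val config = ConversationConfig.builder()
    .setEnableInterruptions(true)
    .build()
  return futures.startAudioConversation(config)
}
```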
AudioHelper.kt:

@@ -159,7 +159,7 @@ internal class AudioHelper(
* constructor.
*/
@RequiresPermission(Manifest.permission.RECORD_AUDIO)
-  fun build(): AudioHelper {
+  fun build(audioHandler: ((AudioRecord, AudioTrack) -> Unit)? = null): AudioHelper {
Collaborator (review comment): update the kdoc to include new parameter
val playbackTrack =
AudioTrack(
AudioAttributes.Builder()
@@ -209,6 +209,9 @@
AcousticEchoCanceler.create(recorder.audioSessionId)?.enabled = true
}

if (audioHandler != null) {
audioHandler(recorder, playbackTrack)
}
return AudioHelper(recorder, playbackTrack)
}
}
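AudioHelper is internal, so the new build parameter is reached through ConversationConfig.audioHandler; per the KDoc below, the callback fires once, right after the recorder and playback track are created, and any changes it makes persist for the session. A sketch of a handler that lowers playback volume (the tag and volume value are illustrative):

```kotlin
import android.media.AudioRecord
import android.media.AudioTrack
import android.util.Log
import com.google.firebase.ai.type.PublicPreviewAPI
import com.google.firebase.ai.type.conversationConfig

@OptIn(PublicPreviewAPI::class)
val audioTweakConfig = conversationConfig {
  audioHandler = { recorder: AudioRecord, track: AudioTrack ->
    // Runs right after both objects are initialized, before audio starts flowing.
    track.setVolume(0.8f) // soften model playback a little
    Log.d("AudioSetup", "recording on audio session ${recorder.audioSessionId}")
  }
}
```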
ConversationConfig.kt (new file):

@@ -0,0 +1,129 @@
/*
* Copyright 2025 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.google.firebase.ai.type

import android.media.AudioRecord
import android.media.AudioTrack

/**
* Configuration parameters for an audio conversation with the model.
*
* @property functionCallHandler A callback function that is invoked whenever the model receives a
* function call. The [FunctionResponsePart] that the callback function returns will be
* automatically sent to the model.
*
* @property transcriptHandler A callback function that is invoked whenever the model receives a
* transcript. The first [Transcription] object is the input transcription, and the second is the
* output transcription.
*
* @property audioHandler A callback function that is invoked immediately following the successful
* initialization of the associated [AudioRecord] and [AudioTrack] objects. This offers a final
* opportunity to apply custom configurations or modifications to these objects, which will remain
* valid and effective for the duration of the current audio session.
*
* @property enableInterruptions If enabled, allows the user to speak over or interrupt the model's
* ongoing reply.
*
* **WARNING**: The user interruption feature relies on device-specific support, and may not be
* consistently available.
*/
@PublicPreviewAPI
public class ConversationConfig
private constructor(
internal var functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?,
internal var audioHandler: ((AudioRecord, AudioTrack) -> Unit)?,
internal var transcriptHandler: ((Transcription?, Transcription?) -> Unit)?,
internal var enableInterruptions: Boolean
) {

/**
* Builder for creating a [ConversationConfig].
*
* Mainly intended for Java interop. Kotlin consumers should use [conversationConfig] for a more
* idiomatic experience.
*
* @property functionCallHandler See [ConversationConfig.functionCallHandler].
*
* @property audioHandler See [ConversationConfig.audioHandler].
*
* @property transcriptHandler See [ConversationConfig.transcriptHandler].
*
* @property enableInterruptions See [ConversationConfig.enableInterruptions].
*/
public class Builder {
@JvmField public var functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)? = null
@JvmField public var audioHandler: ((AudioRecord, AudioTrack) -> Unit)? = null
@JvmField public var transcriptHandler: ((Transcription?, Transcription?) -> Unit)? = null
@JvmField public var enableInterruptions: Boolean = false

public fun setFunctionCallHandler(
functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?
): Builder = apply { this.functionCallHandler = functionCallHandler }

public fun setAudioHandler(audioHandler: ((AudioRecord, AudioTrack) -> Unit)?): Builder =
apply {
this.audioHandler = audioHandler
}

public fun setTranscriptHandler(
transcriptHandler: ((Transcription?, Transcription?) -> Unit)?
): Builder = apply { this.transcriptHandler = transcriptHandler }

public fun setEnableInterruptions(enableInterruptions: Boolean): Builder = apply {
this.enableInterruptions = enableInterruptions
}

/** Create a new [ConversationConfig] with the attached arguments. */
public fun build(): ConversationConfig =
ConversationConfig(
functionCallHandler = functionCallHandler,
audioHandler = audioHandler,
transcriptHandler = transcriptHandler,
enableInterruptions = enableInterruptions
)
}

public companion object {

/**
* Alternative casing for [ConversationConfig.Builder]:
* ```
* val config = ConversationConfig.builder()
* ```
*/
public fun builder(): Builder = Builder()
}
}

/**
* Helper method to construct a [ConversationConfig] in a DSL-like manner.
*
* Example Usage:
* ```
* conversationConfig {
* functionCallHandler = ...
* audioHandler = ...
* ...
* }
* ```
*/
@OptIn(PublicPreviewAPI::class)
public fun conversationConfig(init: ConversationConfig.Builder.() -> Unit): ConversationConfig {
val builder = ConversationConfig.builder()
builder.init()
return builder.build()
}
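A fuller sketch of the DSL with every option set. Note two assumptions about the surrounding SDK that this diff does not show: the FunctionResponsePart(name, JsonObject) constructor and a `text` property on Transcription:

```kotlin
import android.util.Log
import com.google.firebase.ai.type.FunctionResponsePart
import com.google.firebase.ai.type.PublicPreviewAPI
import com.google.firebase.ai.type.conversationConfig
import kotlinx.serialization.json.JsonObject
import kotlinx.serialization.json.JsonPrimitive

@OptIn(PublicPreviewAPI::class)
val fullConfig = conversationConfig {
  functionCallHandler = { call ->
    // The returned part is sent back to the model automatically.
    FunctionResponsePart(call.name, JsonObject(mapOf("result" to JsonPrimitive("ok"))))
  }
  transcriptHandler = { input, output ->
    // First argument is the input (user) transcription, second is the output (model) one.
    Log.d("Transcript", "user=${input?.text} model=${output?.text}")
  }
  audioHandler = { _, track -> track.setVolume(1.0f) }
  enableInterruptions = true
}
```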
LiveSession.kt:

@@ -171,6 +171,22 @@ internal constructor(
transcriptHandler: ((Transcription?, Transcription?) -> Unit)? = null,
enableInterruptions: Boolean = false,
) {
val config = ConversationConfig.builder()
config.functionCallHandler = functionCallHandler
config.transcriptHandler = transcriptHandler
config.enableInterruptions = enableInterruptions
startAudioConversation(config.build())
}

/**
* Starts an audio conversation with the model, which can only be stopped using
* [stopAudioConversation] or [close].
*
* @param conversationConfig A [ConversationConfig] provided by the user to control the various
* aspects of the conversation.
*/
@RequiresPermission(RECORD_AUDIO)
public suspend fun startAudioConversation(conversationConfig: ConversationConfig) {

val context = firebaseApp.applicationContext
if (
@@ -191,11 +207,14 @@
networkScope =
CoroutineScope(blockingDispatcher + childJob() + CoroutineName("LiveSession Network"))
audioScope = CoroutineScope(audioDispatcher + childJob() + CoroutineName("LiveSession Audio"))
-    audioHelper = AudioHelper.build()
+    audioHelper = AudioHelper.build(conversationConfig.audioHandler)

recordUserAudio()
-    processModelResponses(functionCallHandler, transcriptHandler)
-    listenForModelPlayback(enableInterruptions)
+    processModelResponses(
+      conversationConfig.functionCallHandler,
+      conversationConfig.transcriptHandler
+    )
+    listenForModelPlayback(conversationConfig.enableInterruptions)
}
}
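As this hunk shows, the pre-existing overload now just wraps its arguments in a ConversationConfig and delegates, so the two calls below are interchangeable (a sketch, assuming a connected session; call one or the other, not both):

```kotlin
import android.Manifest
import androidx.annotation.RequiresPermission
import com.google.firebase.ai.type.LiveSession
import com.google.firebase.ai.type.PublicPreviewAPI
import com.google.firebase.ai.type.conversationConfig

@OptIn(PublicPreviewAPI::class)
@RequiresPermission(Manifest.permission.RECORD_AUDIO)
suspend fun equivalentStarts(session: LiveSession, useLegacy: Boolean) {
  if (useLegacy) {
    // Pre-existing overload: functionCallHandler = null, enableInterruptions = true.
    session.startAudioConversation(null, true)
  } else {
    // Same behavior through the new single-config entry point.
    session.startAudioConversation(conversationConfig { enableInterruptions = true })
  }
}
```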
