2 changes: 2 additions & 0 deletions firebase-ai/CHANGELOG.md
@@ -1,5 +1,7 @@
# Unreleased

- [changed] Added `LiveAudioConversationConfig` to control various aspects of the conversation
while using the `startAudioConversation` function.
- [changed] Improved scheduling and increased output volume for the Live API.
- [changed] Added support for input and output transcription. (#7482)
- [feature] Added support for sending realtime audio and video in a `LiveSession`.
27 changes: 27 additions & 0 deletions firebase-ai/api.txt
@@ -152,6 +152,7 @@ package com.google.firebase.ai.java {
method public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> sendVideoRealtime(com.google.firebase.ai.type.InlineData video);
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation();
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation(boolean enableInterruptions);
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation(com.google.firebase.ai.type.LiveAudioConversationConfig liveAudioConversationConfig);
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler);
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler, boolean enableInterruptions);
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler, kotlin.jvm.functions.Function2<? super com.google.firebase.ai.type.Transcription?,? super com.google.firebase.ai.type.Transcription?,kotlin.Unit>? transcriptHandler, boolean enableInterruptions);
@@ -838,6 +839,31 @@ package com.google.firebase.ai.type {
public final class InvalidStateException extends com.google.firebase.ai.type.FirebaseAIException {
}

@com.google.firebase.ai.type.PublicPreviewAPI public final class LiveAudioConversationConfig {
field public static final com.google.firebase.ai.type.LiveAudioConversationConfig.Companion Companion;
}

public static final class LiveAudioConversationConfig.Builder {
ctor public LiveAudioConversationConfig.Builder();
method public com.google.firebase.ai.type.LiveAudioConversationConfig build();
method public com.google.firebase.ai.type.LiveAudioConversationConfig.Builder setAudioHandler(kotlin.jvm.functions.Function2<? super android.media.AudioRecord,? super android.media.AudioTrack,kotlin.Unit>? audioHandler);
method public com.google.firebase.ai.type.LiveAudioConversationConfig.Builder setEnableInterruptions(boolean enableInterruptions);
method public com.google.firebase.ai.type.LiveAudioConversationConfig.Builder setFunctionCallHandler(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler);
method public com.google.firebase.ai.type.LiveAudioConversationConfig.Builder setTranscriptHandler(kotlin.jvm.functions.Function2<? super com.google.firebase.ai.type.Transcription?,? super com.google.firebase.ai.type.Transcription?,kotlin.Unit>? transcriptHandler);
field public kotlin.jvm.functions.Function2<? super android.media.AudioRecord,? super android.media.AudioTrack,kotlin.Unit>? audioHandler;
field public boolean enableInterruptions;
field public kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler;
field public kotlin.jvm.functions.Function2<? super com.google.firebase.ai.type.Transcription?,? super com.google.firebase.ai.type.Transcription?,kotlin.Unit>? transcriptHandler;
}

public static final class LiveAudioConversationConfig.Companion {
method public com.google.firebase.ai.type.LiveAudioConversationConfig.Builder builder();
}

public final class LiveAudioConversationConfigKt {
method public static com.google.firebase.ai.type.LiveAudioConversationConfig liveAudioConversationConfig(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.LiveAudioConversationConfig.Builder,kotlin.Unit> init);
}

@com.google.firebase.ai.type.PublicPreviewAPI public final class LiveGenerationConfig {
field public static final com.google.firebase.ai.type.LiveGenerationConfig.Companion Companion;
}
@@ -922,6 +948,7 @@ package com.google.firebase.ai.type {
method @Deprecated public suspend Object? sendMediaStream(java.util.List<com.google.firebase.ai.type.MediaData> mediaChunks, kotlin.coroutines.Continuation<? super kotlin.Unit>);
method public suspend Object? sendTextRealtime(String text, kotlin.coroutines.Continuation<? super kotlin.Unit>);
method public suspend Object? sendVideoRealtime(com.google.firebase.ai.type.InlineData video, kotlin.coroutines.Continuation<? super kotlin.Unit>);
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(com.google.firebase.ai.type.LiveAudioConversationConfig liveAudioConversationConfig, kotlin.coroutines.Continuation<? super kotlin.Unit>);
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler = null, boolean enableInterruptions = false, kotlin.coroutines.Continuation<? super kotlin.Unit>);
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler = null, kotlin.coroutines.Continuation<? super kotlin.Unit>);
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler = null, kotlin.jvm.functions.Function2<? super com.google.firebase.ai.type.Transcription?,? super com.google.firebase.ai.type.Transcription?,kotlin.Unit>? transcriptHandler = null, boolean enableInterruptions = false, kotlin.coroutines.Continuation<? super kotlin.Unit>);
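The api.txt entries above only list signatures. For orientation, here is a minimal, hypothetical sketch of how the new single-argument suspend overload might be called from Kotlin; the `session` value and the logging tag are assumptions for illustration, not part of this PR:

```
import android.Manifest
import android.util.Log
import androidx.annotation.RequiresPermission
import com.google.firebase.ai.type.LiveAudioConversationConfig
import com.google.firebase.ai.type.LiveSession
import com.google.firebase.ai.type.PublicPreviewAPI

// Sketch only: assumes `session` is an already-connected LiveSession and the
// app holds RECORD_AUDIO at runtime.
@OptIn(PublicPreviewAPI::class)
@RequiresPermission(Manifest.permission.RECORD_AUDIO)
suspend fun startConversation(session: LiveSession) {
  val config = LiveAudioConversationConfig.builder()
    .setEnableInterruptions(true)
    .setTranscriptHandler { input, output ->
      // First argument is the input transcription, second is the output.
      Log.d("LiveDemo", "input=$input output=$output")
    }
    .build()
  session.startAudioConversation(config)
}
```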
2 changes: 1 addition & 1 deletion firebase-ai/gradle.properties
@@ -12,5 +12,5 @@
# See the License for the specific language governing permissions and
# limitations under the License.

-version=17.5.0
+version=99.9.9
latestReleasedVersion=17.4.0
firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt
@@ -24,6 +24,7 @@ import com.google.firebase.ai.type.Content
import com.google.firebase.ai.type.FunctionCallPart
import com.google.firebase.ai.type.FunctionResponsePart
import com.google.firebase.ai.type.InlineData
import com.google.firebase.ai.type.LiveAudioConversationConfig
import com.google.firebase.ai.type.LiveServerMessage
import com.google.firebase.ai.type.LiveSession
import com.google.firebase.ai.type.MediaData
@@ -49,6 +50,18 @@ public abstract class LiveSessionFutures internal constructor() {
@RequiresPermission(RECORD_AUDIO)
public abstract fun startAudioConversation(): ListenableFuture<Unit>

/**
* Starts an audio conversation with the model, which can only be stopped using
* [stopAudioConversation] or [close].
*
* @param liveAudioConversationConfig A [LiveAudioConversationConfig] provided by the user to
* control various aspects of the conversation.
*/
@RequiresPermission(RECORD_AUDIO)
public abstract fun startAudioConversation(
liveAudioConversationConfig: LiveAudioConversationConfig
): ListenableFuture<Unit>

/**
* Starts an audio conversation with the model, which can only be stopped using
* [stopAudioConversation] or [close].
@@ -298,6 +311,12 @@ public abstract class LiveSessionFutures internal constructor() {
session.startAudioConversation(transcriptHandler = transcriptHandler)
}

@RequiresPermission(RECORD_AUDIO)
override fun startAudioConversation(liveAudioConversationConfig: LiveAudioConversationConfig) =
SuspendToFutureAdapter.launchFuture {
session.startAudioConversation(liveAudioConversationConfig)
}

@RequiresPermission(RECORD_AUDIO)
override fun startAudioConversation() =
SuspendToFutureAdapter.launchFuture { session.startAudioConversation() }
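The same configuration flows through the `ListenableFuture` wrapper for Java-oriented callers. A hedged sketch; obtaining the `LiveSessionFutures` instance is out of scope here and assumed:

```
import android.Manifest
import androidx.annotation.RequiresPermission
import com.google.common.util.concurrent.ListenableFuture
import com.google.firebase.ai.java.LiveSessionFutures
import com.google.firebase.ai.type.LiveAudioConversationConfig
import com.google.firebase.ai.type.PublicPreviewAPI

// Sketch only: how the new ListenableFuture overload might be driven. The
// `futures` instance is assumed to be obtained elsewhere in the app.
@OptIn(PublicPreviewAPI::class)
@RequiresPermission(Manifest.permission.RECORD_AUDIO)
fun startConversation(futures: LiveSessionFutures): ListenableFuture<Unit> {
  val config = LiveAudioConversationConfig.builder().setEnableInterruptions(false).build()
  return futures.startAudioConversation(config)
}
```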
firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioHelper.kt
@@ -19,7 +19,6 @@ package com.google.firebase.ai.type
import android.Manifest
import android.media.AudioAttributes
import android.media.AudioFormat
-import android.media.AudioManager
import android.media.AudioRecord
import android.media.AudioTrack
import android.media.MediaRecorder
@@ -157,28 +156,39 @@ internal class AudioHelper(
*
* It also makes it easier to read, since the long initialization is separate from the
* constructor.
*
* @param audioHandler A callback that is invoked with the [AudioRecord.Builder] and
* [AudioTrack.Builder] after their default configuration has been applied, but before the
* [AudioRecord] and [AudioTrack] are built. This offers a final opportunity to customize them;
* the resulting configuration remains in effect for the duration of the current audio session.
*/
@RequiresPermission(Manifest.permission.RECORD_AUDIO)
- fun build(): AudioHelper {
-   val playbackTrack =
-     AudioTrack(
-       AudioAttributes.Builder()
-         .setUsage(AudioAttributes.USAGE_MEDIA)
-         .setContentType(AudioAttributes.CONTENT_TYPE_SPEECH)
-         .build(),
-       AudioFormat.Builder()
-         .setSampleRate(24000)
-         .setChannelMask(AudioFormat.CHANNEL_OUT_MONO)
-         .setEncoding(AudioFormat.ENCODING_PCM_16BIT)
-         .build(),
-       AudioTrack.getMinBufferSize(
-         24000,
-         AudioFormat.CHANNEL_OUT_MONO,
-         AudioFormat.ENCODING_PCM_16BIT
-       ),
-       AudioTrack.MODE_STREAM,
-       AudioManager.AUDIO_SESSION_ID_GENERATE
-     )
+ fun build(
+   audioHandler: ((AudioRecord.Builder, AudioTrack.Builder) -> Unit)? = null
+ ): AudioHelper {
+   val playTrackBuilder = AudioTrack.Builder()
+   playTrackBuilder
+     .setAudioFormat(
+       AudioFormat.Builder()
+         .setSampleRate(24000)
+         .setChannelMask(AudioFormat.CHANNEL_OUT_MONO)
+         .setEncoding(AudioFormat.ENCODING_PCM_16BIT)
+         .build()
+     )
+     .setAudioAttributes(
+       AudioAttributes.Builder()
+         .setUsage(AudioAttributes.USAGE_MEDIA)
+         .setContentType(AudioAttributes.CONTENT_TYPE_SPEECH)
+         .build()
+     )
+     .setBufferSizeInBytes(
+       AudioTrack.getMinBufferSize(
+         24000,
+         AudioFormat.CHANNEL_OUT_MONO,
+         AudioFormat.ENCODING_PCM_16BIT
+       )
+     )
+     .setTransferMode(AudioTrack.MODE_STREAM)

val bufferSize =
AudioRecord.getMinBufferSize(
@@ -191,15 +201,22 @@
throw AudioRecordInitializationFailedException(
"Audio Record buffer size is invalid ($bufferSize)"
)

-   val recorder =
-     AudioRecord(
-       MediaRecorder.AudioSource.VOICE_COMMUNICATION,
-       16000,
-       AudioFormat.CHANNEL_IN_MONO,
-       AudioFormat.ENCODING_PCM_16BIT,
-       bufferSize
-     )
+   val recorderBuilder =
+     AudioRecord.Builder()
+       .setAudioSource(MediaRecorder.AudioSource.VOICE_COMMUNICATION)
+       .setAudioFormat(
+         AudioFormat.Builder()
+           .setEncoding(AudioFormat.ENCODING_PCM_16BIT)
+           .setSampleRate(16000)
+           .setChannelMask(AudioFormat.CHANNEL_IN_MONO)
+           .build()
+       )
+       .setBufferSizeInBytes(bufferSize)
+   if (audioHandler != null) {
+     audioHandler(recorderBuilder, playTrackBuilder)
+   }
+   val recorder = recorderBuilder.build()
+   val playbackTrack = playTrackBuilder.build()
if (recorder.state != AudioRecord.STATE_INITIALIZED)
throw AudioRecordInitializationFailedException(
"Audio Record initialization has failed. State: ${recorder.state}"
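Because the handler now runs against the builders before `build()` is called, callers get one hook to apply device-specific tuning. Below is a sketch of one possible handler; the specific tweaks (unprocessed mic source on API 24+, low-latency playback on API 26+) are illustrative assumptions, not SDK recommendations:

```
import android.media.AudioRecord
import android.media.AudioTrack
import android.media.MediaRecorder

// Illustrative audioHandler: adjust the pre-configured builders before the
// AudioRecord and AudioTrack are built.
val tuningHandler: (AudioRecord.Builder, AudioTrack.Builder) -> Unit =
  { recordBuilder, trackBuilder ->
    // Swap in the unprocessed mic source (assumes device support, API 24+).
    recordBuilder.setAudioSource(MediaRecorder.AudioSource.UNPROCESSED)
    // Request the low-latency output path (API 26+).
    trackBuilder.setPerformanceMode(AudioTrack.PERFORMANCE_MODE_LOW_LATENCY)
  }
```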
firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveAudioConversationConfig.kt
@@ -0,0 +1,130 @@
/*
* Copyright 2025 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.google.firebase.ai.type

import android.media.AudioRecord
import android.media.AudioTrack

/**
 * Configuration parameters for a live audio conversation.
 *
 * @property functionCallHandler A callback that is invoked whenever a function call is received
 * from the model. The [FunctionResponsePart] returned by the callback is automatically sent back
 * to the model.
 *
 * @property transcriptHandler A callback that is invoked whenever a transcription is received from
 * the model. The first [Transcription] is the input transcription, and the second is the output
 * transcription.
 *
 * @property audioHandler A callback invoked with the [AudioRecord.Builder] and
 * [AudioTrack.Builder] after their default configuration has been applied and before the audio
 * objects are built. This offers a final opportunity to customize them; the resulting
 * configuration remains in effect for the duration of the current audio session.
 *
 * @property enableInterruptions If enabled, allows the user to speak over or interrupt the model's
 * ongoing reply.
 *
 * **WARNING**: The user interruption feature relies on device-specific support and may not be
 * consistently available.
 */
@PublicPreviewAPI
public class LiveAudioConversationConfig
private constructor(
internal val functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?,
internal val audioHandler: ((AudioRecord.Builder, AudioTrack.Builder) -> Unit)?,
internal val transcriptHandler: ((Transcription?, Transcription?) -> Unit)?,
internal val enableInterruptions: Boolean
) {

/**
* Builder for creating a [LiveAudioConversationConfig].
*
* Mainly intended for Java interop. Kotlin consumers should use [liveAudioConversationConfig] for
* a more idiomatic experience.
*
* @property functionCallHandler See [LiveAudioConversationConfig.functionCallHandler].
*
* @property audioHandler See [LiveAudioConversationConfig.audioHandler].
*
* @property transcriptHandler See [LiveAudioConversationConfig.transcriptHandler].
*
* @property enableInterruptions See [LiveAudioConversationConfig.enableInterruptions].
*/
public class Builder {
@JvmField public var functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)? = null
@JvmField public var audioHandler: ((AudioRecord.Builder, AudioTrack.Builder) -> Unit)? = null
@JvmField public var transcriptHandler: ((Transcription?, Transcription?) -> Unit)? = null
@JvmField public var enableInterruptions: Boolean = false

public fun setFunctionCallHandler(
functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?
): Builder = apply { this.functionCallHandler = functionCallHandler }

public fun setAudioHandler(
audioHandler: ((AudioRecord.Builder, AudioTrack.Builder) -> Unit)?
): Builder = apply { this.audioHandler = audioHandler }

public fun setTranscriptHandler(
transcriptHandler: ((Transcription?, Transcription?) -> Unit)?
): Builder = apply { this.transcriptHandler = transcriptHandler }

public fun setEnableInterruptions(enableInterruptions: Boolean): Builder = apply {
this.enableInterruptions = enableInterruptions
}

/** Create a new [LiveAudioConversationConfig] with the attached arguments. */
public fun build(): LiveAudioConversationConfig =
LiveAudioConversationConfig(
functionCallHandler = functionCallHandler,
audioHandler = audioHandler,
transcriptHandler = transcriptHandler,
enableInterruptions = enableInterruptions
)
}

public companion object {

/**
* Alternative casing for [LiveAudioConversationConfig.Builder]:
* ```
* val config = LiveAudioConversationConfig.builder()
* ```
*/
public fun builder(): Builder = Builder()
}
}

/**
* Helper method to construct a [LiveAudioConversationConfig] in a DSL-like manner.
*
* Example Usage:
* ```
* liveAudioConversationConfig {
* functionCallHandler = ...
* audioHandler = ...
* ...
* }
* ```
*/
@OptIn(PublicPreviewAPI::class)
public fun liveAudioConversationConfig(
init: LiveAudioConversationConfig.Builder.() -> Unit
): LiveAudioConversationConfig {
val builder = LiveAudioConversationConfig.builder()
builder.init()
return builder.build()
}
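Putting the DSL together with the audio hook, a minimal sketch; the performance-mode tweak is an assumption for illustration:

```
import android.media.AudioTrack
import com.google.firebase.ai.type.PublicPreviewAPI
import com.google.firebase.ai.type.liveAudioConversationConfig

// Sketch only: builds a config with interruptions enabled and a last-chance
// AudioTrack customization applied before the track is built.
@OptIn(PublicPreviewAPI::class)
val demoConfig = liveAudioConversationConfig {
  enableInterruptions = true
  audioHandler = { _, trackBuilder ->
    trackBuilder.setPerformanceMode(AudioTrack.PERFORMANCE_MODE_LOW_LATENCY)
  }
}
```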
firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt
@@ -171,6 +171,26 @@ internal constructor(
transcriptHandler: ((Transcription?, Transcription?) -> Unit)? = null,
enableInterruptions: Boolean = false,
) {
startAudioConversation(
liveAudioConversationConfig {
this.functionCallHandler = functionCallHandler
this.transcriptHandler = transcriptHandler
this.enableInterruptions = enableInterruptions
}
)
}

/**
* Starts an audio conversation with the model, which can only be stopped using
* [stopAudioConversation] or [close].
*
* @param liveAudioConversationConfig A [LiveAudioConversationConfig] provided by the user to
* control various aspects of the conversation.
*/
@RequiresPermission(RECORD_AUDIO)
public suspend fun startAudioConversation(
liveAudioConversationConfig: LiveAudioConversationConfig
) {

val context = firebaseApp.applicationContext
if (
@@ -191,11 +211,14 @@
networkScope =
CoroutineScope(blockingDispatcher + childJob() + CoroutineName("LiveSession Network"))
audioScope = CoroutineScope(audioDispatcher + childJob() + CoroutineName("LiveSession Audio"))
-    audioHelper = AudioHelper.build()
+    audioHelper = AudioHelper.build(liveAudioConversationConfig.audioHandler)

     recordUserAudio()
-    processModelResponses(functionCallHandler, transcriptHandler)
-    listenForModelPlayback(enableInterruptions)
+    processModelResponses(
+      liveAudioConversationConfig.functionCallHandler,
+      liveAudioConversationConfig.transcriptHandler
+    )
+    listenForModelPlayback(liveAudioConversationConfig.enableInterruptions)
}
}

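Since the older parameter-list overload now simply wraps its arguments in a `liveAudioConversationConfig`, the two entry points below are expected to behave identically. A sketch, with `session` assumed to be an already-connected `LiveSession`:

```
import android.Manifest
import androidx.annotation.RequiresPermission
import com.google.firebase.ai.type.LiveSession
import com.google.firebase.ai.type.PublicPreviewAPI
import com.google.firebase.ai.type.liveAudioConversationConfig

// Call one or the other, not both: the first delegates to the second.
@OptIn(PublicPreviewAPI::class)
@RequiresPermission(Manifest.permission.RECORD_AUDIO)
suspend fun startViaParameters(session: LiveSession) =
  session.startAudioConversation(
    functionCallHandler = null,
    transcriptHandler = null,
    enableInterruptions = true,
  )

@OptIn(PublicPreviewAPI::class)
@RequiresPermission(Manifest.permission.RECORD_AUDIO)
suspend fun startViaConfig(session: LiveSession) =
  session.startAudioConversation(liveAudioConversationConfig { enableInterruptions = true })
```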