Skip to content
Merged
2 changes: 2 additions & 0 deletions firebase-ai/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# Unreleased

- [changed] **Breaking Change**: Removed the `candidateCount` option from `LiveGenerationConfig`
- [changed] Added support for user interrupts for the `startAudioConversation` method in the
`LiveSession` class. (#7413)
- [changed] Added support for the URL context tool, which allows the model to access content from
provided public web URLs to inform and enhance its responses. (#7382)
- [changed] Added better error messages to `ServiceConnectionHandshakeFailedException` (#7412)
Expand Down
5 changes: 4 additions & 1 deletion firebase-ai/api.txt
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,9 @@ package com.google.firebase.ai.java {
method public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> sendFunctionResponse(java.util.List<com.google.firebase.ai.type.FunctionResponsePart> functionList);
method public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> sendMediaStream(java.util.List<com.google.firebase.ai.type.MediaData> mediaChunks);
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation();
method public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler);
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation(boolean enableInterruptions);
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler);
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler, boolean enableInterruptions);
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> stopAudioConversation();
method public abstract void stopReceiving();
field public static final com.google.firebase.ai.java.LiveSessionFutures.Companion Companion;
Expand Down Expand Up @@ -891,6 +893,7 @@ package com.google.firebase.ai.type {
method public suspend Object? send(String text, kotlin.coroutines.Continuation<? super kotlin.Unit>);
method public suspend Object? sendFunctionResponse(java.util.List<com.google.firebase.ai.type.FunctionResponsePart> functionList, kotlin.coroutines.Continuation<? super kotlin.Unit>);
method public suspend Object? sendMediaStream(java.util.List<com.google.firebase.ai.type.MediaData> mediaChunks, kotlin.coroutines.Continuation<? super kotlin.Unit>);
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler = null, boolean enableInterruptions = false, kotlin.coroutines.Continuation<? super kotlin.Unit>);
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler = null, kotlin.coroutines.Continuation<? super kotlin.Unit>);
method public void stopAudioConversation();
method public void stopReceiving();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
package com.google.firebase.ai.common.util

import android.media.AudioRecord
import kotlin.time.Duration.Companion.milliseconds
import kotlinx.coroutines.delay
import kotlinx.coroutines.flow.flow
import kotlinx.coroutines.yield

Expand All @@ -38,13 +40,15 @@ internal fun AudioRecord.readAsFlow() = flow {

while (true) {
if (recordingState != AudioRecord.RECORDSTATE_RECORDING) {
// TODO(vguthal): Investigate if both yield and delay are required.
// NOTE(review): delay() suspends cooperatively and already checks for cancellation,
// so the yield() following it is very likely redundant — delay alone should suffice.
delay(10.milliseconds)
yield()
continue
}

// NOTE(review): AudioRecord.read is a blocking call; presumably collectors run this
// flow on an I/O dispatcher (flowOn) — confirm at the call sites.
val bytesRead = read(buffer, 0, buffer.size)
if (bytesRead > 0) {
emit(buffer.copyOf(bytesRead))
}
yield()
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ public abstract class LiveSessionFutures internal constructor() {
* @param functionCallHandler A callback function that is invoked whenever the model receives a
* function call.
*/
@RequiresPermission(RECORD_AUDIO)
public abstract fun startAudioConversation(
functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?
): ListenableFuture<Unit>
Expand All @@ -58,6 +59,38 @@ public abstract class LiveSessionFutures internal constructor() {
@RequiresPermission(RECORD_AUDIO)
public abstract fun startAudioConversation(): ListenableFuture<Unit>

/**
* Starts an audio conversation with the model, which can only be stopped using
* [stopAudioConversation] or [close].
*
* **WARNING**: The user interruption feature relies on device-specific support, and may not be
* consistently available.
*
* @param enableInterruptions If enabled, allows the user to speak over or interrupt the model's
* ongoing reply.
*/
@RequiresPermission(RECORD_AUDIO)
public abstract fun startAudioConversation(enableInterruptions: Boolean): ListenableFuture<Unit>

/**
* Starts an audio conversation with the model, which can only be stopped using
* [stopAudioConversation] or [close].
*
* **WARNING**: The user interruption feature relies on device-specific support, and may not be
* consistently available.
*
* @param functionCallHandler A callback function that is invoked whenever the model receives a
* function call.
* @param enableInterruptions If enabled, allows the user to speak over or interrupt the model's
* ongoing reply.
*/
@RequiresPermission(RECORD_AUDIO)
public abstract fun startAudioConversation(
functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?,
enableInterruptions: Boolean
): ListenableFuture<Unit>

/**
* Stops the audio conversation with the Gemini Server.
*
Expand Down Expand Up @@ -169,6 +202,24 @@ public abstract class LiveSessionFutures internal constructor() {
// NOTE(review): the sibling overloads declare @RequiresPermission(RECORD_AUDIO) directly on
// the override; confirm this no-arg override carries the annotation as well (it may simply be
// just above this diff hunk and out of view).
override fun startAudioConversation() =
SuspendToFutureAdapter.launchFuture { session.startAudioConversation() }

@RequiresPermission(RECORD_AUDIO)
override fun startAudioConversation(enableInterruptions: Boolean): ListenableFuture<Unit> {
    // Bridge the suspend API onto a ListenableFuture for Java callers; the boolean is
    // forwarded by name because the underlying suspend function's first parameter is
    // the (optional) function-call handler.
    return SuspendToFutureAdapter.launchFuture {
        session.startAudioConversation(enableInterruptions = enableInterruptions)
    }
}

@RequiresPermission(RECORD_AUDIO)
override fun startAudioConversation(
    functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?,
    enableInterruptions: Boolean,
): ListenableFuture<Unit> {
    // Bridge the suspend API onto a ListenableFuture for Java callers; both arguments are
    // forwarded by name so the call site cannot silently mix them up.
    return SuspendToFutureAdapter.launchFuture {
        session.startAudioConversation(
            functionCallHandler = functionCallHandler,
            enableInterruptions = enableInterruptions,
        )
    }
}

override fun stopAudioConversation() =
SuspendToFutureAdapter.launchFuture { session.stopAudioConversation() }

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,6 @@ internal class AudioHelper(
*/
fun listenToRecording(): Flow<ByteArray> {
if (released) return emptyFlow()

resumeRecording()

return recorder.readAsFlow()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,28 @@ internal constructor(
public suspend fun startAudioConversation(
    functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)? = null
) {
    // Delegate to the primary overload. The boolean is passed by name so the call site is
    // self-documenting: interruptions stay disabled, preserving this overload's previous
    // behavior for existing callers.
    startAudioConversation(functionCallHandler, enableInterruptions = false)
}

/**
* Starts an audio conversation with the model, which can only be stopped using
* [stopAudioConversation] or [close].
*
* @param functionCallHandler A callback function that is invoked whenever the model receives a
* function call. The [FunctionResponsePart] that the callback function returns will be
* automatically sent to the model.
*
* @param enableInterruptions If enabled, allows the user to speak over or interrupt the model's
* ongoing reply.
*
* **WARNING**: The user interruption feature relies on device-specific support, and may not be
* consistently available.
*/
@RequiresPermission(RECORD_AUDIO)
public suspend fun startAudioConversation(
functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)? = null,
enableInterruptions: Boolean = false,
) {

val context = firebaseApp.applicationContext
if (
Expand All @@ -120,7 +142,7 @@ internal constructor(

recordUserAudio()
processModelResponses(functionCallHandler)
listenForModelPlayback()
listenForModelPlayback(enableInterruptions)
}
}

Expand Down Expand Up @@ -375,23 +397,26 @@ internal constructor(
*
* Launched asynchronously on [scope].
*/
// NOTE(review): this span is a unified-diff capture — both the old no-parameter signature and
// the new parameterized one appear below; only the parameterized form is the current code.
private fun listenForModelPlayback() {
private fun listenForModelPlayback(enableInterruptions: Boolean = false) {
scope.launch {
while (isActive) {
val playbackData = playBackQueue.poll()
if (playbackData == null) {
// The model playback queue is complete, so we can continue recording
// TODO(b/408223520): Conditionally resume when param is added
audioHelper?.resumeRecording()
if (!enableInterruptions) {
audioHelper?.resumeRecording()
}
yield()
} else {
/**
* We pause the recording while the model is speaking to avoid interrupting it because of
* no echo cancellation
*/
// TODO(b/408223520): Conditionally pause when param is added
audioHelper?.pauseRecording()

// NOTE(review): prefer `!enableInterruptions` — `!= true` on a non-nullable Boolean is a
// Kotlin anti-pattern and is inconsistent with the `!enableInterruptions` check used in
// the null branch above.
if (enableInterruptions != true) {
audioHelper?.pauseRecording()
}
audioHelper?.playAudio(playbackData)
}
}
Expand Down