Commit dcce267

Merge branch 'main' into bugfix/Issue.7296.Fix.ClassNotFoundException

2 parents: e79a400 + 5be1c09

File tree: 23 files changed, +520 −78 lines

firebase-ai/CHANGELOG.md

Lines changed: 6 additions & 0 deletions

@@ -1,5 +1,11 @@
 # Unreleased
 
+- [changed] Added `LiveAudioConversationConfig` to control different aspects of the conversation
+  while using the `startAudioConversation` function.
+- [fixed] Fixed an issue causing streaming chat interactions to drop thought signatures. (#7562)
+
+# 17.6.0
+
 - [feature] Added support for server templates via `TemplateGenerativeModel` and
   `TemplateImagenModel`. (#7503)
 

firebase-ai/api.txt

Lines changed: 27 additions & 0 deletions

@@ -165,6 +165,7 @@ package com.google.firebase.ai.java {
     method public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> sendVideoRealtime(com.google.firebase.ai.type.InlineData video);
     method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation();
     method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation(boolean enableInterruptions);
+    method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation(com.google.firebase.ai.type.LiveAudioConversationConfig liveAudioConversationConfig);
     method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler);
     method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler, boolean enableInterruptions);
     method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler, kotlin.jvm.functions.Function2<? super com.google.firebase.ai.type.Transcription?,? super com.google.firebase.ai.type.Transcription?,kotlin.Unit>? transcriptHandler, boolean enableInterruptions);

@@ -874,6 +875,31 @@ package com.google.firebase.ai.type {
   public final class InvalidStateException extends com.google.firebase.ai.type.FirebaseAIException {
   }
 
+  @com.google.firebase.ai.type.PublicPreviewAPI public final class LiveAudioConversationConfig {
+    field public static final com.google.firebase.ai.type.LiveAudioConversationConfig.Companion Companion;
+  }
+
+  public static final class LiveAudioConversationConfig.Builder {
+    ctor public LiveAudioConversationConfig.Builder();
+    method public com.google.firebase.ai.type.LiveAudioConversationConfig build();
+    method public com.google.firebase.ai.type.LiveAudioConversationConfig.Builder setEnableInterruptions(boolean enableInterruptions);
+    method public com.google.firebase.ai.type.LiveAudioConversationConfig.Builder setFunctionCallHandler(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler);
+    method public com.google.firebase.ai.type.LiveAudioConversationConfig.Builder setInitializationHandler(kotlin.jvm.functions.Function2<? super android.media.AudioRecord.Builder,? super android.media.AudioTrack.Builder,kotlin.Unit>? initializationHandler);
+    method public com.google.firebase.ai.type.LiveAudioConversationConfig.Builder setTranscriptHandler(kotlin.jvm.functions.Function2<? super com.google.firebase.ai.type.Transcription?,? super com.google.firebase.ai.type.Transcription?,kotlin.Unit>? transcriptHandler);
+    field public boolean enableInterruptions;
+    field public kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler;
+    field public kotlin.jvm.functions.Function2<? super android.media.AudioRecord.Builder,? super android.media.AudioTrack.Builder,kotlin.Unit>? initializationHandler;
+    field public kotlin.jvm.functions.Function2<? super com.google.firebase.ai.type.Transcription?,? super com.google.firebase.ai.type.Transcription?,kotlin.Unit>? transcriptHandler;
+  }
+
+  public static final class LiveAudioConversationConfig.Companion {
+    method public com.google.firebase.ai.type.LiveAudioConversationConfig.Builder builder();
+  }
+
+  public final class LiveAudioConversationConfigKt {
+    method public static com.google.firebase.ai.type.LiveAudioConversationConfig liveAudioConversationConfig(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.LiveAudioConversationConfig.Builder,kotlin.Unit> init);
+  }
+
   @com.google.firebase.ai.type.PublicPreviewAPI public final class LiveGenerationConfig {
     field public static final com.google.firebase.ai.type.LiveGenerationConfig.Companion Companion;
   }

@@ -958,6 +984,7 @@ package com.google.firebase.ai.type {
     method @Deprecated public suspend Object? sendMediaStream(java.util.List<com.google.firebase.ai.type.MediaData> mediaChunks, kotlin.coroutines.Continuation<? super kotlin.Unit>);
     method public suspend Object? sendTextRealtime(String text, kotlin.coroutines.Continuation<? super kotlin.Unit>);
     method public suspend Object? sendVideoRealtime(com.google.firebase.ai.type.InlineData video, kotlin.coroutines.Continuation<? super kotlin.Unit>);
+    method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(com.google.firebase.ai.type.LiveAudioConversationConfig liveAudioConversationConfig, kotlin.coroutines.Continuation<? super kotlin.Unit>);
     method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler = null, boolean enableInterruptions = false, kotlin.coroutines.Continuation<? super kotlin.Unit>);
     method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler = null, kotlin.coroutines.Continuation<? super kotlin.Unit>);
     method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler = null, kotlin.jvm.functions.Function2<? super com.google.firebase.ai.type.Transcription?,? super com.google.firebase.ai.type.Transcription?,kotlin.Unit>? transcriptHandler = null, boolean enableInterruptions = false, kotlin.coroutines.Continuation<? super kotlin.Unit>);
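
Taken together, the entries above expose both a Kotlin DSL (`liveAudioConversationConfig { ... }`) and a Java-friendly `Builder`. A minimal usage sketch of the DSL path, assuming the DSL lambda takes the `Builder` as its receiver (as the `Function1<Builder, Unit>` signature suggests), an already-connected `LiveSession`, and eliding the `@OptIn` that the public-preview annotation would require in real code:

```kotlin
import android.Manifest
import androidx.annotation.RequiresPermission
import com.google.firebase.ai.type.LiveSession
import com.google.firebase.ai.type.liveAudioConversationConfig

// Sketch only: `session` is an assumed, already-connected LiveSession;
// production code also needs @OptIn for the PublicPreviewAPI annotation.
@RequiresPermission(Manifest.permission.RECORD_AUDIO)
suspend fun startConfiguredConversation(session: LiveSession) {
  val config = liveAudioConversationConfig {
    enableInterruptions = true
    transcriptHandler = { input, output ->
      // Both transcriptions are nullable, per the API signature above.
      println("input=$input, output=$output")
    }
  }
  session.startAudioConversation(config)
}
```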

firebase-ai/gradle.properties

Lines changed: 2 additions & 2 deletions

@@ -12,5 +12,5 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-version=17.6.0
-latestReleasedVersion=17.5.0
+version=17.7.0
+latestReleasedVersion=17.6.0

firebase-ai/src/main/kotlin/com/google/firebase/ai/Chat.kt

Lines changed: 15 additions & 9 deletions

@@ -22,6 +22,7 @@ import com.google.firebase.ai.type.GenerateContentResponse
 import com.google.firebase.ai.type.ImagePart
 import com.google.firebase.ai.type.InlineDataPart
 import com.google.firebase.ai.type.InvalidStateException
+import com.google.firebase.ai.type.Part
 import com.google.firebase.ai.type.TextPart
 import com.google.firebase.ai.type.content
 import java.util.LinkedList

@@ -133,6 +134,7 @@ public class Chat(
     val bitmaps = LinkedList<Bitmap>()
     val inlineDataParts = LinkedList<InlineDataPart>()
     val text = StringBuilder()
+    val parts = mutableListOf<Part>()
 
     /**
      * TODO: revisit when images and inline data are returned. This will cause issues with how

@@ -147,22 +149,17 @@ public class Chat(
             is ImagePart -> bitmaps.add(part.image)
             is InlineDataPart -> inlineDataParts.add(part)
           }
+          parts.add(part)
         }
       }
       .onCompletion {
         lock.release()
         if (it == null) {
           val content =
             content("model") {
-              for (bitmap in bitmaps) {
-                image(bitmap)
-              }
-              for (inlineDataPart in inlineDataParts) {
-                inlineData(inlineDataPart.inlineData, inlineDataPart.mimeType)
-              }
-              if (text.isNotBlank()) {
-                text(text.toString())
-              }
+              setParts(
+                parts.filterNot { part -> part is TextPart && !part.hasContent() }.toMutableList()
+              )
             }
 
           history.add(prompt)

@@ -224,3 +221,12 @@ public class Chat(
     }
   }
 }
+
+/**
+ * Returns true if the [TextPart] contains any content, either in its [TextPart.text] property or
+ * its [TextPart.thoughtSignature] property.
+ */
+private fun TextPart.hasContent(): Boolean {
+  if (text.isNotEmpty()) return true
+  return !thoughtSignature.isNullOrBlank()
+}
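
The substance of the fix: instead of rebuilding the model turn from accumulated text, bitmaps, and inline data (which silently dropped `TextPart`s whose only payload was a `thoughtSignature`), the streamed `Part`s are now kept as-is, and only genuinely empty `TextPart`s are filtered out. A self-contained sketch of that predicate, using a simplified stand-in type since the real `TextPart` lives in the SDK:

```kotlin
// Stand-in for the SDK's TextPart, just to illustrate the filtering rule.
data class StubTextPart(val text: String, val thoughtSignature: String?)

private fun StubTextPart.hasContent(): Boolean {
  if (text.isNotEmpty()) return true
  return !thoughtSignature.isNullOrBlank()
}

fun main() {
  val parts = listOf(
    StubTextPart("Hello", null), // kept: has visible text
    StubTextPart("", "sig-123"), // kept: empty text, but carries a thought signature
    StubTextPart("", null),      // dropped: no content at all
  )
  // Mirrors Chat.kt: drop only the parts with neither text nor a signature.
  println(parts.filterNot { !it.hasContent() })
}
```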

firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt

Lines changed: 26 additions & 4 deletions

@@ -24,6 +24,7 @@ import com.google.firebase.ai.type.Content
 import com.google.firebase.ai.type.FunctionCallPart
 import com.google.firebase.ai.type.FunctionResponsePart
 import com.google.firebase.ai.type.InlineData
+import com.google.firebase.ai.type.LiveAudioConversationConfig
 import com.google.firebase.ai.type.LiveServerMessage
 import com.google.firebase.ai.type.LiveSession
 import com.google.firebase.ai.type.MediaData

@@ -49,6 +50,18 @@ public abstract class LiveSessionFutures internal constructor() {
   @RequiresPermission(RECORD_AUDIO)
   public abstract fun startAudioConversation(): ListenableFuture<Unit>
 
+  /**
+   * Starts an audio conversation with the model, which can only be stopped using
+   * [stopAudioConversation].
+   *
+   * @param liveAudioConversationConfig A [LiveAudioConversationConfig] provided by the user to
+   * control the various aspects of the conversation.
+   */
+  @RequiresPermission(RECORD_AUDIO)
+  public abstract fun startAudioConversation(
+    liveAudioConversationConfig: LiveAudioConversationConfig
+  ): ListenableFuture<Unit>
+
   /**
    * Starts an audio conversation with the model, which can only be stopped using
    * [stopAudioConversation] or [close].

@@ -197,9 +210,12 @@ public abstract class LiveSessionFutures internal constructor() {
   public abstract fun sendVideoRealtime(video: InlineData): ListenableFuture<Unit>
 
   /**
-   * Sends text data to the server in realtime. Check
-   * https://ai.google.dev/api/live#bidigeneratecontentrealtimeinput for details about the realtime
-   * input usage.
+   * For details about the realtime input usage, see the `BidiGenerateContentRealtimeInput`
+   * documentation (
+   * [Gemini Developer API](https://ai.google.dev/api/live#bidigeneratecontentrealtimeinput) or
+   * [Vertex AI Gemini API](https://docs.cloud.google.com/vertex-ai/generative-ai/docs/model-reference/multimodal-live#bidigeneratecontentrealtimeinput)
+   * ).
+   *
    * @param text The text data to send.
    */
   public abstract fun sendTextRealtime(text: String): ListenableFuture<Unit>

@@ -211,7 +227,7 @@ public abstract class LiveSessionFutures internal constructor() {
   *
   * @param mediaChunks The list of [MediaData] instances representing the media data to be sent.
   */
-  @Deprecated("Use sendAudioRealtime, sendVideoRealtime, or sendTextRealtime instead")
+  @Deprecated("Use `sendAudioRealtime`, `sendVideoRealtime`, or `sendTextRealtime` instead")
   public abstract fun sendMediaStream(mediaChunks: List<MediaData>): ListenableFuture<Unit>
 
   /**

@@ -294,6 +310,12 @@ public abstract class LiveSessionFutures internal constructor() {
         session.startAudioConversation(transcriptHandler = transcriptHandler)
       }
 
+    @RequiresPermission(RECORD_AUDIO)
+    override fun startAudioConversation(liveAudioConversationConfig: LiveAudioConversationConfig) =
+      SuspendToFutureAdapter.launchFuture {
+        session.startAudioConversation(liveAudioConversationConfig)
+      }
+
     @RequiresPermission(RECORD_AUDIO)
     override fun startAudioConversation() =
       SuspendToFutureAdapter.launchFuture { session.startAudioConversation() }
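
On the futures-based surface, the same configuration is assembled through the explicit `Builder` exposed by the companion rather than the DSL. A hedged sketch: `sessionFutures` is an assumed, already-obtained `LiveSessionFutures` instance, and the public-preview opt-in is elided:

```kotlin
import android.Manifest
import androidx.annotation.RequiresPermission
import com.google.common.util.concurrent.ListenableFuture
import com.google.firebase.ai.java.LiveSessionFutures
import com.google.firebase.ai.type.LiveAudioConversationConfig

// Sketch only: builder-style construction, matching the api.txt surface above.
@RequiresPermission(Manifest.permission.RECORD_AUDIO)
fun startWithConfig(sessionFutures: LiveSessionFutures): ListenableFuture<Unit> {
  val config = LiveAudioConversationConfig.builder()
    .setEnableInterruptions(true)
    .build()
  return sessionFutures.startAudioConversation(config)
}
```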

firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioHelper.kt

Lines changed: 37 additions & 20 deletions

@@ -19,7 +19,6 @@ package com.google.firebase.ai.type
 import android.Manifest
 import android.media.AudioAttributes
 import android.media.AudioFormat
-import android.media.AudioManager
 import android.media.AudioRecord
 import android.media.AudioTrack
 import android.media.MediaRecorder

@@ -157,28 +156,39 @@ internal class AudioHelper(
    *
    * It also makes it easier to read, since the long initialization is separate from the
    * constructor.
+   *
+   * @param initializationHandler A callback that is invoked immediately following the successful
+   * initialization of the associated [AudioRecord.Builder] and [AudioTrack.Builder] objects. This
+   * offers a final opportunity to configure these objects, which will remain valid and effective
+   * for the duration of the current audio session.
    */
   @RequiresPermission(Manifest.permission.RECORD_AUDIO)
-  fun build(): AudioHelper {
-    val playbackTrack =
-      AudioTrack(
-        AudioAttributes.Builder()
-          .setUsage(AudioAttributes.USAGE_MEDIA)
-          .setContentType(AudioAttributes.CONTENT_TYPE_SPEECH)
-          .build(),
+  fun build(
+    initializationHandler: ((AudioRecord.Builder, AudioTrack.Builder) -> Unit)? = null
+  ): AudioHelper {
+    val playTrackBuilder = AudioTrack.Builder()
+    playTrackBuilder
+      .setAudioFormat(
         AudioFormat.Builder()
           .setSampleRate(24000)
           .setChannelMask(AudioFormat.CHANNEL_OUT_MONO)
           .setEncoding(AudioFormat.ENCODING_PCM_16BIT)
-          .build(),
+          .build()
+      )
+      .setAudioAttributes(
+        AudioAttributes.Builder()
+          .setUsage(AudioAttributes.USAGE_MEDIA)
+          .setContentType(AudioAttributes.CONTENT_TYPE_SPEECH)
+          .build()
+      )
+      .setBufferSizeInBytes(
         AudioTrack.getMinBufferSize(
           24000,
           AudioFormat.CHANNEL_OUT_MONO,
           AudioFormat.ENCODING_PCM_16BIT
         ),
-        AudioTrack.MODE_STREAM,
-        AudioManager.AUDIO_SESSION_ID_GENERATE
       )
+      .setTransferMode(AudioTrack.MODE_STREAM)
 
     val bufferSize =
       AudioRecord.getMinBufferSize(

@@ -191,15 +201,22 @@ internal class AudioHelper(
       throw AudioRecordInitializationFailedException(
         "Audio Record buffer size is invalid ($bufferSize)"
       )
-
-    val recorder =
-      AudioRecord(
-        MediaRecorder.AudioSource.VOICE_COMMUNICATION,
-        16000,
-        AudioFormat.CHANNEL_IN_MONO,
-        AudioFormat.ENCODING_PCM_16BIT,
-        bufferSize
-      )
+    val recorderBuilder =
+      AudioRecord.Builder()
+        .setAudioSource(MediaRecorder.AudioSource.VOICE_COMMUNICATION)
+        .setAudioFormat(
+          AudioFormat.Builder()
+            .setEncoding(AudioFormat.ENCODING_PCM_16BIT)
+            .setSampleRate(16000)
+            .setChannelMask(AudioFormat.CHANNEL_IN_MONO)
+            .build()
+        )
+        .setBufferSizeInBytes(bufferSize)
+    if (initializationHandler != null) {
+      initializationHandler(recorderBuilder, playTrackBuilder)
+    }
+    val recorder = recorderBuilder.build()
+    val playbackTrack = playTrackBuilder.build()
     if (recorder.state != AudioRecord.STATE_INITIALIZED)
       throw AudioRecordInitializationFailedException(
         "Audio Record initialization has failed. State: ${recorder.state}"
firebase-ai/src/main/kotlin/com/google/firebase/ai/type/Candidate.kt

Lines changed: 1 addition & 1 deletion

@@ -84,7 +84,7 @@ internal constructor(
  *
  * The rating will be restricted to a particular [category].
  *
- * @property category The category of harm being assessed (e.g., Hate speech).
+ * @property category The category of harm being assessed (for example, Hate speech).
  * @property probability The likelihood of the content causing harm.
  * @property probabilityScore A numerical score representing the probability of harm, between `0`
  * and `1`.

firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ContentModality.kt

Lines changed: 1 addition & 1 deletion

@@ -71,7 +71,7 @@ public class ContentModality private constructor(public val ordinal: Int) {
     /** Audio. */
     @JvmField public val AUDIO: ContentModality = ContentModality(4)
 
-    /** Document, e.g. PDF. */
+    /** Document (for example, PDF). */
     @JvmField public val DOCUMENT: ContentModality = ContentModality(5)
   }
 }
