From 92c71176a1e3a86a505b0c05fca58af14dd46ae0 Mon Sep 17 00:00:00 2001 From: VinayGuthal Date: Tue, 22 Apr 2025 14:03:46 -0400 Subject: [PATCH 1/9] add multimodal api --- firebase-vertexai/gradle.properties | 2 +- .../firebase/vertexai/type/GenerateContentResponse.kt | 5 +++++ .../com/google/firebase/vertexai/type/GenerationConfig.kt | 8 +++++++- .../com/google/firebase/vertexai/type/ResponseModality.kt | 1 - 4 files changed, 13 insertions(+), 3 deletions(-) diff --git a/firebase-vertexai/gradle.properties b/firebase-vertexai/gradle.properties index 6b68ce2a134..9bf9cd8d627 100644 --- a/firebase-vertexai/gradle.properties +++ b/firebase-vertexai/gradle.properties @@ -12,5 +12,5 @@ # See the License for the specific language governing permissions and # limitations under the License. -version=16.4.0 +version=99.9.9 latestReleasedVersion=16.3.0 diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/GenerateContentResponse.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/GenerateContentResponse.kt index 00395252914..2f3f89d27f2 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/GenerateContentResponse.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/GenerateContentResponse.kt @@ -44,6 +44,11 @@ public class GenerateContentResponse( candidates.first().content.parts.filterIsInstance() } + /** Convenience field to list all the [InlineDataPart]s in the response, if they exist. */ + public val inlineDataParts: List by lazy { + candidates.first().content.parts.filterIsInstance() + } + @Serializable internal data class Internal( val candidates: List? 
= null, diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/GenerationConfig.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/GenerationConfig.kt index 4abec8a260d..93347058370 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/GenerationConfig.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/GenerationConfig.kt @@ -77,6 +77,7 @@ import kotlinx.serialization.Serializable * guide for more details. */ public class GenerationConfig +@OptIn(PublicPreviewAPI::class) private constructor( internal val temperature: Float?, internal val topK: Int?, @@ -88,6 +89,7 @@ private constructor( internal val stopSequences: List?, internal val responseMimeType: String?, internal val responseSchema: Schema?, + internal val responseModalities: List?, ) { /** @@ -128,6 +130,7 @@ private constructor( @JvmField public var stopSequences: List? = null @JvmField public var responseMimeType: String? = null @JvmField public var responseSchema: Schema? = null + @JvmField public var responseModalities: List? = null /** Create a new [GenerationConfig] with the attached arguments. */ public fun build(): GenerationConfig = @@ -142,6 +145,7 @@ private constructor( frequencyPenalty = frequencyPenalty, responseMimeType = responseMimeType, responseSchema = responseSchema, + responseModalities = responseModalities ) } @@ -156,7 +160,8 @@ private constructor( frequencyPenalty = frequencyPenalty, presencePenalty = presencePenalty, responseMimeType = responseMimeType, - responseSchema = responseSchema?.toInternal() + responseSchema = responseSchema?.toInternal(), + responseModalities = responseModalities?.map { it.toInternal() } ) @Serializable @@ -171,6 +176,7 @@ private constructor( @SerialName("presence_penalty") val presencePenalty: Float? = null, @SerialName("frequency_penalty") val frequencyPenalty: Float? = null, @SerialName("response_schema") val responseSchema: Schema.Internal? 
= null, + @SerialName("response_modalities") val responseModalities: List? = null ) public companion object { diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/ResponseModality.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/ResponseModality.kt index e6be477f845..09343755216 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/ResponseModality.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/ResponseModality.kt @@ -21,7 +21,6 @@ import kotlinx.serialization.KSerializer import kotlinx.serialization.Serializable /** Represents the type of content present in a response (e.g., text, image, audio). */ -@PublicPreviewAPI public class ResponseModality private constructor(public val ordinal: Int) { @Serializable(Internal.Serializer::class) From a182700f6c8a21b276ae13e8285b11bc1bb56a28 Mon Sep 17 00:00:00 2001 From: VinayGuthal Date: Tue, 22 Apr 2025 14:06:23 -0400 Subject: [PATCH 2/9] spotless apply --- .../google/firebase/vertexai/type/GenerationConfig.kt | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/GenerationConfig.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/GenerationConfig.kt index 93347058370..61dffd0b052 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/GenerationConfig.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/GenerationConfig.kt @@ -69,6 +69,9 @@ import kotlinx.serialization.Serializable * @property responseSchema Output schema of the generated candidate text. If set, a compatible * [responseMimeType] must also be set. * + * @property responseModalities Specifies the format of the data in which the server responds to + * requests + * * Compatible MIME types: * - `application/json`: Schema for JSON response. 
* @@ -77,7 +80,6 @@ import kotlinx.serialization.Serializable * guide for more details. */ public class GenerationConfig -@OptIn(PublicPreviewAPI::class) private constructor( internal val temperature: Float?, internal val topK: Int?, @@ -89,7 +91,7 @@ private constructor( internal val stopSequences: List?, internal val responseMimeType: String?, internal val responseSchema: Schema?, - internal val responseModalities: List?, + internal val responseModalities: List?, ) { /** @@ -117,6 +119,9 @@ private constructor( * @property responseMimeType See [GenerationConfig.responseMimeType]. * * @property responseSchema See [GenerationConfig.responseSchema]. + * + * @property responseModalities See [GenerationConfig.responseModalities]. + * * @see [generationConfig] */ public class Builder { From cd92cefd6de67d5475d6dfb6ca41ef1a65abe41a Mon Sep 17 00:00:00 2001 From: VinayGuthal Date: Tue, 22 Apr 2025 15:38:01 -0400 Subject: [PATCH 3/9] update parts --- .../firebase/vertexai/type/GenerateContentResponse.kt | 5 ++++- .../kotlin/com/google/firebase/vertexai/type/Part.kt | 9 ++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/GenerateContentResponse.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/GenerateContentResponse.kt index 2f3f89d27f2..9b7df0258e4 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/GenerateContentResponse.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/GenerateContentResponse.kt @@ -46,7 +46,10 @@ public class GenerateContentResponse( /** Convenience field to list all the [InlineDataPart]s in the response, if they exist. 
*/ public val inlineDataParts: List by lazy { - candidates.first().content.parts.filterIsInstance() + candidates.first().content.parts.let { parts -> + parts.filterIsInstance().map { it.toInlineDataPart() } + + parts.filterIsInstance() + } } @Serializable diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/Part.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/Part.kt index b2538a8d6a0..efd130c85ca 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/Part.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/Part.kt @@ -45,7 +45,14 @@ public class TextPart(public val text: String) : Part { * * @param image [Bitmap] to convert into a [Part] */ -public class ImagePart(public val image: Bitmap) : Part +public class ImagePart(public val image: Bitmap) : Part { + + internal fun toInlineDataPart() = + InlineDataPart( + android.util.Base64.decode(encodeBitmapToBase64Png(image), BASE_64_FLAGS), + "image/jpeg" + ) +} /** * Represents binary data with an associated MIME type sent to and received from requests. From b467fed0f679a8790afb344b2a838b1cb0c897a0 Mon Sep 17 00:00:00 2001 From: VinayGuthal Date: Tue, 22 Apr 2025 15:45:14 -0400 Subject: [PATCH 4/9] update api text file --- firebase-vertexai/CHANGELOG.md | 1 + firebase-vertexai/api.txt | 9 ++++++--- firebase-vertexai/gradle.properties | 2 +- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/firebase-vertexai/CHANGELOG.md b/firebase-vertexai/CHANGELOG.md index 0334dc35f6b..fe6726770e8 100644 --- a/firebase-vertexai/CHANGELOG.md +++ b/firebase-vertexai/CHANGELOG.md @@ -13,6 +13,7 @@ * [fixed] Fixed an issue with `LiveContentResponse` audio data not being present when the model was interrupted or the turn completed. (#6870) * [fixed] Fixed an issue with `LiveSession` not converting exceptions to `FirebaseVertexAIException`. (#6870) +* [feature] Enable response generation in multiple modalities. 
# 16.3.0 diff --git a/firebase-vertexai/api.txt b/firebase-vertexai/api.txt index ecc567e537f..cb3d14904b3 100644 --- a/firebase-vertexai/api.txt +++ b/firebase-vertexai/api.txt @@ -132,9 +132,9 @@ package com.google.firebase.vertexai.java { method public abstract com.google.common.util.concurrent.ListenableFuture send(String text); method public abstract com.google.common.util.concurrent.ListenableFuture sendFunctionResponse(java.util.List functionList); method public abstract com.google.common.util.concurrent.ListenableFuture sendMediaStream(java.util.List mediaChunks); - method public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(); + method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(); method public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler); - method public abstract com.google.common.util.concurrent.ListenableFuture stopAudioConversation(); + method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture stopAudioConversation(); method public abstract void stopReceiving(); field public static final com.google.firebase.vertexai.java.LiveSessionFutures.Companion Companion; } @@ -330,11 +330,13 @@ package com.google.firebase.vertexai.type { ctor public GenerateContentResponse(java.util.List candidates, com.google.firebase.vertexai.type.PromptFeedback? promptFeedback, com.google.firebase.vertexai.type.UsageMetadata? usageMetadata); method public java.util.List getCandidates(); method public java.util.List getFunctionCalls(); + method public java.util.List getInlineDataParts(); method public com.google.firebase.vertexai.type.PromptFeedback? getPromptFeedback(); method public String? getText(); method public com.google.firebase.vertexai.type.UsageMetadata? 
getUsageMetadata(); property public final java.util.List candidates; property public final java.util.List functionCalls; + property public final java.util.List inlineDataParts; property public final com.google.firebase.vertexai.type.PromptFeedback? promptFeedback; property public final String? text; property public final com.google.firebase.vertexai.type.UsageMetadata? usageMetadata; @@ -352,6 +354,7 @@ package com.google.firebase.vertexai.type { field public Integer? maxOutputTokens; field public Float? presencePenalty; field public String? responseMimeType; + field public java.util.List? responseModalities; field public com.google.firebase.vertexai.type.Schema? responseSchema; field public java.util.List? stopSequences; field public Float? temperature; @@ -690,7 +693,7 @@ package com.google.firebase.vertexai.type { public final class RequestTimeoutException extends com.google.firebase.vertexai.type.FirebaseVertexAIException { } - @com.google.firebase.vertexai.type.PublicPreviewAPI public final class ResponseModality { + public final class ResponseModality { method public int getOrdinal(); property public final int ordinal; field public static final com.google.firebase.vertexai.type.ResponseModality AUDIO; diff --git a/firebase-vertexai/gradle.properties b/firebase-vertexai/gradle.properties index 9bf9cd8d627..6b68ce2a134 100644 --- a/firebase-vertexai/gradle.properties +++ b/firebase-vertexai/gradle.properties @@ -12,5 +12,5 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-version=99.9.9 +version=16.4.0 latestReleasedVersion=16.3.0 From 5c9b61150c9a86f74cab7ab0c115654c56307cae Mon Sep 17 00:00:00 2001 From: VinayGuthal Date: Wed, 23 Apr 2025 10:39:20 -0400 Subject: [PATCH 5/9] updated change log --- firebase-vertexai/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/firebase-vertexai/CHANGELOG.md b/firebase-vertexai/CHANGELOG.md index fe6726770e8..b7db00bdde2 100644 --- a/firebase-vertexai/CHANGELOG.md +++ b/firebase-vertexai/CHANGELOG.md @@ -13,7 +13,7 @@ * [fixed] Fixed an issue with `LiveContentResponse` audio data not being present when the model was interrupted or the turn completed. (#6870) * [fixed] Fixed an issue with `LiveSession` not converting exceptions to `FirebaseVertexAIException`. (#6870) -* [feature] Enable response generation in multiple modalities. +* [feature] Enable response generation in multiple modalities. (#6901) # 16.3.0 From d1f6f97fa2f6b7dc6129c23957a496fb195077d2 Mon Sep 17 00:00:00 2001 From: VinayGuthal Date: Wed, 23 Apr 2025 10:52:03 -0400 Subject: [PATCH 6/9] update comment --- .../google/firebase/vertexai/type/GenerateContentResponse.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/GenerateContentResponse.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/GenerateContentResponse.kt index 9b7df0258e4..3ba52ad63cd 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/GenerateContentResponse.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/GenerateContentResponse.kt @@ -44,7 +44,7 @@ public class GenerateContentResponse( candidates.first().content.parts.filterIsInstance() } - /** Convenience field to list all the [InlineDataPart]s in the response, if they exist. */ + /** Returns inline data parts found in any `Part`s of the first candidate of the response, if any. 
 */ public val inlineDataParts: List by lazy { candidates.first().content.parts.let { parts -> parts.filterIsInstance().map { it.toInlineDataPart() } + From a9a7457217c61fdd8efcb1b38b54f4cc0c93d405 Mon Sep 17 00:00:00 2001 From: VinayGuthal Date: Wed, 23 Apr 2025 10:59:12 -0400 Subject: [PATCH 7/9] spotless apply --- .../google/firebase/vertexai/type/GenerateContentResponse.kt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/GenerateContentResponse.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/GenerateContentResponse.kt index 3ba52ad63cd..772540965a4 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/GenerateContentResponse.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/GenerateContentResponse.kt @@ -44,7 +44,9 @@ public class GenerateContentResponse( candidates.first().content.parts.filterIsInstance() } - /** Returns inline data parts found in any `Part`s of the first candidate of the response, if any. */ + /** + * Returns inline data parts found in any `Part`s of the first candidate of the response, if any. 
+ */ public val inlineDataParts: List by lazy { candidates.first().content.parts.let { parts -> parts.filterIsInstance().map { it.toInlineDataPart() } + From e11aa7b5ad0429b8a80181f056e6ac58f76ba333 Mon Sep 17 00:00:00 2001 From: Vinay Guthal Date: Wed, 23 Apr 2025 12:45:37 -0400 Subject: [PATCH 8/9] Update firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/GenerateContentResponse.kt Co-authored-by: Daymon <17409137+daymxn@users.noreply.github.com> --- .../google/firebase/vertexai/type/GenerateContentResponse.kt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/GenerateContentResponse.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/GenerateContentResponse.kt index 772540965a4..91c2cd8f4ea 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/GenerateContentResponse.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/GenerateContentResponse.kt @@ -45,7 +45,9 @@ public class GenerateContentResponse( } /** - * Returns inline data parts found in any `Part`s of the first candidate of the response, if any. + * Convenience field representing all the [InlineDataPart]s in the first candidate, if they exist. + * + * This also includes any [ImagePart], but they will be represented as [InlineDataPart] instead. 
*/ public val inlineDataParts: List by lazy { candidates.first().content.parts.let { parts -> From a4aec74e012436e972d4aee1c7bf4cbd0cd107fa Mon Sep 17 00:00:00 2001 From: Vinay Guthal Date: Wed, 23 Apr 2025 12:45:51 -0400 Subject: [PATCH 9/9] Update firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/GenerationConfig.kt Co-authored-by: Daymon <17409137+daymxn@users.noreply.github.com> --- .../com/google/firebase/vertexai/type/GenerationConfig.kt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/GenerationConfig.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/GenerationConfig.kt index 61dffd0b052..88705c58a92 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/GenerationConfig.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/GenerationConfig.kt @@ -69,8 +69,7 @@ import kotlinx.serialization.Serializable * @property responseSchema Output schema of the generated candidate text. If set, a compatible * [responseMimeType] must also be set. * - * @property responseModalities Specifies the format of the data in which the server responds to - * requests + * @property responseModalities The formats of data in which the model should respond. * * Compatible MIME types: * - `application/json`: Schema for JSON response.