diff --git a/firebase-vertexai/CHANGELOG.md b/firebase-vertexai/CHANGELOG.md index 6f16cb1fcc4..029813fe9be 100644 --- a/firebase-vertexai/CHANGELOG.md +++ b/firebase-vertexai/CHANGELOG.md @@ -1,5 +1,5 @@ # Unreleased - +* [changed] Added support for modality-based token count. (#6658) # 16.1.0 * [changed] Internal improvements to correctly handle empty model responses. @@ -64,4 +64,3 @@ * [feature] Added support for `responseMimeType` in `GenerationConfig`. * [changed] Renamed `GoogleGenerativeAIException` to `FirebaseVertexAIException`. * [changed] Updated the KDocs for various classes and functions. - diff --git a/firebase-vertexai/api.txt b/firebase-vertexai/api.txt index fe3003d880b..5bf1e3667bf 100644 --- a/firebase-vertexai/api.txt +++ b/firebase-vertexai/api.txt @@ -165,12 +165,30 @@ package com.google.firebase.vertexai.type { method public static com.google.firebase.vertexai.type.Content content(String? role = "user", kotlin.jvm.functions.Function1 init); } + public final class ContentModality { + method public int getOrdinal(); + property public final int ordinal; + field public static final com.google.firebase.vertexai.type.ContentModality AUDIO; + field public static final com.google.firebase.vertexai.type.ContentModality.Companion Companion; + field public static final com.google.firebase.vertexai.type.ContentModality DOCUMENT; + field public static final com.google.firebase.vertexai.type.ContentModality IMAGE; + field public static final com.google.firebase.vertexai.type.ContentModality TEXT; + field public static final com.google.firebase.vertexai.type.ContentModality UNSPECIFIED; + field public static final com.google.firebase.vertexai.type.ContentModality VIDEO; + } + + public static final class ContentModality.Companion { + } + public final class CountTokensResponse { - ctor public CountTokensResponse(int totalTokens, Integer? totalBillableCharacters = null); + ctor public CountTokensResponse(int totalTokens, Integer? 
totalBillableCharacters = null, java.util.List<com.google.firebase.vertexai.type.ModalityTokenCount>? promptTokensDetails = null); method public operator int component1(); method public operator Integer? component2(); + method public operator java.util.List<com.google.firebase.vertexai.type.ModalityTokenCount>? component3(); + method public java.util.List<com.google.firebase.vertexai.type.ModalityTokenCount>? getPromptTokensDetails(); method public Integer? getTotalBillableCharacters(); method public int getTotalTokens(); + property public final java.util.List<com.google.firebase.vertexai.type.ModalityTokenCount>? promptTokensDetails; property public final Integer? totalBillableCharacters; property public final int totalTokens; } @@ -369,6 +387,15 @@ package com.google.firebase.vertexai.type { public final class InvalidStateException extends com.google.firebase.vertexai.type.FirebaseVertexAIException { } + public final class ModalityTokenCount { + method public operator com.google.firebase.vertexai.type.ContentModality component1(); + method public operator int component2(); + method public com.google.firebase.vertexai.type.ContentModality getModality(); + method public int getTokenCount(); + property public final com.google.firebase.vertexai.type.ContentModality modality; + property public final int tokenCount; + } + public interface Part { } @@ -549,12 +576,16 @@ package com.google.firebase.vertexai.type { } public final class UsageMetadata { - ctor public UsageMetadata(int promptTokenCount, Integer? candidatesTokenCount, int totalTokenCount); + ctor public UsageMetadata(int promptTokenCount, Integer? candidatesTokenCount, int totalTokenCount, java.util.List<com.google.firebase.vertexai.type.ModalityTokenCount>? promptTokensDetails, java.util.List<com.google.firebase.vertexai.type.ModalityTokenCount>? candidatesTokensDetails); method public Integer? getCandidatesTokenCount(); + method public java.util.List<com.google.firebase.vertexai.type.ModalityTokenCount>? getCandidatesTokensDetails(); method public int getPromptTokenCount(); + method public java.util.List<com.google.firebase.vertexai.type.ModalityTokenCount>? getPromptTokensDetails(); method public int getTotalTokenCount(); property public final Integer? candidatesTokenCount; + property public final java.util.List<com.google.firebase.vertexai.type.ModalityTokenCount>? candidatesTokensDetails; property public final int promptTokenCount; + property public final java.util.List<com.google.firebase.vertexai.type.ModalityTokenCount>?
promptTokensDetails; property public final int totalTokenCount; } diff --git a/firebase-vertexai/gradle.properties b/firebase-vertexai/gradle.properties index e6719a371ef..b686fdcb9db 100644 --- a/firebase-vertexai/gradle.properties +++ b/firebase-vertexai/gradle.properties @@ -12,5 +12,5 @@ # See the License for the specific language governing permissions and # limitations under the License. -version=16.1.1 +version=16.2.0 latestReleasedVersion=16.1.0 diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/ContentModality.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/ContentModality.kt new file mode 100644 index 00000000000..dd928f92273 --- /dev/null +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/ContentModality.kt @@ -0,0 +1,68 @@ +/* + * Copyright 2025 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.google.firebase.vertexai.type + +import com.google.firebase.vertexai.common.util.FirstOrdinalSerializer +import kotlinx.serialization.KSerializer +import kotlinx.serialization.SerialName +import kotlinx.serialization.Serializable + +/** Content part modality. 
*/ +public class ContentModality private constructor(public val ordinal: Int) { + + @Serializable(Internal.Serializer::class) + internal enum class Internal { + @SerialName("MODALITY_UNSPECIFIED") UNSPECIFIED, + TEXT, + IMAGE, + VIDEO, + AUDIO, + DOCUMENT; + + internal object Serializer : KSerializer<Internal> by FirstOrdinalSerializer(Internal::class) + + internal fun toPublic() = + when (this) { + TEXT -> ContentModality.TEXT + IMAGE -> ContentModality.IMAGE + VIDEO -> ContentModality.VIDEO + AUDIO -> ContentModality.AUDIO + DOCUMENT -> ContentModality.DOCUMENT + else -> ContentModality.UNSPECIFIED + } + } + + public companion object { + /** Unspecified modality. */ + @JvmField public val UNSPECIFIED: ContentModality = ContentModality(0) + + /** Plain text. */ + @JvmField public val TEXT: ContentModality = ContentModality(1) + + /** Image. */ + @JvmField public val IMAGE: ContentModality = ContentModality(2) + + /** Video. */ + @JvmField public val VIDEO: ContentModality = ContentModality(3) + + /** Audio. */ + @JvmField public val AUDIO: ContentModality = ContentModality(4) + + /** Document, e.g. PDF. */ + @JvmField public val DOCUMENT: ContentModality = ContentModality(5) + } +} diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/CountTokensResponse.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/CountTokensResponse.kt index 4c05521ad65..a6fe492862b 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/CountTokensResponse.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/CountTokensResponse.kt @@ -30,21 +30,33 @@ import kotlinx.serialization.Serializable * to the model as a prompt. **Important:** this property does not include billable image, video or * other non-text input. See * [Vertex AI pricing](https://cloud.google.com/vertex-ai/generative-ai/pricing) for details.
+ * @property promptTokensDetails The breakdown, by modality, of how many tokens are consumed by the + * prompt, or `null` when the response carries no per-modality breakdown. */ public class CountTokensResponse( public val totalTokens: Int, - public val totalBillableCharacters: Int? = null + public val totalBillableCharacters: Int? = null, + public val promptTokensDetails: List<ModalityTokenCount>? = null, ) { public operator fun component1(): Int = totalTokens public operator fun component2(): Int? = totalBillableCharacters + public operator fun component3(): List<ModalityTokenCount>? = promptTokensDetails + @Serializable - internal data class Internal(val totalTokens: Int, val totalBillableCharacters: Int? = null) : - Response { + internal data class Internal( + val totalTokens: Int, + val totalBillableCharacters: Int? = null, + val promptTokensDetails: List<ModalityTokenCount.Internal>? = null + ) : Response { internal fun toPublic(): CountTokensResponse { - return CountTokensResponse(totalTokens, totalBillableCharacters ?: 0) + return CountTokensResponse( + totalTokens, + totalBillableCharacters ?: 0, + promptTokensDetails?.map { it.toPublic() } + ) } } } diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/ModalityTokenCount.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/ModalityTokenCount.kt new file mode 100644 index 00000000000..16b7b1e4207 --- /dev/null +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/ModalityTokenCount.kt @@ -0,0 +1,41 @@ +/* + * Copyright 2025 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.google.firebase.vertexai.type + +import kotlinx.serialization.Serializable + +/** + * Token count attributed to a single content modality (text, image, video, audio or document). + * + * @property modality The modality associated with this token count. + * @property tokenCount The number of tokens counted for this modality; 0 when the response omits it. + */ +public class ModalityTokenCount +private constructor(public val modality: ContentModality, public val tokenCount: Int) { + + public operator fun component1(): ContentModality = modality + + public operator fun component2(): Int = tokenCount + + @Serializable + internal data class Internal( + val modality: ContentModality.Internal, + val tokenCount: Int? = null + ) { + internal fun toPublic() = ModalityTokenCount(modality.toPublic(), tokenCount ?: 0) + } +} diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/UsageMetadata.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/UsageMetadata.kt index 54f5cbd89b7..5ebbc3639d9 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/UsageMetadata.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/UsageMetadata.kt @@ -24,11 +24,17 @@ import kotlinx.serialization.Serializable * @param promptTokenCount Number of tokens in the request. * @param candidatesTokenCount Number of tokens in the response(s). * @param totalTokenCount Total number of tokens. + * @param promptTokensDetails The breakdown, by modality, of how many tokens are consumed by the + * prompt. + * @param candidatesTokensDetails The breakdown, by modality, of how many tokens are consumed by the + * candidates.
*/ public class UsageMetadata( public val promptTokenCount: Int, public val candidatesTokenCount: Int?, - public val totalTokenCount: Int + public val totalTokenCount: Int, + public val promptTokensDetails: List<ModalityTokenCount>?, + public val candidatesTokensDetails: List<ModalityTokenCount>?, ) { @Serializable @@ -36,9 +42,17 @@ public class UsageMetadata( val promptTokenCount: Int? = null, val candidatesTokenCount: Int? = null, val totalTokenCount: Int? = null, + val promptTokensDetails: List<ModalityTokenCount.Internal>? = null, + val candidatesTokensDetails: List<ModalityTokenCount.Internal>? = null, ) { internal fun toPublic(): UsageMetadata = - UsageMetadata(promptTokenCount ?: 0, candidatesTokenCount ?: 0, totalTokenCount ?: 0) + UsageMetadata( + promptTokenCount ?: 0, + candidatesTokenCount ?: 0, + totalTokenCount ?: 0, + promptTokensDetails = promptTokensDetails?.map { it.toPublic() }, + candidatesTokensDetails = candidatesTokensDetails?.map { it.toPublic() } + ) } } diff --git a/firebase-vertexai/src/test/java/com/google/firebase/vertexai/UnarySnapshotTests.kt b/firebase-vertexai/src/test/java/com/google/firebase/vertexai/UnarySnapshotTests.kt index d538abae76e..e176fd8f7eb 100644 --- a/firebase-vertexai/src/test/java/com/google/firebase/vertexai/UnarySnapshotTests.kt +++ b/firebase-vertexai/src/test/java/com/google/firebase/vertexai/UnarySnapshotTests.kt @@ -17,6 +17,7 @@ package com.google.firebase.vertexai import com.google.firebase.vertexai.type.BlockReason +import com.google.firebase.vertexai.type.ContentModality import com.google.firebase.vertexai.type.FinishReason import com.google.firebase.vertexai.type.FunctionCallPart import com.google.firebase.vertexai.type.HarmCategory @@ -34,7 +35,6 @@ import com.google.firebase.vertexai.util.goldenUnaryFile import com.google.firebase.vertexai.util.shouldNotBeNullOrEmpty import io.kotest.assertions.throwables.shouldThrow import io.kotest.inspectors.forAtLeastOne -import io.kotest.matchers.collections.shouldContain import io.kotest.matchers.collections.shouldNotBeEmpty import
io.kotest.matchers.nulls.shouldNotBeNull import io.kotest.matchers.should @@ -70,15 +70,27 @@ internal class UnarySnapshotTests { } @Test - fun `long reply`() = - goldenUnaryFile("unary-success-basic-reply-long.json") { + fun `response with detailed token-based usageMetadata`() = + goldenUnaryFile("unary-success-basic-response-long-usage-metadata.json") { withTimeout(testTimeout) { val response = model.generateContent("prompt") response.candidates.isEmpty() shouldBe false response.candidates.first().finishReason shouldBe FinishReason.STOP response.candidates.first().content.parts.isEmpty() shouldBe false - response.candidates.first().safetyRatings.isEmpty() shouldBe false + response.usageMetadata shouldNotBe null + response.usageMetadata?.apply { + totalTokenCount shouldBe 1913 + candidatesTokenCount shouldBe 76 + promptTokensDetails.shouldNotBeNull().forAtLeastOne { + it.modality shouldBe ContentModality.IMAGE + it.tokenCount shouldBe 1806 + } + candidatesTokensDetails.shouldNotBeNull().forAtLeastOne { + it.modality shouldBe ContentModality.TEXT + it.tokenCount shouldBe 76 + } + } } } @@ -469,6 +481,22 @@ internal class UnarySnapshotTests { } } + @Test + fun `countTokens with modality fields returned`() = + goldenUnaryFile("unary-success-detailed-token-response.json") { + withTimeout(testTimeout) { + val response = model.countTokens("prompt") + + response.totalTokens shouldBe 1837 + response.totalBillableCharacters shouldBe 117 + response.promptTokensDetails shouldNotBe null + response.promptTokensDetails?.forAtLeastOne { + it.modality shouldBe ContentModality.IMAGE + it.tokenCount shouldBe 1806 + } + } + } + @Test fun `countTokens succeeds with no billable characters`() = goldenUnaryFile("unary-success-no-billable-characters.json") { diff --git a/firebase-vertexai/update_responses.sh b/firebase-vertexai/update_responses.sh index cb01e1a2c40..70e438090bd 100755 --- a/firebase-vertexai/update_responses.sh +++ b/firebase-vertexai/update_responses.sh @@ -17,7 +17,7 @@ # This script replaces mock
response files for Vertex AI unit tests with a fresh # clone of the shared repository of Vertex AI test data. -RESPONSES_VERSION='v5.*' # The major version of mock responses to use +RESPONSES_VERSION='v6.*' # The major version of mock responses to use REPO_NAME="vertexai-sdk-test-data" REPO_LINK="https://github.com/FirebaseExtended/$REPO_NAME.git"