diff --git a/firebase-ai/CHANGELOG.md b/firebase-ai/CHANGELOG.md index 00c3cf8c842..7b67ddacb59 100644 --- a/firebase-ai/CHANGELOG.md +++ b/firebase-ai/CHANGELOG.md @@ -1,4 +1,6 @@ # Unreleased +* [feature] Added support for returning thought summaries, which are synthesized + versions of a model's internal reasoning process. * [fixed] Fixed an issue causing the accessor methods in `GenerateContentResponse` to throw an exception when the response contained no candidates. * [changed] Added better description for requests which fail due to the Gemini API not being diff --git a/firebase-ai/api.txt b/firebase-ai/api.txt index 1d8ebcbf0c3..92445cb8f45 100644 --- a/firebase-ai/api.txt +++ b/firebase-ai/api.txt @@ -217,6 +217,8 @@ package com.google.firebase.ai.type { ctor public CodeExecutionResultPart(String outcome, String output); method public String getOutcome(); method public String getOutput(); + method public boolean isThought(); + property public boolean isThought; property public final String outcome; property public final String output; } @@ -292,7 +294,9 @@ package com.google.firebase.ai.type { ctor public ExecutableCodePart(String language, String code); method public String getCode(); method public String getLanguage(); + method public boolean isThought(); property public final String code; + property public boolean isThought; property public final String language; } @@ -300,6 +304,8 @@ package com.google.firebase.ai.type { ctor public FileDataPart(String uri, String mimeType); method public String getMimeType(); method public String getUri(); + method public boolean isThought(); + property public boolean isThought; property public final String mimeType; property public final String uri; } @@ -334,8 +340,10 @@ package com.google.firebase.ai.type { method public java.util.Map getArgs(); method public String? getId(); method public String getName(); + method public boolean isThought(); property public final java.util.Map args; property public final String? 
id; + property public boolean isThought; property public final String name; } @@ -364,7 +372,9 @@ package com.google.firebase.ai.type { method public String? getId(); method public String getName(); method public kotlinx.serialization.json.JsonObject getResponse(); + method public boolean isThought(); property public final String? id; + property public boolean isThought; property public final String name; property public final kotlinx.serialization.json.JsonObject response; } @@ -376,12 +386,14 @@ package com.google.firebase.ai.type { method public java.util.List getInlineDataParts(); method public com.google.firebase.ai.type.PromptFeedback? getPromptFeedback(); method public String? getText(); + method public String? getThoughtSummary(); method public com.google.firebase.ai.type.UsageMetadata? getUsageMetadata(); property public final java.util.List candidates; property public final java.util.List functionCalls; property public final java.util.List inlineDataParts; property public final com.google.firebase.ai.type.PromptFeedback? promptFeedback; property public final String? text; + property public final String? thoughtSummary; property public final com.google.firebase.ai.type.UsageMetadata? 
usageMetadata; } @@ -552,7 +564,9 @@ package com.google.firebase.ai.type { public final class ImagePart implements com.google.firebase.ai.type.Part { ctor public ImagePart(android.graphics.Bitmap image); method public android.graphics.Bitmap getImage(); + method public boolean isThought(); property public final android.graphics.Bitmap image; + property public boolean isThought; } @com.google.firebase.ai.type.PublicPreviewAPI public final class ImagenAspectRatio { @@ -777,7 +791,9 @@ package com.google.firebase.ai.type { ctor public InlineDataPart(byte[] inlineData, String mimeType); method public byte[] getInlineData(); method public String getMimeType(); + method public boolean isThought(); property public final byte[] inlineData; + property public boolean isThought; property public final String mimeType; } @@ -886,6 +902,8 @@ package com.google.firebase.ai.type { } public interface Part { + method public boolean isThought(); + property public abstract boolean isThought; } public final class PartKt { @@ -1146,6 +1164,8 @@ package com.google.firebase.ai.type { public final class TextPart implements com.google.firebase.ai.type.Part { ctor public TextPart(String text); method public String getText(); + method public boolean isThought(); + property public boolean isThought; property public final String text; } @@ -1155,6 +1175,7 @@ package com.google.firebase.ai.type { public static final class ThinkingConfig.Builder { ctor public ThinkingConfig.Builder(); method public com.google.firebase.ai.type.ThinkingConfig build(); + method public com.google.firebase.ai.type.ThinkingConfig.Builder setIncludeThoughts(boolean includeThoughts); method public com.google.firebase.ai.type.ThinkingConfig.Builder setThinkingBudget(int thinkingBudget); } diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/GenerateContentResponse.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/GenerateContentResponse.kt index 7e1b44106a2..45398ce356b 100644 --- 
a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/GenerateContentResponse.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/GenerateContentResponse.kt @@ -34,35 +34,55 @@ public class GenerateContentResponse( /** * Convenience field representing all the text parts in the response as a single string. * - * The value is null if the response contains no [candidates]. + * The value is null if the response contains no valid text [candidates]. + * + * Any part that's marked as a thought will be ignored. Learn more about + * [thinking](https://firebase.google.com/docs/ai-logic/thinking?api=dev). */ public val text: String? by lazy { - candidates.firstOrNull()?.content?.parts?.filterIsInstance()?.joinToString(" ") { - it.text - } + val parts = candidates.firstOrNull()?.nonThoughtParts()?.filterIsInstance() + if (parts.isNullOrEmpty()) return@lazy null + parts.joinToString(" ") { it.text } } /** * Convenience field to list all the [FunctionCallPart]s in the response. * * The value is an empty list if the response contains no [candidates]. + * + * Any part that's marked as a thought will be ignored. Learn more about + * [thinking](https://firebase.google.com/docs/ai-logic/thinking?api=dev). */ public val functionCalls: List by lazy { - candidates.firstOrNull()?.content?.parts?.filterIsInstance().orEmpty() + candidates.firstOrNull()?.nonThoughtParts()?.filterIsInstance().orEmpty() + } + + /** + * Convenience field representing all the text parts in the response that are marked as thoughts + * as a single string, if they exist. + * + * Learn more about [thinking](https://firebase.google.com/docs/ai-logic/thinking?api=dev). + */ + public val thoughtSummary: String? by lazy { + candidates.firstOrNull()?.thoughtParts()?.filterIsInstance()?.joinToString(" ") { + it.text + } } /** - * Convenience field representing all the [InlineDataPart]s in the first candidate, if they exist. 
+ * Convenience field representing all the [InlineDataPart]s in the first candidate. * * This also includes any [ImagePart], but they will be represented as [InlineDataPart] instead. * * The value is an empty list if the response contains no [candidates]. + * + * Any part that's marked as a thought will be ignored. Learn more about + * [thinking](https://firebase.google.com/docs/ai-logic/thinking?api=dev). */ public val inlineDataParts: List by lazy { candidates .firstOrNull() - ?.content - ?.parts + ?.nonThoughtParts() ?.let { parts -> parts.filterIsInstance().map { it.toInlineDataPart() } + parts.filterIsInstance() @@ -70,6 +90,10 @@ public class GenerateContentResponse( .orEmpty() } + private fun Candidate.thoughtParts(): List = content.parts.filter { it.isThought } + + private fun Candidate.nonThoughtParts(): List = content.parts.filter { !it.isThought } + @Serializable internal data class Internal( val candidates: List? = null, diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/Part.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/Part.kt index 5f3e1bc12a9..34898631479 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/Part.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/Part.kt @@ -31,19 +31,43 @@ import kotlinx.serialization.json.jsonObject import org.json.JSONObject /** Interface representing data sent to and received from requests. */ -public interface Part {} +public interface Part { + public val isThought: Boolean +} /** Represents text or string based data sent to and received from requests. */ -public class TextPart(public val text: String) : Part { +public class TextPart +internal constructor( + public val text: String, + public override val isThought: Boolean, + internal val thoughtSignature: String? 
+) : Part { + + public constructor(text: String) : this(text, false, null) - @Serializable internal data class Internal(val text: String) : InternalPart + @Serializable + internal data class Internal( + val text: String, + val thought: Boolean? = null, + val thoughtSignature: String? = null + ) : InternalPart } -public class CodeExecutionResultPart(public val outcome: String, public val output: String) : Part { +public class CodeExecutionResultPart +internal constructor( + public val outcome: String, + public val output: String, + public override val isThought: Boolean, + internal val thoughtSignature: String? +) : Part { + + public constructor(outcome: String, output: String) : this(outcome, output, false, null) @Serializable internal data class Internal( - @SerialName("codeExecutionResult") val codeExecutionResult: CodeExecutionResult + @SerialName("codeExecutionResult") val codeExecutionResult: CodeExecutionResult, + val thought: Boolean? = null, + val thoughtSignature: String? = null ) : InternalPart { @Serializable @@ -54,11 +78,22 @@ public class CodeExecutionResultPart(public val outcome: String, public val outp } } -public class ExecutableCodePart(public val language: String, public val code: String) : Part { +public class ExecutableCodePart +internal constructor( + public val language: String, + public val code: String, + public override val isThought: Boolean, + internal val thoughtSignature: String? +) : Part { + + public constructor(language: String, code: String) : this(language, code, false, null) @Serializable - internal data class Internal(@SerialName("executableCode") val executableCode: ExecutableCode) : - InternalPart { + internal data class Internal( + @SerialName("executableCode") val executableCode: ExecutableCode, + val thought: Boolean? = null, + val thoughtSignature: String? 
= null + ) : InternalPart { @Serializable internal data class ExecutableCode( @@ -74,12 +109,21 @@ public class ExecutableCodePart(public val language: String, public val code: St * * @param image [Bitmap] to convert into a [Part] */ -public class ImagePart(public val image: Bitmap) : Part { +public class ImagePart +internal constructor( + public val image: Bitmap, + public override val isThought: Boolean, + internal val thoughtSignature: String? +) : Part { + + public constructor(image: Bitmap) : this(image, false, null) internal fun toInlineDataPart() = InlineDataPart( android.util.Base64.decode(encodeBitmapToBase64Jpeg(image), BASE_64_FLAGS), - "image/jpeg" + "image/jpeg", + isThought, + thoughtSignature ) } @@ -90,11 +134,25 @@ public class ImagePart(public val image: Bitmap) : Part { * @param mimeType an IANA standard MIME type. For supported values, see the * [Vertex AI documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/send-multimodal-prompts#media_requirements) */ -public class InlineDataPart(public val inlineData: ByteArray, public val mimeType: String) : Part { +public class InlineDataPart +internal constructor( + public val inlineData: ByteArray, + public val mimeType: String, + public override val isThought: Boolean, + internal val thoughtSignature: String? +) : Part { + + public constructor( + inlineData: ByteArray, + mimeType: String + ) : this(inlineData, mimeType, false, null) @Serializable - internal data class Internal(@SerialName("inlineData") val inlineData: InlineData) : - InternalPart { + internal data class Internal( + @SerialName("inlineData") val inlineData: InlineData, + val thought: Boolean? = null, + val thoughtSignature: String? = null + ) : InternalPart { @Serializable internal data class InlineData(@SerialName("mimeType") val mimeType: String, val data: Base64) @@ -110,15 +168,27 @@ public class InlineDataPart(public val inlineData: ByteArray, public val mimeTyp * have a matching `id` field. 
*/ public class FunctionCallPart -@JvmOverloads -constructor( +internal constructor( public val name: String, public val args: Map, - public val id: String? = null + public val id: String? = null, + public override val isThought: Boolean, + internal val thoughtSignature: String? ) : Part { + @JvmOverloads + public constructor( + name: String, + args: Map, + id: String? = null, + ) : this(name, args, id, false, null) + @Serializable - internal data class Internal(val functionCall: FunctionCall) : InternalPart { + internal data class Internal( + val functionCall: FunctionCall, + val thought: Boolean? = null, + val thoughtSignature: String? = null + ) : InternalPart { @Serializable internal data class FunctionCall( @@ -137,15 +207,27 @@ constructor( * @param id Matching `id` for a [FunctionCallPart], if one was provided. */ public class FunctionResponsePart -@JvmOverloads -constructor( +internal constructor( public val name: String, public val response: JsonObject, - public val id: String? = null + public val id: String? = null, + public override val isThought: Boolean, + internal val thoughtSignature: String? ) : Part { + @JvmOverloads + public constructor( + name: String, + response: JsonObject, + id: String? = null + ) : this(name, response, id, false, null) + @Serializable - internal data class Internal(val functionResponse: FunctionResponse) : InternalPart { + internal data class Internal( + val functionResponse: FunctionResponse, + val thought: Boolean? = null, + val thoughtSignature: String? = null + ) : InternalPart { @Serializable internal data class FunctionResponse( @@ -168,15 +250,27 @@ constructor( * @param mimeType an IANA standard MIME type. For supported MIME type values see the * [Firebase documentation](https://firebase.google.com/docs/vertex-ai/input-file-requirements). 
*/ -public class FileDataPart(public val uri: String, public val mimeType: String) : Part { +public class FileDataPart +internal constructor( + public val uri: String, + public val mimeType: String, + public override val isThought: Boolean, + internal val thoughtSignature: String? +) : Part { + + public constructor(uri: String, mimeType: String) : this(uri, mimeType, false, null) @Serializable - internal data class Internal(@SerialName("file_data") val fileData: FileData) : InternalPart { + internal data class Internal( + @SerialName("file_data") val fileData: FileData, + val thought: Boolean? = null, + val thoughtSignature: String? = null + ) : InternalPart { @Serializable internal data class FileData( @SerialName("mime_type") val mimeType: String, - @SerialName("file_uri") val fileUri: String, + @SerialName("file_uri") val fileUri: String ) } } @@ -218,31 +312,51 @@ internal object PartSerializer : internal fun Part.toInternal(): InternalPart { return when (this) { - is TextPart -> TextPart.Internal(text) + is TextPart -> TextPart.Internal(text, isThought, thoughtSignature) is ImagePart -> InlineDataPart.Internal( - InlineDataPart.Internal.InlineData("image/jpeg", encodeBitmapToBase64Jpeg(image)) + InlineDataPart.Internal.InlineData("image/jpeg", encodeBitmapToBase64Jpeg(image)), + isThought, + thoughtSignature ) is InlineDataPart -> InlineDataPart.Internal( InlineDataPart.Internal.InlineData( mimeType, android.util.Base64.encodeToString(inlineData, BASE_64_FLAGS) - ) + ), + isThought, + thoughtSignature ) is FunctionCallPart -> - FunctionCallPart.Internal(FunctionCallPart.Internal.FunctionCall(name, args, id)) + FunctionCallPart.Internal( + FunctionCallPart.Internal.FunctionCall(name, args, id), + isThought, + thoughtSignature + ) is FunctionResponsePart -> FunctionResponsePart.Internal( - FunctionResponsePart.Internal.FunctionResponse(name, response, id) + FunctionResponsePart.Internal.FunctionResponse(name, response, id), + isThought, + thoughtSignature ) is 
FileDataPart -> - FileDataPart.Internal(FileDataPart.Internal.FileData(mimeType = mimeType, fileUri = uri)) + FileDataPart.Internal( + FileDataPart.Internal.FileData(mimeType = mimeType, fileUri = uri), + isThought, + thoughtSignature + ) is ExecutableCodePart -> - ExecutableCodePart.Internal(ExecutableCodePart.Internal.ExecutableCode(language, code)) + ExecutableCodePart.Internal( + ExecutableCodePart.Internal.ExecutableCode(language, code), + isThought, + thoughtSignature + ) is CodeExecutionResultPart -> CodeExecutionResultPart.Internal( - CodeExecutionResultPart.Internal.CodeExecutionResult(outcome, output) + CodeExecutionResultPart.Internal.CodeExecutionResult(outcome, output), + isThought, + thoughtSignature ) else -> throw com.google.firebase.ai.type.SerializationException( @@ -260,28 +374,47 @@ private fun encodeBitmapToBase64Jpeg(input: Bitmap): String { internal fun InternalPart.toPublic(): Part { return when (this) { - is TextPart.Internal -> TextPart(text) + is TextPart.Internal -> TextPart(text, thought ?: false, thoughtSignature) is InlineDataPart.Internal -> { val data = android.util.Base64.decode(inlineData.data, BASE_64_FLAGS) if (inlineData.mimeType.contains("image")) { - ImagePart(decodeBitmapFromImage(data)) + ImagePart(decodeBitmapFromImage(data), thought ?: false, thoughtSignature) } else { - InlineDataPart(data, inlineData.mimeType) + InlineDataPart(data, inlineData.mimeType, thought ?: false, thoughtSignature) } } is FunctionCallPart.Internal -> FunctionCallPart( functionCall.name, functionCall.args.orEmpty().mapValues { it.value ?: JsonNull }, - functionCall.id + functionCall.id, + thought ?: false, + thoughtSignature ) is FunctionResponsePart.Internal -> - FunctionResponsePart(functionResponse.name, functionResponse.response, functionResponse.id) - is FileDataPart.Internal -> FileDataPart(fileData.mimeType, fileData.fileUri) + FunctionResponsePart( + functionResponse.name, + functionResponse.response, + functionResponse.id, + thought ?: 
false, + thoughtSignature + ) + is FileDataPart.Internal -> + FileDataPart(fileData.mimeType, fileData.fileUri, thought ?: false, thoughtSignature) is ExecutableCodePart.Internal -> - ExecutableCodePart(executableCode.language, executableCode.code) + ExecutableCodePart( + executableCode.language, + executableCode.code, + thought ?: false, + thoughtSignature + ) is CodeExecutionResultPart.Internal -> - CodeExecutionResultPart(codeExecutionResult.outcome, codeExecutionResult.output) + CodeExecutionResultPart( + codeExecutionResult.outcome, + codeExecutionResult.output, + thought ?: false, + thoughtSignature + ) else -> throw com.google.firebase.ai.type.SerializationException( "Unsupported part type \"${javaClass.simpleName}\" provided. This model may not be supported by this SDK." diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ThinkingConfig.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ThinkingConfig.kt index d7de699e9e2..b220de57e86 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ThinkingConfig.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ThinkingConfig.kt @@ -23,6 +23,7 @@ import kotlinx.serialization.Serializable public class ThinkingConfig private constructor( internal val thinkingBudget: Int? = null, + internal val includeThoughts: Boolean? = null ) { public class Builder() { @@ -30,6 +31,10 @@ private constructor( @set:JvmSynthetic // hide void setter from Java public var thinkingBudget: Int? = null + @JvmField + @set:JvmSynthetic // hide void setter from Java + public var includeThoughts: Boolean? = null + /** * Indicates the thinking budget in tokens. `0` is disabled. `-1` is dynamic. The default values * and allowed ranges are model dependent. 
@@ -38,13 +43,29 @@ private constructor( this.thinkingBudget = thinkingBudget } - public fun build(): ThinkingConfig = ThinkingConfig(thinkingBudget = thinkingBudget) + /** + * Indicates whether to request the model to include thought parts in the response. + * + * Keep in mind that once enabled, you should check the `isThought` property when processing + * a `Part` instance to correctly handle both thoughts and the actual response. + * + * The default value is `false`. + */ + public fun setIncludeThoughts(includeThoughts: Boolean): Builder = apply { + this.includeThoughts = includeThoughts + } + + public fun build(): ThinkingConfig = + ThinkingConfig(thinkingBudget = thinkingBudget, includeThoughts = includeThoughts) } - internal fun toInternal() = Internal(thinkingBudget) + internal fun toInternal() = Internal(thinkingBudget, includeThoughts) @Serializable - internal data class Internal(@SerialName("thinking_budget") val thinkingBudget: Int?) + internal data class Internal( + @SerialName("thinking_budget") val thinkingBudget: Int? = null, + val includeThoughts: Boolean? 
= null + ) } /** diff --git a/firebase-ai/src/test/java/com/google/firebase/ai/DevAPIUnarySnapshotTests.kt b/firebase-ai/src/test/java/com/google/firebase/ai/DevAPIUnarySnapshotTests.kt index bd2f69f79f4..10c6f9ec53f 100644 --- a/firebase-ai/src/test/java/com/google/firebase/ai/DevAPIUnarySnapshotTests.kt +++ b/firebase-ai/src/test/java/com/google/firebase/ai/DevAPIUnarySnapshotTests.kt @@ -131,4 +131,23 @@ internal class DevAPIUnarySnapshotTests { groundingMetadata.groundingChunks.forEach { it.web.shouldBeNull() } } } + + @Test + fun `thinking function call and thought signature`() = + goldenDevAPIUnaryFile("unary-success-thinking-function-call-thought-summary-signature.json") { + withTimeout(testTimeout) { + val response = model.generateContent("prompt") + + response.candidates.isNotEmpty() + response.thoughtSummary.shouldNotBeNull() + response.thoughtSummary?.isNotEmpty() + response.functionCalls.isNotEmpty() + response.functionCalls.first().let { + it.thoughtSignature.shouldNotBeNull() + it.thoughtSignature.isNotEmpty() + } + // There's no text in the response + response.text.shouldBeNull() + } + } } diff --git a/firebase-ai/src/test/java/com/google/firebase/ai/common/APIControllerTests.kt b/firebase-ai/src/test/java/com/google/firebase/ai/common/APIControllerTests.kt index f3b192a6b9a..c0e866e794a 100644 --- a/firebase-ai/src/test/java/com/google/firebase/ai/common/APIControllerTests.kt +++ b/firebase-ai/src/test/java/com/google/firebase/ai/common/APIControllerTests.kt @@ -41,7 +41,6 @@ import io.ktor.http.HttpHeaders import io.ktor.http.HttpStatusCode import io.ktor.http.headersOf import io.ktor.utils.io.ByteChannel -import io.ktor.utils.io.close import io.ktor.utils.io.writeFully import kotlin.time.Duration import kotlin.time.Duration.Companion.milliseconds diff --git a/firebase-ai/src/test/java/com/google/firebase/ai/type/ThinkingConfigTest.kt b/firebase-ai/src/test/java/com/google/firebase/ai/type/ThinkingConfigTest.kt index 009c039e906..815699806ed 100644 
--- a/firebase-ai/src/test/java/com/google/firebase/ai/type/ThinkingConfigTest.kt +++ b/firebase-ai/src/test/java/com/google/firebase/ai/type/ThinkingConfigTest.kt @@ -39,6 +39,21 @@ internal class ThinkingConfigTest { Json.encodeToString(thinkingConfig.toInternal()).shouldEqualJson(expectedJson) } + @Test + fun `Include thought thinkingConfig`() { + val thinkingConfig = ThinkingConfig.Builder().setIncludeThoughts(true).build() + // CamelCase or snake_case work equally fine + val expectedJson = + """ + { + "includeThoughts": true + } + """ + .trimIndent() + + Json.encodeToString(thinkingConfig.toInternal()).shouldEqualJson(expectedJson) + } + @Test fun `thinkingConfig DSL correctly delegates to ThinkingConfig#Builder`() { val thinkingConfig = ThinkingConfig.Builder().setThinkingBudget(1024).build()