diff --git a/.changeset/hip-impalas-divide.md b/.changeset/hip-impalas-divide.md new file mode 100644 index 00000000000..49f7cb8836d --- /dev/null +++ b/.changeset/hip-impalas-divide.md @@ -0,0 +1,6 @@ +--- +'firebase': minor +'@firebase/ai': minor +--- + +Add support for Thinking Budget. diff --git a/common/api-review/ai.api.md b/common/api-review/ai.api.md index 8e150132cac..ffd358e5d3e 100644 --- a/common/api-review/ai.api.md +++ b/common/api-review/ai.api.md @@ -381,6 +381,7 @@ export interface GenerationConfig { stopSequences?: string[]; // (undocumented) temperature?: number; + thinkingConfig?: ThinkingConfig; // (undocumented) topK?: number; // (undocumented) @@ -925,6 +926,11 @@ export interface TextPart { text: string; } +// @public +export interface ThinkingConfig { + thinkingBudget?: number; +} + // @public export type Tool = FunctionDeclarationsTool | GoogleSearchTool; @@ -947,6 +953,7 @@ export interface UsageMetadata { promptTokenCount: number; // (undocumented) promptTokensDetails?: ModalityTokenCount[]; + thoughtsTokenCount?: number; // (undocumented) totalTokenCount: number; } diff --git a/docs-devsite/_toc.yaml b/docs-devsite/_toc.yaml index 90c3d840bfa..344fa784904 100644 --- a/docs-devsite/_toc.yaml +++ b/docs-devsite/_toc.yaml @@ -148,6 +148,8 @@ toc: path: /docs/reference/js/ai.stringschema.md - title: TextPart path: /docs/reference/js/ai.textpart.md + - title: ThinkingConfig + path: /docs/reference/js/ai.thinkingconfig.md - title: ToolConfig path: /docs/reference/js/ai.toolconfig.md - title: UsageMetadata diff --git a/docs-devsite/ai.generationconfig.md b/docs-devsite/ai.generationconfig.md index f9697a07454..e791bde9485 100644 --- a/docs-devsite/ai.generationconfig.md +++ b/docs-devsite/ai.generationconfig.md @@ -31,6 +31,7 @@ export interface GenerationConfig | [responseSchema](./ai.generationconfig.md#generationconfigresponseschema) | [TypedSchema](./ai.md#typedschema) \| [SchemaRequest](./ai.schemarequest.md#schemarequest_interface) | 
Output response schema of the generated candidate text. This value can be a class generated with a [Schema](./ai.schema.md#schema_class) static method like Schema.string() or Schema.object() or it can be a plain JS object matching the [SchemaRequest](./ai.schemarequest.md#schemarequest_interface) interface.
Note: This only applies when the specified responseMIMEType supports a schema; currently this is limited to application/json and text/x.enum. | | [stopSequences](./ai.generationconfig.md#generationconfigstopsequences) | string\[\] | | | [temperature](./ai.generationconfig.md#generationconfigtemperature) | number | | +| [thinkingConfig](./ai.generationconfig.md#generationconfigthinkingconfig) | [ThinkingConfig](./ai.thinkingconfig.md#thinkingconfig_interface) | Configuration for "thinking" behavior of compatible Gemini models. | | [topK](./ai.generationconfig.md#generationconfigtopk) | number | | | [topP](./ai.generationconfig.md#generationconfigtopp) | number | | @@ -117,6 +118,16 @@ stopSequences?: string[]; temperature?: number; ``` +## GenerationConfig.thinkingConfig + +Configuration for "thinking" behavior of compatible Gemini models. + +Signature: + +```typescript +thinkingConfig?: ThinkingConfig; +``` + ## GenerationConfig.topK Signature: diff --git a/docs-devsite/ai.md b/docs-devsite/ai.md index 0c57da2e154..96bb63e83ee 100644 --- a/docs-devsite/ai.md +++ b/docs-devsite/ai.md @@ -124,6 +124,7 @@ The Firebase AI Web SDK. | [Segment](./ai.segment.md#segment_interface) | Represents a specific segment within a [Content](./ai.content.md#content_interface) object, often used to pinpoint the exact location of text or data that grounding information refers to. | | [StartChatParams](./ai.startchatparams.md#startchatparams_interface) | Params for [GenerativeModel.startChat()](./ai.generativemodel.md#generativemodelstartchat). | | [TextPart](./ai.textpart.md#textpart_interface) | Content part interface if the part represents a text string. | +| [ThinkingConfig](./ai.thinkingconfig.md#thinkingconfig_interface) | Configuration for "thinking" behavior of compatible Gemini models. Certain models utilize a thinking process before generating a response. This allows them to reason through complex problems and plan a more coherent and accurate answer. 
| | [ToolConfig](./ai.toolconfig.md#toolconfig_interface) | Tool config. This config is shared for all tools provided in the request. | | [UsageMetadata](./ai.usagemetadata.md#usagemetadata_interface) | Usage metadata about a [GenerateContentResponse](./ai.generatecontentresponse.md#generatecontentresponse_interface). | | [VertexAIOptions](./ai.vertexaioptions.md#vertexaioptions_interface) | Options when initializing the Firebase AI SDK. | diff --git a/docs-devsite/ai.thinkingconfig.md b/docs-devsite/ai.thinkingconfig.md new file mode 100644 index 00000000000..ec348a20487 --- /dev/null +++ b/docs-devsite/ai.thinkingconfig.md @@ -0,0 +1,43 @@ +Project: /docs/reference/js/_project.yaml +Book: /docs/reference/_book.yaml +page_type: reference + +{% comment %} +DO NOT EDIT THIS FILE! +This is generated by the JS SDK team, and any local changes will be +overwritten. Changes should be made in the source code at +https://github.com/firebase/firebase-js-sdk +{% endcomment %} + +# ThinkingConfig interface +Configuration for "thinking" behavior of compatible Gemini models. + +Certain models utilize a thinking process before generating a response. This allows them to reason through complex problems and plan a more coherent and accurate answer. + +Signature: + +```typescript +export interface ThinkingConfig +``` + +## Properties + +| Property | Type | Description | +| --- | --- | --- | +| [thinkingBudget](./ai.thinkingconfig.md#thinkingconfigthinkingbudget) | number | The thinking budget, in tokens. This parameter sets an upper limit on the number of tokens the model can use for its internal "thinking" process. 
A higher budget may result in higher quality responses for complex tasks but can also increase latency and cost. If you don't specify a budget, the model will determine the appropriate amount of thinking based on the complexity of the prompt. An error will be thrown if you set a thinking budget for a model that does not support this feature or if the specified budget is not within the model's supported range. | + +## ThinkingConfig.thinkingBudget + +The thinking budget, in tokens. + +This parameter sets an upper limit on the number of tokens the model can use for its internal "thinking" process. A higher budget may result in higher quality responses for complex tasks but can also increase latency and cost. + +If you don't specify a budget, the model will determine the appropriate amount of thinking based on the complexity of the prompt. + +An error will be thrown if you set a thinking budget for a model that does not support this feature or if the specified budget is not within the model's supported range. + +Signature: + +```typescript +thinkingBudget?: number; +``` diff --git a/docs-devsite/ai.usagemetadata.md b/docs-devsite/ai.usagemetadata.md index 4211fea72b4..954fcc6e530 100644 --- a/docs-devsite/ai.usagemetadata.md +++ b/docs-devsite/ai.usagemetadata.md @@ -26,6 +26,7 @@ export interface UsageMetadata | [candidatesTokensDetails](./ai.usagemetadata.md#usagemetadatacandidatestokensdetails) | [ModalityTokenCount](./ai.modalitytokencount.md#modalitytokencount_interface)\[\] | | | [promptTokenCount](./ai.usagemetadata.md#usagemetadataprompttokencount) | number | | | [promptTokensDetails](./ai.usagemetadata.md#usagemetadataprompttokensdetails) | [ModalityTokenCount](./ai.modalitytokencount.md#modalitytokencount_interface)\[\] | | +| [thoughtsTokenCount](./ai.usagemetadata.md#usagemetadatathoughtstokencount) | number | The number of tokens used by the model's internal "thinking" process. 
| | [totalTokenCount](./ai.usagemetadata.md#usagemetadatatotaltokencount) | number | | ## UsageMetadata.candidatesTokenCount @@ -60,6 +61,16 @@ promptTokenCount: number; promptTokensDetails?: ModalityTokenCount[]; ``` +## UsageMetadata.thoughtsTokenCount + +The number of tokens used by the model's internal "thinking" process. + +Signature: + +```typescript +thoughtsTokenCount?: number; +``` + ## UsageMetadata.totalTokenCount Signature: diff --git a/packages/ai/integration/generate-content.test.ts b/packages/ai/integration/generate-content.test.ts index 2c1340186b0..0b83df38ecb 100644 --- a/packages/ai/integration/generate-content.test.ts +++ b/packages/ai/integration/generate-content.test.ts @@ -91,6 +91,10 @@ describe('Generate Content', () => { 2, TOKEN_COUNT_DELTA ); + expect(response.usageMetadata!.thoughtsTokenCount).to.be.closeTo( + 30, + TOKEN_COUNT_DELTA * 2 + ); expect(response.usageMetadata!.totalTokenCount).to.be.closeTo( 55, TOKEN_COUNT_DELTA * 2 diff --git a/packages/ai/src/types/requests.ts b/packages/ai/src/types/requests.ts index b74992b839d..be18fb16797 100644 --- a/packages/ai/src/types/requests.ts +++ b/packages/ai/src/types/requests.ts @@ -113,6 +113,10 @@ export interface GenerationConfig { * @beta */ responseModalities?: ResponseModality[]; + /** + * Configuration for "thinking" behavior of compatible Gemini models. + */ + thinkingConfig?: ThinkingConfig; } /** @@ -266,3 +270,28 @@ export interface FunctionCallingConfig { mode?: FunctionCallingMode; allowedFunctionNames?: string[]; } + +/** + * Configuration for "thinking" behavior of compatible Gemini models. + * + * Certain models utilize a thinking process before generating a response. This allows them to + * reason through complex problems and plan a more coherent and accurate answer. + * + * @public + */ +export interface ThinkingConfig { + /** + * The thinking budget, in tokens. 
+ * + * This parameter sets an upper limit on the number of tokens the model can use for its internal + * "thinking" process. A higher budget may result in higher quality responses for complex tasks + * but can also increase latency and cost. + * + * If you don't specify a budget, the model will determine the appropriate amount + * of thinking based on the complexity of the prompt. + * + * An error will be thrown if you set a thinking budget for a model that does not support this + * feature or if the specified budget is not within the model's supported range. + */ + thinkingBudget?: number; +} diff --git a/packages/ai/src/types/responses.ts b/packages/ai/src/types/responses.ts index 5e2fa3b65ca..5fab203600d 100644 --- a/packages/ai/src/types/responses.ts +++ b/packages/ai/src/types/responses.ts @@ -92,6 +92,10 @@ export interface GenerateContentResponse { export interface UsageMetadata { promptTokenCount: number; candidatesTokenCount: number; + /** + * The number of tokens used by the model's internal "thinking" process. + */ + thoughtsTokenCount?: number; totalTokenCount: number; promptTokensDetails?: ModalityTokenCount[]; candidatesTokensDetails?: ModalityTokenCount[];