diff --git a/packages/types/src/providers/gemini.ts b/packages/types/src/providers/gemini.ts index 2ddf594704..c8668ff40a 100644 --- a/packages/types/src/providers/gemini.ts +++ b/packages/types/src/providers/gemini.ts @@ -104,6 +104,32 @@ export const geminiModels = { }, ], }, + "gemini-2.5-pro-preview-06-05": { + maxTokens: 65_535, + contextWindow: 1_048_576, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 2.5, // This is the pricing for prompts above 200k tokens. + outputPrice: 15, + cacheReadsPrice: 0.625, + cacheWritesPrice: 4.5, + maxThinkingTokens: 32_768, + supportsReasoningBudget: true, + tiers: [ + { + contextWindow: 200_000, + inputPrice: 1.25, + outputPrice: 10, + cacheReadsPrice: 0.31, + }, + { + contextWindow: Infinity, + inputPrice: 2.5, + outputPrice: 15, + cacheReadsPrice: 0.625, + }, + ], + }, "gemini-2.0-flash-001": { maxTokens: 8192, contextWindow: 1_048_576, diff --git a/packages/types/src/providers/openrouter.ts b/packages/types/src/providers/openrouter.ts index 5d6edd844c..08ffceb1e2 100644 --- a/packages/types/src/providers/openrouter.ts +++ b/packages/types/src/providers/openrouter.ts @@ -60,16 +60,26 @@ export const OPEN_ROUTER_COMPUTER_USE_MODELS = new Set([ "anthropic/claude-opus-4", ]) +// When we first launched these models we didn't have support for +// enabling/disabling the reasoning budget for hybrid models. Now that we +// do support this we should give users the option to enable/disable it +// whenever possible. However these particular (virtual) model ids with the +// `:thinking` suffix always require the reasoning budget to be enabled, so +// for backwards compatibility we should still require it. +// We should *not* be adding new models to this set. 
+export const OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS = new Set([ + "anthropic/claude-3.7-sonnet:thinking", + "google/gemini-2.5-flash-preview-05-20:thinking", +]) + export const OPEN_ROUTER_REASONING_BUDGET_MODELS = new Set([ "anthropic/claude-3.7-sonnet:beta", - "anthropic/claude-3.7-sonnet:thinking", "anthropic/claude-opus-4", "anthropic/claude-sonnet-4", + "google/gemini-2.5-pro-preview", "google/gemini-2.5-flash-preview-05-20", - "google/gemini-2.5-flash-preview-05-20:thinking", -]) - -export const OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS = new Set([ + // Also include the models that require the reasoning budget to be enabled + // even though `OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS` takes precedence. "anthropic/claude-3.7-sonnet:thinking", "google/gemini-2.5-flash-preview-05-20:thinking", ]) diff --git a/packages/types/src/providers/vertex.ts b/packages/types/src/providers/vertex.ts index 11aa1aaa4a..028d308923 100644 --- a/packages/types/src/providers/vertex.ts +++ b/packages/types/src/providers/vertex.ts @@ -60,6 +60,16 @@ export const vertexModels = { inputPrice: 2.5, outputPrice: 15, }, + "gemini-2.5-pro-preview-06-05": { + maxTokens: 65_535, + contextWindow: 1_048_576, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 2.5, + outputPrice: 15, + maxThinkingTokens: 32_768, + supportsReasoningBudget: true, + }, "gemini-2.5-pro-exp-03-25": { maxTokens: 65_535, contextWindow: 1_048_576, @@ -217,6 +227,7 @@ export const vertexModels = { } as const satisfies Record export const VERTEX_REGIONS = [ + { value: "global", label: "global" }, { value: "us-east5", label: "us-east5" }, { value: "us-central1", label: "us-central1" }, { value: "europe-west1", label: "europe-west1" }, diff --git a/src/api/providers/fetchers/__tests__/openrouter.spec.ts b/src/api/providers/fetchers/__tests__/openrouter.spec.ts index 010a8a9fa2..1efe150ab0 100644 --- a/src/api/providers/fetchers/__tests__/openrouter.spec.ts +++ 
b/src/api/providers/fetchers/__tests__/openrouter.spec.ts @@ -185,10 +185,11 @@ describe("OpenRouter API", () => { expect(endpoints).toEqual({ Google: { - maxTokens: 0, + maxTokens: 65535, contextWindow: 1048576, supportsImages: true, supportsPromptCache: true, + supportsReasoningBudget: true, inputPrice: 1.25, outputPrice: 10, cacheWritesPrice: 1.625, @@ -198,10 +199,11 @@ describe("OpenRouter API", () => { supportedParameters: undefined, }, "Google AI Studio": { - maxTokens: 0, + maxTokens: 65536, contextWindow: 1048576, supportsImages: true, supportsPromptCache: true, + supportsReasoningBudget: true, inputPrice: 1.25, outputPrice: 10, cacheWritesPrice: 1.625, diff --git a/src/api/providers/gemini.ts b/src/api/providers/gemini.ts index 5addc07a92..6765c8676d 100644 --- a/src/api/providers/gemini.ts +++ b/src/api/providers/gemini.ts @@ -14,6 +14,7 @@ import { safeJsonParse } from "../../shared/safeJsonParse" import { convertAnthropicContentToGemini, convertAnthropicMessageToGemini } from "../transform/gemini-format" import type { ApiStream } from "../transform/stream" +import { getModelParams } from "../transform/model-params" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" import { BaseProvider } from "./base-provider" @@ -62,7 +63,7 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl messages: Anthropic.Messages.MessageParam[], metadata?: ApiHandlerCreateMessageMetadata, ): ApiStream { - const { id: model, thinkingConfig, maxOutputTokens, info } = this.getModel() + const { id: model, info, reasoning: thinkingConfig, maxTokens } = this.getModel() const contents = messages.map(convertAnthropicMessageToGemini) @@ -70,7 +71,7 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl systemInstruction, httpOptions: this.options.googleGeminiBaseUrl ? 
{ baseUrl: this.options.googleGeminiBaseUrl } : undefined, thinkingConfig, - maxOutputTokens, + maxOutputTokens: this.options.modelMaxTokens ?? maxTokens ?? undefined, temperature: this.options.modelTemperature ?? 0, } @@ -81,7 +82,28 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl let lastUsageMetadata: GenerateContentResponseUsageMetadata | undefined for await (const chunk of result) { - if (chunk.text) { + // Process candidates and their parts to separate thoughts from content + if (chunk.candidates && chunk.candidates.length > 0) { + const candidate = chunk.candidates[0] + if (candidate.content && candidate.content.parts) { + for (const part of candidate.content.parts) { + if (part.thought) { + // This is a thinking/reasoning part + if (part.text) { + yield { type: "reasoning", text: part.text } + } + } else { + // This is regular content + if (part.text) { + yield { type: "text", text: part.text } + } + } + } + } + } + + // Fallback to the original text property if no candidates structure + else if (chunk.text) { yield { type: "text", text: chunk.text } } @@ -108,32 +130,16 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl } override getModel() { - let id = this.options.apiModelId ?? geminiDefaultModelId - let info: ModelInfo = geminiModels[id as GeminiModelId] - - if (id?.endsWith(":thinking")) { - id = id.slice(0, -":thinking".length) - - if (geminiModels[id as GeminiModelId]) { - info = geminiModels[id as GeminiModelId] - - return { - id, - info, - thinkingConfig: this.options.modelMaxThinkingTokens - ? { thinkingBudget: this.options.modelMaxThinkingTokens } - : undefined, - maxOutputTokens: this.options.modelMaxTokens ?? info.maxTokens ?? undefined, - } - } - } - - if (!info) { - id = geminiDefaultModelId - info = geminiModels[geminiDefaultModelId] - } - - return { id, info } + const modelId = this.options.apiModelId + let id = modelId && modelId in geminiModels ? 
(modelId as GeminiModelId) : geminiDefaultModelId + const info: ModelInfo = geminiModels[id] + const params = getModelParams({ format: "gemini", modelId: id, model: info, settings: this.options }) + + // The `:thinking` suffix indicates that the model is a "Hybrid" + // reasoning model and that reasoning is required to be enabled. + // The actual model ID honored by Gemini's API does not have this + // suffix. + return { id: id.endsWith(":thinking") ? id.replace(":thinking", "") : id, info, ...params } } async completePrompt(prompt: string): Promise<string> { diff --git a/src/api/providers/openrouter.ts b/src/api/providers/openrouter.ts index c0656735e7..31300b868b 100644 --- a/src/api/providers/openrouter.ts +++ b/src/api/providers/openrouter.ts @@ -74,6 +74,15 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH let { id: modelId, maxTokens, temperature, topP, reasoning } = model + // OpenRouter sends reasoning tokens by default for Gemini 2.5 Pro + // Preview even if you don't request them. This is not the default for + // other providers (including Gemini), so we need to explicitly disable + // it. We should generalize this using the logic in `getModelParams`, but + // this is easier for now. + if (modelId === "google/gemini-2.5-pro-preview" && typeof reasoning === "undefined") { + reasoning = { exclude: true } + } + + // Convert Anthropic messages to OpenAI format. 
let openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [ { role: "system", content: systemPrompt }, diff --git a/src/api/providers/vertex.ts b/src/api/providers/vertex.ts index fdd51e0666..2c077d97b7 100644 --- a/src/api/providers/vertex.ts +++ b/src/api/providers/vertex.ts @@ -2,6 +2,8 @@ import { type ModelInfo, type VertexModelId, vertexDefaultModelId, vertexModels import type { ApiHandlerOptions } from "../../shared/api" +import { getModelParams } from "../transform/model-params" + import { GeminiHandler } from "./gemini" import { SingleCompletionHandler } from "../index" @@ -11,31 +13,15 @@ export class VertexHandler extends GeminiHandler implements SingleCompletionHand } override getModel() { - let id = this.options.apiModelId ?? vertexDefaultModelId - let info: ModelInfo = vertexModels[id as VertexModelId] - - if (id?.endsWith(":thinking")) { - id = id.slice(0, -":thinking".length) as VertexModelId - - if (vertexModels[id as VertexModelId]) { - info = vertexModels[id as VertexModelId] - - return { - id, - info, - thinkingConfig: this.options.modelMaxThinkingTokens - ? { thinkingBudget: this.options.modelMaxThinkingTokens } - : undefined, - maxOutputTokens: this.options.modelMaxTokens ?? info.maxTokens ?? undefined, - } - } - } - - if (!info) { - id = vertexDefaultModelId - info = vertexModels[vertexDefaultModelId] - } - - return { id, info } + const modelId = this.options.apiModelId + let id = modelId && modelId in vertexModels ? (modelId as VertexModelId) : vertexDefaultModelId + const info: ModelInfo = vertexModels[id] + const params = getModelParams({ format: "gemini", modelId: id, model: info, settings: this.options }) + + // The `:thinking` suffix indicates that the model is a "Hybrid" + // reasoning model and that reasoning is required to be enabled. + // The actual model ID honored by Gemini's API does not have this + // suffix. + return { id: id.endsWith(":thinking") ? 
id.replace(":thinking", "") : id, info, ...params } } } diff --git a/src/api/transform/model-params.ts b/src/api/transform/model-params.ts index d9a2c749ca..9f33afbb96 100644 --- a/src/api/transform/model-params.ts +++ b/src/api/transform/model-params.ts @@ -5,13 +5,17 @@ import { shouldUseReasoningBudget, shouldUseReasoningEffort } from "../../shared import { type AnthropicReasoningParams, type OpenAiReasoningParams, + type GeminiReasoningParams, type OpenRouterReasoningParams, getAnthropicReasoning, getOpenAiReasoning, + getGeminiReasoning, getOpenRouterReasoning, } from "./reasoning" -type GetModelParamsOptions = { +type Format = "anthropic" | "openai" | "gemini" | "openrouter" + +type GetModelParamsOptions = { format: T modelId: string model: ModelInfo @@ -26,14 +30,19 @@ type BaseModelParams = { reasoningBudget: number | undefined } +type AnthropicModelParams = { + format: "anthropic" + reasoning: AnthropicReasoningParams | undefined +} & BaseModelParams + type OpenAiModelParams = { format: "openai" reasoning: OpenAiReasoningParams | undefined } & BaseModelParams -type AnthropicModelParams = { - format: "anthropic" - reasoning: AnthropicReasoningParams | undefined +type GeminiModelParams = { + format: "gemini" + reasoning: GeminiReasoningParams | undefined } & BaseModelParams type OpenRouterModelParams = { @@ -41,11 +50,12 @@ type OpenRouterModelParams = { reasoning: OpenRouterReasoningParams | undefined } & BaseModelParams -export type ModelParams = OpenAiModelParams | AnthropicModelParams | OpenRouterModelParams +export type ModelParams = AnthropicModelParams | OpenAiModelParams | GeminiModelParams | OpenRouterModelParams // Function overloads for specific return types -export function getModelParams(options: GetModelParamsOptions<"openai">): OpenAiModelParams export function getModelParams(options: GetModelParamsOptions<"anthropic">): AnthropicModelParams +export function getModelParams(options: GetModelParamsOptions<"openai">): OpenAiModelParams +export 
function getModelParams(options: GetModelParamsOptions<"gemini">): GeminiModelParams export function getModelParams(options: GetModelParamsOptions<"openrouter">): OpenRouterModelParams export function getModelParams({ format, @@ -53,7 +63,7 @@ export function getModelParams({ model, settings, defaultTemperature = 0, -}: GetModelParamsOptions<"openai" | "anthropic" | "openrouter">): ModelParams { +}: GetModelParamsOptions): ModelParams { const { modelMaxTokens: customMaxTokens, modelMaxThinkingTokens: customMaxThinkingTokens, @@ -121,6 +131,12 @@ export function getModelParams({ ...params, reasoning: getOpenAiReasoning({ model, reasoningBudget, reasoningEffort, settings }), } + } else if (format === "gemini") { + return { + format, + ...params, + reasoning: getGeminiReasoning({ model, reasoningBudget, reasoningEffort, settings }), + } } else { // Special case for o1-pro, which doesn't support temperature. // Note that OpenRouter's `supported_parameters` field includes diff --git a/src/api/transform/reasoning.ts b/src/api/transform/reasoning.ts index 9887f1137a..a173c59b19 100644 --- a/src/api/transform/reasoning.ts +++ b/src/api/transform/reasoning.ts @@ -1,5 +1,6 @@ import { BetaThinkingConfigParam } from "@anthropic-ai/sdk/resources/beta" import OpenAI from "openai" +import type { GenerateContentConfig } from "@google/genai" import type { ModelInfo, ProviderSettings } from "@roo-code/types" @@ -17,6 +18,8 @@ export type AnthropicReasoningParams = BetaThinkingConfigParam export type OpenAiReasoningParams = { reasoning_effort: OpenAI.Chat.ChatCompletionCreateParams["reasoning_effort"] } +export type GeminiReasoningParams = GenerateContentConfig["thinkingConfig"] + export type GetModelReasoningOptions = { model: ModelInfo reasoningBudget: number | undefined @@ -49,3 +52,12 @@ export const getOpenAiReasoning = ({ settings, }: GetModelReasoningOptions): OpenAiReasoningParams | undefined => shouldUseReasoningEffort({ model, settings }) ? 
{ reasoning_effort: reasoningEffort } : undefined + +export const getGeminiReasoning = ({ + model, + reasoningBudget, + settings, +}: GetModelReasoningOptions): GeminiReasoningParams | undefined => + shouldUseReasoningBudget({ model, settings }) + ? { thinkingBudget: reasoningBudget!, includeThoughts: true } + : undefined diff --git a/webview-ui/src/components/settings/ModelInfoView.tsx b/webview-ui/src/components/settings/ModelInfoView.tsx index d940e66d42..8078b03acd 100644 --- a/webview-ui/src/components/settings/ModelInfoView.tsx +++ b/webview-ui/src/components/settings/ModelInfoView.tsx @@ -73,8 +73,7 @@ export const ModelInfoView = ({ ), apiProvider === "gemini" && ( - {selectedModelId === "gemini-2.5-pro-preview-03-25" || - selectedModelId === "gemini-2.5-pro-preview-05-06" + {selectedModelId.includes("pro-preview") ? t("settings:modelInfo.gemini.billingEstimate") : t("settings:modelInfo.gemini.freeRequests", { count: selectedModelId && selectedModelId.includes("flash") ? 15 : 2,