Skip to content

Commit 77a5b6c

Browse files
cte and mrubens authored
Fix reasoning budget for Gemini 2.5 Flash on OpenRouter (#3945)
Co-authored-by: Matt Rubens <[email protected]>
1 parent 68b60a2 commit 77a5b6c

File tree

8 files changed

+61
-41
lines changed

8 files changed

+61
-41
lines changed

.changeset/dry-ducks-report.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"roo-cline": patch
3+
---
4+
5+
Fix reasoning budget for Gemini 2.5 Flash on OpenRouter

src/api/providers/fetchers/__tests__/litellm.test.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import axios from "axios"
22
import { getLiteLLMModels } from "../litellm"
3-
import { COMPUTER_USE_MODELS } from "../../../../shared/api"
3+
import { OPEN_ROUTER_COMPUTER_USE_MODELS } from "../../../../shared/api"
44

55
// Mock axios
66
jest.mock("axios")
@@ -105,7 +105,7 @@ describe("getLiteLLMModels", () => {
105105
})
106106

107107
it("handles computer use models correctly", async () => {
108-
const computerUseModel = Array.from(COMPUTER_USE_MODELS)[0]
108+
const computerUseModel = Array.from(OPEN_ROUTER_COMPUTER_USE_MODELS)[0]
109109
const mockResponse = {
110110
data: {
111111
data: [

src/api/providers/fetchers/__tests__/openrouter.spec.ts

Lines changed: 12 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,12 @@ import * as path from "path"
44

55
import { back as nockBack } from "nock"
66

7-
import { PROMPT_CACHING_MODELS } from "../../../../shared/api"
7+
import {
8+
OPEN_ROUTER_PROMPT_CACHING_MODELS,
9+
OPEN_ROUTER_COMPUTER_USE_MODELS,
10+
OPEN_ROUTER_REASONING_BUDGET_MODELS,
11+
OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS,
12+
} from "../../../../shared/api"
813

914
import { getOpenRouterModelEndpoints, getOpenRouterModels } from "../openrouter"
1015

@@ -23,22 +28,14 @@ describe("OpenRouter API", () => {
2328
.filter(([_, model]) => model.supportsPromptCache)
2429
.map(([id, _]) => id)
2530
.sort(),
26-
).toEqual(Array.from(PROMPT_CACHING_MODELS).sort())
31+
).toEqual(Array.from(OPEN_ROUTER_PROMPT_CACHING_MODELS).sort())
2732

2833
expect(
2934
Object.entries(models)
3035
.filter(([_, model]) => model.supportsComputerUse)
3136
.map(([id, _]) => id)
3237
.sort(),
33-
).toEqual([
34-
"anthropic/claude-3.5-sonnet",
35-
"anthropic/claude-3.5-sonnet:beta",
36-
"anthropic/claude-3.7-sonnet",
37-
"anthropic/claude-3.7-sonnet:beta",
38-
"anthropic/claude-3.7-sonnet:thinking",
39-
"anthropic/claude-opus-4",
40-
"anthropic/claude-sonnet-4",
41-
])
38+
).toEqual(Array.from(OPEN_ROUTER_COMPUTER_USE_MODELS).sort())
4239

4340
expect(
4441
Object.entries(models)
@@ -108,19 +105,14 @@ describe("OpenRouter API", () => {
108105
.filter(([_, model]) => model.supportsReasoningBudget)
109106
.map(([id, _]) => id)
110107
.sort(),
111-
).toEqual([
112-
"anthropic/claude-3.7-sonnet:beta",
113-
"anthropic/claude-3.7-sonnet:thinking",
114-
"anthropic/claude-opus-4",
115-
"anthropic/claude-sonnet-4",
116-
])
108+
).toEqual(Array.from(OPEN_ROUTER_REASONING_BUDGET_MODELS).sort())
117109

118110
expect(
119111
Object.entries(models)
120112
.filter(([_, model]) => model.requiredReasoningBudget)
121113
.map(([id, _]) => id)
122114
.sort(),
123-
).toEqual(["anthropic/claude-3.7-sonnet:thinking"])
115+
).toEqual(Array.from(OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS).sort())
124116

125117
expect(models["anthropic/claude-3.7-sonnet"]).toEqual({
126118
maxTokens: 8192,
@@ -155,6 +147,8 @@ describe("OpenRouter API", () => {
155147
supportedParameters: ["max_tokens", "temperature", "reasoning", "include_reasoning"],
156148
})
157149

150+
expect(models["google/gemini-2.5-flash-preview-05-20"].maxTokens).toEqual(65535)
151+
158152
const anthropicModels = Object.entries(models)
159153
.filter(([id, _]) => id.startsWith("anthropic/claude-3"))
160154
.map(([id, model]) => ({ id, maxTokens: model.maxTokens }))
@@ -200,7 +194,6 @@ describe("OpenRouter API", () => {
200194
cacheWritesPrice: 1.625,
201195
cacheReadsPrice: 0.31,
202196
description: undefined,
203-
supportsReasoningBudget: false,
204197
supportsReasoningEffort: undefined,
205198
supportedParameters: undefined,
206199
},
@@ -214,7 +207,6 @@ describe("OpenRouter API", () => {
214207
cacheWritesPrice: 1.625,
215208
cacheReadsPrice: 0.31,
216209
description: undefined,
217-
supportsReasoningBudget: false,
218210
supportsReasoningEffort: undefined,
219211
supportedParameters: undefined,
220212
},

src/api/providers/fetchers/litellm.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import axios from "axios"
2-
import { COMPUTER_USE_MODELS, ModelRecord } from "../../../shared/api"
2+
import { OPEN_ROUTER_COMPUTER_USE_MODELS, ModelRecord } from "../../../shared/api"
33

44
/**
55
* Fetches available models from a LiteLLM server
@@ -22,7 +22,7 @@ export async function getLiteLLMModels(apiKey: string, baseUrl: string): Promise
2222
const response = await axios.get(`${baseUrl}/v1/model/info`, { headers, timeout: 5000 })
2323
const models: ModelRecord = {}
2424

25-
const computerModels = Array.from(COMPUTER_USE_MODELS)
25+
const computerModels = Array.from(OPEN_ROUTER_COMPUTER_USE_MODELS)
2626

2727
// Process the model info from the response
2828
if (response.data && response.data.data && Array.isArray(response.data.data)) {

src/api/providers/fetchers/openrouter.ts

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,14 @@ import axios from "axios"
22
import { z } from "zod"
33

44
import { isModelParameter } from "../../../schemas"
5-
import { ANTHROPIC_DEFAULT_MAX_TOKENS } from "../constants"
6-
import { ApiHandlerOptions, ModelInfo, COMPUTER_USE_MODELS, anthropicModels } from "../../../shared/api"
5+
import {
6+
ApiHandlerOptions,
7+
ModelInfo,
8+
OPEN_ROUTER_COMPUTER_USE_MODELS,
9+
OPEN_ROUTER_REASONING_BUDGET_MODELS,
10+
OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS,
11+
anthropicModels,
12+
} from "../../../shared/api"
713
import { parseApiPrice } from "../../../utils/cost"
814

915
/**
@@ -106,7 +112,7 @@ export async function getOpenRouterModels(options?: ApiHandlerOptions): Promise<
106112
id,
107113
model,
108114
modality: architecture?.modality,
109-
maxTokens: id.startsWith("anthropic/") ? top_provider?.max_completion_tokens : 0,
115+
maxTokens: top_provider?.max_completion_tokens,
110116
supportedParameters: supported_parameters,
111117
})
112118
}
@@ -146,7 +152,7 @@ export async function getOpenRouterModelEndpoints(
146152
id,
147153
model: endpoint,
148154
modality: architecture?.modality,
149-
maxTokens: id.startsWith("anthropic/") ? endpoint.max_completion_tokens : 0,
155+
maxTokens: endpoint.max_completion_tokens,
150156
})
151157
}
152158
} catch (error) {
@@ -183,8 +189,10 @@ export const parseOpenRouterModel = ({
183189

184190
const supportsPromptCache = typeof cacheWritesPrice !== "undefined" && typeof cacheReadsPrice !== "undefined"
185191

192+
const useMaxTokens = OPEN_ROUTER_REASONING_BUDGET_MODELS.has(id) || id.startsWith("anthropic/")
193+
186194
const modelInfo: ModelInfo = {
187-
maxTokens: maxTokens || 0,
195+
maxTokens: useMaxTokens ? maxTokens || 0 : 0,
188196
contextWindow: model.context_length,
189197
supportsImages: modality?.includes("image") ?? false,
190198
supportsPromptCache,
@@ -193,20 +201,24 @@ export const parseOpenRouterModel = ({
193201
cacheWritesPrice,
194202
cacheReadsPrice,
195203
description: model.description,
196-
supportsReasoningBudget:
197-
id.startsWith("anthropic/claude-3.7") ||
198-
id.startsWith("anthropic/claude-sonnet-4") ||
199-
id.startsWith("anthropic/claude-opus-4"),
200204
supportsReasoningEffort: supportedParameters ? supportedParameters.includes("reasoning") : undefined,
201205
supportedParameters: supportedParameters ? supportedParameters.filter(isModelParameter) : undefined,
202206
}
203207

204208
// The OpenRouter model definition doesn't give us any hints about
205209
// computer use, so we need to set that manually.
206-
if (COMPUTER_USE_MODELS.has(id)) {
210+
if (OPEN_ROUTER_COMPUTER_USE_MODELS.has(id)) {
207211
modelInfo.supportsComputerUse = true
208212
}
209213

214+
if (OPEN_ROUTER_REASONING_BUDGET_MODELS.has(id)) {
215+
modelInfo.supportsReasoningBudget = true
216+
}
217+
218+
if (OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS.has(id)) {
219+
modelInfo.requiredReasoningBudget = true
220+
}
221+
210222
// For backwards compatibility with the old model definitions we will
211223
// continue to disable extending thinking for anthropic/claude-3.7-sonnet
212224
// and force it for anthropic/claude-3.7-sonnet:thinking.
@@ -219,7 +231,6 @@ export const parseOpenRouterModel = ({
219231

220232
if (id === "anthropic/claude-3.7-sonnet:thinking") {
221233
modelInfo.maxTokens = anthropicModels["claude-3-7-sonnet-20250219:thinking"].maxTokens
222-
modelInfo.requiredReasoningBudget = true
223234
}
224235

225236
return modelInfo

src/api/providers/openrouter.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ import {
66
ModelRecord,
77
openRouterDefaultModelId,
88
openRouterDefaultModelInfo,
9-
PROMPT_CACHING_MODELS,
9+
OPEN_ROUTER_PROMPT_CACHING_MODELS,
1010
} from "../../shared/api"
1111

1212
import { convertToOpenAiMessages } from "../transform/openai-format"
@@ -87,7 +87,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
8787

8888
// https://openrouter.ai/docs/features/prompt-caching
8989
// TODO: Add a `promptCacheStratey` field to `ModelInfo`.
90-
if (PROMPT_CACHING_MODELS.has(modelId)) {
90+
if (OPEN_ROUTER_PROMPT_CACHING_MODELS.has(modelId)) {
9191
if (modelId.startsWith("google")) {
9292
addGeminiCacheBreakpoints(systemPrompt, openAiMessages)
9393
} else {

src/shared/api.ts

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1836,7 +1836,7 @@ export const chutesModels = {
18361836
*/
18371837

18381838
// These models support prompt caching.
1839-
export const PROMPT_CACHING_MODELS = new Set([
1839+
export const OPEN_ROUTER_PROMPT_CACHING_MODELS = new Set([
18401840
"anthropic/claude-3-haiku",
18411841
"anthropic/claude-3-haiku:beta",
18421842
"anthropic/claude-3-opus",
@@ -1867,7 +1867,7 @@ export const PROMPT_CACHING_MODELS = new Set([
18671867
])
18681868

18691869
// https://www.anthropic.com/news/3-5-models-and-computer-use
1870-
export const COMPUTER_USE_MODELS = new Set([
1870+
export const OPEN_ROUTER_COMPUTER_USE_MODELS = new Set([
18711871
"anthropic/claude-3.5-sonnet",
18721872
"anthropic/claude-3.5-sonnet:beta",
18731873
"anthropic/claude-3.7-sonnet",
@@ -1877,6 +1877,20 @@ export const COMPUTER_USE_MODELS = new Set([
18771877
"anthropic/claude-opus-4",
18781878
])
18791879

1880+
export const OPEN_ROUTER_REASONING_BUDGET_MODELS = new Set([
1881+
"anthropic/claude-3.7-sonnet:beta",
1882+
"anthropic/claude-3.7-sonnet:thinking",
1883+
"anthropic/claude-opus-4",
1884+
"anthropic/claude-sonnet-4",
1885+
"google/gemini-2.5-flash-preview-05-20",
1886+
"google/gemini-2.5-flash-preview-05-20:thinking",
1887+
])
1888+
1889+
export const OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS = new Set([
1890+
"anthropic/claude-3.7-sonnet:thinking",
1891+
"google/gemini-2.5-flash-preview-05-20:thinking",
1892+
])
1893+
18801894
const routerNames = ["openrouter", "requesty", "glama", "unbound", "litellm"] as const
18811895

18821896
export type RouterName = (typeof routerNames)[number]

webview-ui/src/components/settings/constants.ts

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,6 @@ import {
1313
chutesModels,
1414
} from "@roo/shared/api"
1515

16-
export { PROMPT_CACHING_MODELS } from "@roo/shared/api"
17-
1816
export { AWS_REGIONS } from "@roo/shared/aws_regions"
1917

2018
export const MODELS_BY_PROVIDER: Partial<Record<ProviderName, Record<string, ModelInfo>>> = {

0 commit comments

Comments
 (0)