diff --git a/.changeset/dry-ducks-report.md b/.changeset/dry-ducks-report.md
new file mode 100644
index 0000000000..ca6654ffaf
--- /dev/null
+++ b/.changeset/dry-ducks-report.md
@@ -0,0 +1,5 @@
+---
+"roo-cline": patch
+---
+
+Fix reasoning budget for Gemini 2.5 Flash on OpenRouter
diff --git a/src/api/providers/fetchers/__tests__/litellm.test.ts b/src/api/providers/fetchers/__tests__/litellm.test.ts
index 4f59c3ab31..e908b6cef0 100644
--- a/src/api/providers/fetchers/__tests__/litellm.test.ts
+++ b/src/api/providers/fetchers/__tests__/litellm.test.ts
@@ -1,6 +1,6 @@
 import axios from "axios"
 import { getLiteLLMModels } from "../litellm"
-import { COMPUTER_USE_MODELS } from "../../../../shared/api"
+import { OPEN_ROUTER_COMPUTER_USE_MODELS } from "../../../../shared/api"
 
 // Mock axios
 jest.mock("axios")
@@ -105,7 +105,7 @@ describe("getLiteLLMModels", () => {
 	})
 
 	it("handles computer use models correctly", async () => {
-		const computerUseModel = Array.from(COMPUTER_USE_MODELS)[0]
+		const computerUseModel = Array.from(OPEN_ROUTER_COMPUTER_USE_MODELS)[0]
 		const mockResponse = {
 			data: {
 				data: [
diff --git a/src/api/providers/fetchers/__tests__/openrouter.spec.ts b/src/api/providers/fetchers/__tests__/openrouter.spec.ts
index 03aca82fc4..e7f9e7ddd2 100644
--- a/src/api/providers/fetchers/__tests__/openrouter.spec.ts
+++ b/src/api/providers/fetchers/__tests__/openrouter.spec.ts
@@ -4,7 +4,12 @@ import * as path from "path"
 
 import { back as nockBack } from "nock"
 
-import { PROMPT_CACHING_MODELS } from "../../../../shared/api"
+import {
+	OPEN_ROUTER_PROMPT_CACHING_MODELS,
+	OPEN_ROUTER_COMPUTER_USE_MODELS,
+	OPEN_ROUTER_REASONING_BUDGET_MODELS,
+	OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS,
+} from "../../../../shared/api"
 
 import { getOpenRouterModelEndpoints, getOpenRouterModels } from "../openrouter"
 
@@ -23,22 +28,14 @@ describe("OpenRouter API", () => {
 				.filter(([_, model]) => model.supportsPromptCache)
 				.map(([id, _]) => id)
 				.sort(),
-		).toEqual(Array.from(PROMPT_CACHING_MODELS).sort())
+		).toEqual(Array.from(OPEN_ROUTER_PROMPT_CACHING_MODELS).sort())
 
 		expect(
 			Object.entries(models)
 				.filter(([_, model]) => model.supportsComputerUse)
 				.map(([id, _]) => id)
 				.sort(),
-		).toEqual([
-			"anthropic/claude-3.5-sonnet",
-			"anthropic/claude-3.5-sonnet:beta",
-			"anthropic/claude-3.7-sonnet",
-			"anthropic/claude-3.7-sonnet:beta",
-			"anthropic/claude-3.7-sonnet:thinking",
-			"anthropic/claude-opus-4",
-			"anthropic/claude-sonnet-4",
-		])
+		).toEqual(Array.from(OPEN_ROUTER_COMPUTER_USE_MODELS).sort())
 
 		expect(
 			Object.entries(models)
@@ -108,19 +105,14 @@ describe("OpenRouter API", () => {
 				.filter(([_, model]) => model.supportsReasoningBudget)
 				.map(([id, _]) => id)
 				.sort(),
-		).toEqual([
-			"anthropic/claude-3.7-sonnet:beta",
-			"anthropic/claude-3.7-sonnet:thinking",
-			"anthropic/claude-opus-4",
-			"anthropic/claude-sonnet-4",
-		])
+		).toEqual(Array.from(OPEN_ROUTER_REASONING_BUDGET_MODELS).sort())
 
 		expect(
 			Object.entries(models)
 				.filter(([_, model]) => model.requiredReasoningBudget)
 				.map(([id, _]) => id)
 				.sort(),
-		).toEqual(["anthropic/claude-3.7-sonnet:thinking"])
+		).toEqual(Array.from(OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS).sort())
 
 		expect(models["anthropic/claude-3.7-sonnet"]).toEqual({
 			maxTokens: 8192,
@@ -155,6 +147,8 @@ describe("OpenRouter API", () => {
 			supportedParameters: ["max_tokens", "temperature", "reasoning", "include_reasoning"],
 		})
 
+		expect(models["google/gemini-2.5-flash-preview-05-20"].maxTokens).toEqual(65535)
+
 		const anthropicModels = Object.entries(models)
 			.filter(([id, _]) => id.startsWith("anthropic/claude-3"))
 			.map(([id, model]) => ({ id, maxTokens: model.maxTokens }))
@@ -200,7 +194,6 @@ describe("OpenRouter API", () => {
 				cacheWritesPrice: 1.625,
 				cacheReadsPrice: 0.31,
 				description: undefined,
-				supportsReasoningBudget: false,
 				supportsReasoningEffort: undefined,
 				supportedParameters: undefined,
 			},
@@ -214,7 +207,6 @@ describe("OpenRouter API", () => {
 				cacheWritesPrice: 1.625,
 				cacheReadsPrice: 0.31,
 				description: undefined,
-				supportsReasoningBudget: false,
 				supportsReasoningEffort: undefined,
 				supportedParameters: undefined,
 			},
diff --git a/src/api/providers/fetchers/litellm.ts b/src/api/providers/fetchers/litellm.ts
index 713237a627..8fb495c63e 100644
--- a/src/api/providers/fetchers/litellm.ts
+++ b/src/api/providers/fetchers/litellm.ts
@@ -1,5 +1,5 @@
 import axios from "axios"
-import { COMPUTER_USE_MODELS, ModelRecord } from "../../../shared/api"
+import { OPEN_ROUTER_COMPUTER_USE_MODELS, ModelRecord } from "../../../shared/api"
 
 /**
  * Fetches available models from a LiteLLM server
@@ -22,7 +22,7 @@ export async function getLiteLLMModels(apiKey: string, baseUrl: string): Promise
 		const response = await axios.get(`${baseUrl}/v1/model/info`, { headers, timeout: 5000 })
 		const models: ModelRecord = {}
 
-		const computerModels = Array.from(COMPUTER_USE_MODELS)
+		const computerModels = Array.from(OPEN_ROUTER_COMPUTER_USE_MODELS)
 
 		// Process the model info from the response
 		if (response.data && response.data.data && Array.isArray(response.data.data)) {
diff --git a/src/api/providers/fetchers/openrouter.ts b/src/api/providers/fetchers/openrouter.ts
index d40d7cf0a9..3841b11246 100644
--- a/src/api/providers/fetchers/openrouter.ts
+++ b/src/api/providers/fetchers/openrouter.ts
@@ -2,8 +2,14 @@ import axios from "axios"
 import { z } from "zod"
 
 import { isModelParameter } from "../../../schemas"
-import { ANTHROPIC_DEFAULT_MAX_TOKENS } from "../constants"
-import { ApiHandlerOptions, ModelInfo, COMPUTER_USE_MODELS, anthropicModels } from "../../../shared/api"
+import {
+	ApiHandlerOptions,
+	ModelInfo,
+	OPEN_ROUTER_COMPUTER_USE_MODELS,
+	OPEN_ROUTER_REASONING_BUDGET_MODELS,
+	OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS,
+	anthropicModels,
+} from "../../../shared/api"
 import { parseApiPrice } from "../../../utils/cost"
 
 /**
@@ -106,7 +112,7 @@ export async function getOpenRouterModels(options?: ApiHandlerOptions): Promise<
 				id,
 				model,
 				modality: architecture?.modality,
-				maxTokens: id.startsWith("anthropic/") ? top_provider?.max_completion_tokens : 0,
+				maxTokens: top_provider?.max_completion_tokens,
 				supportedParameters: supported_parameters,
 			})
 		}
@@ -146,7 +152,7 @@ export async function getOpenRouterModelEndpoints(
 				id,
 				model: endpoint,
 				modality: architecture?.modality,
-				maxTokens: id.startsWith("anthropic/") ? endpoint.max_completion_tokens : 0,
+				maxTokens: endpoint.max_completion_tokens,
 			})
 		}
 	} catch (error) {
@@ -183,8 +189,10 @@
 	const supportsPromptCache = typeof cacheWritesPrice !== "undefined" && typeof cacheReadsPrice !== "undefined"
 
+	const useMaxTokens = OPEN_ROUTER_REASONING_BUDGET_MODELS.has(id) || id.startsWith("anthropic/")
+
 	const modelInfo: ModelInfo = {
-		maxTokens: maxTokens || 0,
+		maxTokens: useMaxTokens ? maxTokens || 0 : 0,
 		contextWindow: model.context_length,
 		supportsImages: modality?.includes("image") ?? false,
 		supportsPromptCache,
@@ -193,20 +201,24 @@
 		cacheWritesPrice,
 		cacheReadsPrice,
 		description: model.description,
-		supportsReasoningBudget:
-			id.startsWith("anthropic/claude-3.7") ||
-			id.startsWith("anthropic/claude-sonnet-4") ||
-			id.startsWith("anthropic/claude-opus-4"),
 		supportsReasoningEffort: supportedParameters ? supportedParameters.includes("reasoning") : undefined,
 		supportedParameters: supportedParameters ? supportedParameters.filter(isModelParameter) : undefined,
 	}
 
 	// The OpenRouter model definition doesn't give us any hints about
 	// computer use, so we need to set that manually.
-	if (COMPUTER_USE_MODELS.has(id)) {
+	if (OPEN_ROUTER_COMPUTER_USE_MODELS.has(id)) {
 		modelInfo.supportsComputerUse = true
 	}
 
+	if (OPEN_ROUTER_REASONING_BUDGET_MODELS.has(id)) {
+		modelInfo.supportsReasoningBudget = true
+	}
+
+	if (OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS.has(id)) {
+		modelInfo.requiredReasoningBudget = true
+	}
+
 	// For backwards compatibility with the old model definitions we will
 	// continue to disable extending thinking for anthropic/claude-3.7-sonnet
 	// and force it for anthropic/claude-3.7-sonnet:thinking.
@@ -219,7 +231,6 @@
 
 	if (id === "anthropic/claude-3.7-sonnet:thinking") {
 		modelInfo.maxTokens = anthropicModels["claude-3-7-sonnet-20250219:thinking"].maxTokens
-		modelInfo.requiredReasoningBudget = true
 	}
 
 	return modelInfo
diff --git a/src/api/providers/openrouter.ts b/src/api/providers/openrouter.ts
index 5d1682f821..e7a8139864 100644
--- a/src/api/providers/openrouter.ts
+++ b/src/api/providers/openrouter.ts
@@ -6,7 +6,7 @@ import {
 	ModelRecord,
 	openRouterDefaultModelId,
 	openRouterDefaultModelInfo,
-	PROMPT_CACHING_MODELS,
+	OPEN_ROUTER_PROMPT_CACHING_MODELS,
 } from "../../shared/api"
 
 import { convertToOpenAiMessages } from "../transform/openai-format"
@@ -87,7 +87,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
 
 		// https://openrouter.ai/docs/features/prompt-caching
 		// TODO: Add a `promptCacheStratey` field to `ModelInfo`.
-		if (PROMPT_CACHING_MODELS.has(modelId)) {
+		if (OPEN_ROUTER_PROMPT_CACHING_MODELS.has(modelId)) {
 			if (modelId.startsWith("google")) {
 				addGeminiCacheBreakpoints(systemPrompt, openAiMessages)
 			} else {
diff --git a/src/shared/api.ts b/src/shared/api.ts
index d0f3667964..25196c6f10 100644
--- a/src/shared/api.ts
+++ b/src/shared/api.ts
@@ -1836,7 +1836,7 @@ export const chutesModels = {
  */
 
 // These models support prompt caching.
-export const PROMPT_CACHING_MODELS = new Set([
+export const OPEN_ROUTER_PROMPT_CACHING_MODELS = new Set([
 	"anthropic/claude-3-haiku",
 	"anthropic/claude-3-haiku:beta",
 	"anthropic/claude-3-opus",
@@ -1867,7 +1867,7 @@ export const PROMPT_CACHING_MODELS = new Set([
 ])
 
 // https://www.anthropic.com/news/3-5-models-and-computer-use
-export const COMPUTER_USE_MODELS = new Set([
+export const OPEN_ROUTER_COMPUTER_USE_MODELS = new Set([
 	"anthropic/claude-3.5-sonnet",
 	"anthropic/claude-3.5-sonnet:beta",
 	"anthropic/claude-3.7-sonnet",
@@ -1877,6 +1877,20 @@ export const COMPUTER_USE_MODELS = new Set([
 	"anthropic/claude-opus-4",
 ])
 
+export const OPEN_ROUTER_REASONING_BUDGET_MODELS = new Set([
+	"anthropic/claude-3.7-sonnet:beta",
+	"anthropic/claude-3.7-sonnet:thinking",
+	"anthropic/claude-opus-4",
+	"anthropic/claude-sonnet-4",
+	"google/gemini-2.5-flash-preview-05-20",
+	"google/gemini-2.5-flash-preview-05-20:thinking",
+])
+
+export const OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS = new Set([
+	"anthropic/claude-3.7-sonnet:thinking",
+	"google/gemini-2.5-flash-preview-05-20:thinking",
+])
+
 const routerNames = ["openrouter", "requesty", "glama", "unbound", "litellm"] as const
 
 export type RouterName = (typeof routerNames)[number]
diff --git a/webview-ui/src/components/settings/constants.ts b/webview-ui/src/components/settings/constants.ts
index 707644d4db..295088f9de 100644
--- a/webview-ui/src/components/settings/constants.ts
+++ b/webview-ui/src/components/settings/constants.ts
@@ -13,8 +13,6 @@ import {
 	chutesModels,
 } from "@roo/shared/api"
 
-export { PROMPT_CACHING_MODELS } from "@roo/shared/api"
-
 export { AWS_REGIONS } from "@roo/shared/aws_regions"
 
 export const MODELS_BY_PROVIDER: Partial<Record<ProviderName, Record<string, ModelInfo>>> = {