Skip to content

Commit 77a5b6c

Browse files
cte and mrubens authored
Fix reasoning budget for Gemini 2.5 Flash on OpenRouter (#3945)
Co-authored-by: Matt Rubens <[email protected]>
1 parent 68b60a2 commit 77a5b6c

File tree

8 files changed

+61
-41
lines changed

8 files changed

+61
-41
lines changed

.changeset/dry-ducks-report.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"roo-cline": patch
3+
---
4+
5+
Fix reasoning budget for Gemini 2.5 Flash on OpenRouter

src/api/providers/fetchers/__tests__/litellm.test.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import axios from "axios"
22
import { getLiteLLMModels } from "../litellm"
3-
import { COMPUTER_USE_MODELS } from "../../../../shared/api"
3+
import { OPEN_ROUTER_COMPUTER_USE_MODELS } from "../../../../shared/api"
44

55
// Mock axios
66
jest.mock("axios")
@@ -105,7 +105,7 @@ describe("getLiteLLMModels", () => {
105105
})
106106

107107
it("handles computer use models correctly", async () => {
108-
const computerUseModel = Array.from(COMPUTER_USE_MODELS)[0]
108+
const computerUseModel = Array.from(OPEN_ROUTER_COMPUTER_USE_MODELS)[0]
109109
const mockResponse = {
110110
data: {
111111
data: [

src/api/providers/fetchers/__tests__/openrouter.spec.ts

Lines changed: 12 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,12 @@ import * as path from "path"
44

55
import { back as nockBack } from "nock"
66

7-
import { PROMPT_CACHING_MODELS } from "../../../../shared/api"
7+
import {
8+
OPEN_ROUTER_PROMPT_CACHING_MODELS,
9+
OPEN_ROUTER_COMPUTER_USE_MODELS,
10+
OPEN_ROUTER_REASONING_BUDGET_MODELS,
11+
OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS,
12+
} from "../../../../shared/api"
813

914
import { getOpenRouterModelEndpoints, getOpenRouterModels } from "../openrouter"
1015

@@ -23,22 +28,14 @@ describe("OpenRouter API", () => {
2328
.filter(([_, model]) => model.supportsPromptCache)
2429
.map(([id, _]) => id)
2530
.sort(),
26-
).toEqual(Array.from(PROMPT_CACHING_MODELS).sort())
31+
).toEqual(Array.from(OPEN_ROUTER_PROMPT_CACHING_MODELS).sort())
2732

2833
expect(
2934
Object.entries(models)
3035
.filter(([_, model]) => model.supportsComputerUse)
3136
.map(([id, _]) => id)
3237
.sort(),
33-
).toEqual([
34-
"anthropic/claude-3.5-sonnet",
35-
"anthropic/claude-3.5-sonnet:beta",
36-
"anthropic/claude-3.7-sonnet",
37-
"anthropic/claude-3.7-sonnet:beta",
38-
"anthropic/claude-3.7-sonnet:thinking",
39-
"anthropic/claude-opus-4",
40-
"anthropic/claude-sonnet-4",
41-
])
38+
).toEqual(Array.from(OPEN_ROUTER_COMPUTER_USE_MODELS).sort())
4239

4340
expect(
4441
Object.entries(models)
@@ -108,19 +105,14 @@ describe("OpenRouter API", () => {
108105
.filter(([_, model]) => model.supportsReasoningBudget)
109106
.map(([id, _]) => id)
110107
.sort(),
111-
).toEqual([
112-
"anthropic/claude-3.7-sonnet:beta",
113-
"anthropic/claude-3.7-sonnet:thinking",
114-
"anthropic/claude-opus-4",
115-
"anthropic/claude-sonnet-4",
116-
])
108+
).toEqual(Array.from(OPEN_ROUTER_REASONING_BUDGET_MODELS).sort())
117109

118110
expect(
119111
Object.entries(models)
120112
.filter(([_, model]) => model.requiredReasoningBudget)
121113
.map(([id, _]) => id)
122114
.sort(),
123-
).toEqual(["anthropic/claude-3.7-sonnet:thinking"])
115+
).toEqual(Array.from(OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS).sort())
124116

125117
expect(models["anthropic/claude-3.7-sonnet"]).toEqual({
126118
maxTokens: 8192,
@@ -155,6 +147,8 @@ describe("OpenRouter API", () => {
155147
supportedParameters: ["max_tokens", "temperature", "reasoning", "include_reasoning"],
156148
})
157149

150+
expect(models["google/gemini-2.5-flash-preview-05-20"].maxTokens).toEqual(65535)
151+
158152
const anthropicModels = Object.entries(models)
159153
.filter(([id, _]) => id.startsWith("anthropic/claude-3"))
160154
.map(([id, model]) => ({ id, maxTokens: model.maxTokens }))
@@ -200,7 +194,6 @@ describe("OpenRouter API", () => {
200194
cacheWritesPrice: 1.625,
201195
cacheReadsPrice: 0.31,
202196
description: undefined,
203-
supportsReasoningBudget: false,
204197
supportsReasoningEffort: undefined,
205198
supportedParameters: undefined,
206199
},
@@ -214,7 +207,6 @@ describe("OpenRouter API", () => {
214207
cacheWritesPrice: 1.625,
215208
cacheReadsPrice: 0.31,
216209
description: undefined,
217-
supportsReasoningBudget: false,
218210
supportsReasoningEffort: undefined,
219211
supportedParameters: undefined,
220212
},

src/api/providers/fetchers/litellm.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import axios from "axios"
2-
import { COMPUTER_USE_MODELS, ModelRecord } from "../../../shared/api"
2+
import { OPEN_ROUTER_COMPUTER_USE_MODELS, ModelRecord } from "../../../shared/api"
33

44
/**
55
* Fetches available models from a LiteLLM server
@@ -22,7 +22,7 @@ export async function getLiteLLMModels(apiKey: string, baseUrl: string): Promise
2222
const response = await axios.get(`${baseUrl}/v1/model/info`, { headers, timeout: 5000 })
2323
const models: ModelRecord = {}
2424

25-
const computerModels = Array.from(COMPUTER_USE_MODELS)
25+
const computerModels = Array.from(OPEN_ROUTER_COMPUTER_USE_MODELS)
2626

2727
// Process the model info from the response
2828
if (response.data && response.data.data && Array.isArray(response.data.data)) {

src/api/providers/fetchers/openrouter.ts

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,14 @@ import axios from "axios"
22
import { z } from "zod"
33

44
import { isModelParameter } from "../../../schemas"
5-
import { ANTHROPIC_DEFAULT_MAX_TOKENS } from "../constants"
6-
import { ApiHandlerOptions, ModelInfo, COMPUTER_USE_MODELS, anthropicModels } from "../../../shared/api"
5+
import {
6+
ApiHandlerOptions,
7+
ModelInfo,
8+
OPEN_ROUTER_COMPUTER_USE_MODELS,
9+
OPEN_ROUTER_REASONING_BUDGET_MODELS,
10+
OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS,
11+
anthropicModels,
12+
} from "../../../shared/api"
713
import { parseApiPrice } from "../../../utils/cost"
814

915
/**
@@ -106,7 +112,7 @@ export async function getOpenRouterModels(options?: ApiHandlerOptions): Promise<
106112
id,
107113
model,
108114
modality: architecture?.modality,
109-
maxTokens: id.startsWith("anthropic/") ? top_provider?.max_completion_tokens : 0,
115+
maxTokens: top_provider?.max_completion_tokens,
110116
supportedParameters: supported_parameters,
111117
})
112118
}
@@ -146,7 +152,7 @@ export async function getOpenRouterModelEndpoints(
146152
id,
147153
model: endpoint,
148154
modality: architecture?.modality,
149-
maxTokens: id.startsWith("anthropic/") ? endpoint.max_completion_tokens : 0,
155+
maxTokens: endpoint.max_completion_tokens,
150156
})
151157
}
152158
} catch (error) {
@@ -183,8 +189,10 @@ export const parseOpenRouterModel = ({
183189

184190
const supportsPromptCache = typeof cacheWritesPrice !== "undefined" && typeof cacheReadsPrice !== "undefined"
185191

192+
const useMaxTokens = OPEN_ROUTER_REASONING_BUDGET_MODELS.has(id) || id.startsWith("anthropic/")
193+
186194
const modelInfo: ModelInfo = {
187-
maxTokens: maxTokens || 0,
195+
maxTokens: useMaxTokens ? maxTokens || 0 : 0,
188196
contextWindow: model.context_length,
189197
supportsImages: modality?.includes("image") ?? false,
190198
supportsPromptCache,
@@ -193,20 +201,24 @@ export const parseOpenRouterModel = ({
193201
cacheWritesPrice,
194202
cacheReadsPrice,
195203
description: model.description,
196-
supportsReasoningBudget:
197-
id.startsWith("anthropic/claude-3.7") ||
198-
id.startsWith("anthropic/claude-sonnet-4") ||
199-
id.startsWith("anthropic/claude-opus-4"),
200204
supportsReasoningEffort: supportedParameters ? supportedParameters.includes("reasoning") : undefined,
201205
supportedParameters: supportedParameters ? supportedParameters.filter(isModelParameter) : undefined,
202206
}
203207

204208
// The OpenRouter model definition doesn't give us any hints about
205209
// computer use, so we need to set that manually.
206-
if (COMPUTER_USE_MODELS.has(id)) {
210+
if (OPEN_ROUTER_COMPUTER_USE_MODELS.has(id)) {
207211
modelInfo.supportsComputerUse = true
208212
}
209213

214+
if (OPEN_ROUTER_REASONING_BUDGET_MODELS.has(id)) {
215+
modelInfo.supportsReasoningBudget = true
216+
}
217+
218+
if (OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS.has(id)) {
219+
modelInfo.requiredReasoningBudget = true
220+
}
221+
210222
// For backwards compatibility with the old model definitions we will
211223
// continue to disable extending thinking for anthropic/claude-3.7-sonnet
212224
// and force it for anthropic/claude-3.7-sonnet:thinking.
@@ -219,7 +231,6 @@ export const parseOpenRouterModel = ({
219231

220232
if (id === "anthropic/claude-3.7-sonnet:thinking") {
221233
modelInfo.maxTokens = anthropicModels["claude-3-7-sonnet-20250219:thinking"].maxTokens
222-
modelInfo.requiredReasoningBudget = true
223234
}
224235

225236
return modelInfo

src/api/providers/openrouter.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ import {
66
ModelRecord,
77
openRouterDefaultModelId,
88
openRouterDefaultModelInfo,
9-
PROMPT_CACHING_MODELS,
9+
OPEN_ROUTER_PROMPT_CACHING_MODELS,
1010
} from "../../shared/api"
1111

1212
import { convertToOpenAiMessages } from "../transform/openai-format"
@@ -87,7 +87,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
8787

8888
// https://openrouter.ai/docs/features/prompt-caching
8989
// TODO: Add a `promptCacheStratey` field to `ModelInfo`.
90-
if (PROMPT_CACHING_MODELS.has(modelId)) {
90+
if (OPEN_ROUTER_PROMPT_CACHING_MODELS.has(modelId)) {
9191
if (modelId.startsWith("google")) {
9292
addGeminiCacheBreakpoints(systemPrompt, openAiMessages)
9393
} else {

src/shared/api.ts

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1836,7 +1836,7 @@ export const chutesModels = {
18361836
*/
18371837

18381838
// These models support prompt caching.
1839-
export const PROMPT_CACHING_MODELS = new Set([
1839+
export const OPEN_ROUTER_PROMPT_CACHING_MODELS = new Set([
18401840
"anthropic/claude-3-haiku",
18411841
"anthropic/claude-3-haiku:beta",
18421842
"anthropic/claude-3-opus",
@@ -1867,7 +1867,7 @@ export const PROMPT_CACHING_MODELS = new Set([
18671867
])
18681868

18691869
// https://www.anthropic.com/news/3-5-models-and-computer-use
1870-
export const COMPUTER_USE_MODELS = new Set([
1870+
export const OPEN_ROUTER_COMPUTER_USE_MODELS = new Set([
18711871
"anthropic/claude-3.5-sonnet",
18721872
"anthropic/claude-3.5-sonnet:beta",
18731873
"anthropic/claude-3.7-sonnet",
@@ -1877,6 +1877,20 @@ export const COMPUTER_USE_MODELS = new Set([
18771877
"anthropic/claude-opus-4",
18781878
])
18791879

1880+
export const OPEN_ROUTER_REASONING_BUDGET_MODELS = new Set([
1881+
"anthropic/claude-3.7-sonnet:beta",
1882+
"anthropic/claude-3.7-sonnet:thinking",
1883+
"anthropic/claude-opus-4",
1884+
"anthropic/claude-sonnet-4",
1885+
"google/gemini-2.5-flash-preview-05-20",
1886+
"google/gemini-2.5-flash-preview-05-20:thinking",
1887+
])
1888+
1889+
export const OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS = new Set([
1890+
"anthropic/claude-3.7-sonnet:thinking",
1891+
"google/gemini-2.5-flash-preview-05-20:thinking",
1892+
])
1893+
18801894
const routerNames = ["openrouter", "requesty", "glama", "unbound", "litellm"] as const
18811895

18821896
export type RouterName = (typeof routerNames)[number]

webview-ui/src/components/settings/constants.ts

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,6 @@ import {
1313
chutesModels,
1414
} from "@roo/shared/api"
1515

16-
export { PROMPT_CACHING_MODELS } from "@roo/shared/api"
17-
1816
export { AWS_REGIONS } from "@roo/shared/aws_regions"
1917

2018
export const MODELS_BY_PROVIDER: Partial<Record<ProviderName, Record<string, ModelInfo>>> = {

0 commit comments

Comments
 (0)