Merged: Fix reasoning budget for Gemini 2.5 Flash on OpenRouter
.changeset/dry-ducks-report.md (+5 -0)
@@ -0,0 +1,5 @@
+---
+"roo-cline": patch
+---
+
+Fix reasoning budget for Gemini 2.5 Flash on OpenRouter
src/api/providers/fetchers/__tests__/litellm.test.ts (+2 -2)
@@ -1,6 +1,6 @@
 import axios from "axios"
 import { getLiteLLMModels } from "../litellm"
-import { COMPUTER_USE_MODELS } from "../../../../shared/api"
+import { OPEN_ROUTER_COMPUTER_USE_MODELS } from "../../../../shared/api"
Collaborator (author): I renamed this to make it clear that it's OpenRouter specific, so it's a bit weird that we're using it in LiteLLM.

Collaborator: Yeah... I think the idea is to pull this from LiteLLM config instead soon, right @slytechnical?

Contributor: Correct. The addition of support for that has been merged in on LiteLLM's side, but I figured we should wait till that goes live in their next release before switching to it on our end.

Collaborator: LiteLLM said that we didn’t need to wait for a release since it pulls from hosted config - probably worth confirming though.
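For reference, a minimal sketch of what that switch could look like, assuming LiteLLM's hosted config exposes a supports_computer_use flag under model_info (the field name and header shape are assumptions based on this thread, not a confirmed API):

// Hedged sketch: read computer-use support from LiteLLM's /v1/model/info
// response instead of the hardcoded OpenRouter set. Endpoint and timeout
// mirror getLiteLLMModels below; supports_computer_use and the auth header
// are assumed.
import axios from "axios"

async function modelSupportsComputerUse(apiKey: string, baseUrl: string, modelName: string): Promise<boolean> {
	const headers = { Authorization: `Bearer ${apiKey}` }
	const response = await axios.get(`${baseUrl}/v1/model/info`, { headers, timeout: 5000 })
	const entry = (response.data?.data ?? []).find((m: any) => m?.model_name === modelName)
	// Treat a missing flag as "not supported" until the field ships.
	return entry?.model_info?.supports_computer_use === true
}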

 // Mock axios
 jest.mock("axios")

@@ -105,7 +105,7 @@ describe("getLiteLLMModels", () => {
 	})

 	it("handles computer use models correctly", async () => {
-		const computerUseModel = Array.from(COMPUTER_USE_MODELS)[0]
+		const computerUseModel = Array.from(OPEN_ROUTER_COMPUTER_USE_MODELS)[0]
 		const mockResponse = {
 			data: {
 				data: [
src/api/providers/fetchers/__tests__/openrouter.spec.ts (+12 -20)
@@ -4,7 +4,12 @@ import * as path from "path"

 import { back as nockBack } from "nock"

-import { PROMPT_CACHING_MODELS } from "../../../../shared/api"
+import {
+	OPEN_ROUTER_PROMPT_CACHING_MODELS,
+	OPEN_ROUTER_COMPUTER_USE_MODELS,
+	OPEN_ROUTER_REASONING_BUDGET_MODELS,
+	OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS,
+} from "../../../../shared/api"

 import { getOpenRouterModelEndpoints, getOpenRouterModels } from "../openrouter"
@@ -23,22 +28,14 @@ describe("OpenRouter API", () => {
 				.filter(([_, model]) => model.supportsPromptCache)
 				.map(([id, _]) => id)
 				.sort(),
-		).toEqual(Array.from(PROMPT_CACHING_MODELS).sort())
+		).toEqual(Array.from(OPEN_ROUTER_PROMPT_CACHING_MODELS).sort())

 		expect(
 			Object.entries(models)
 				.filter(([_, model]) => model.supportsComputerUse)
 				.map(([id, _]) => id)
 				.sort(),
-		).toEqual([
-			"anthropic/claude-3.5-sonnet",
-			"anthropic/claude-3.5-sonnet:beta",
-			"anthropic/claude-3.7-sonnet",
-			"anthropic/claude-3.7-sonnet:beta",
-			"anthropic/claude-3.7-sonnet:thinking",
-			"anthropic/claude-opus-4",
-			"anthropic/claude-sonnet-4",
-		])
+		).toEqual(Array.from(OPEN_ROUTER_COMPUTER_USE_MODELS).sort())

 		expect(
 			Object.entries(models)
@@ -108,19 +105,14 @@ describe("OpenRouter API", () => {
 				.filter(([_, model]) => model.supportsReasoningBudget)
 				.map(([id, _]) => id)
 				.sort(),
-		).toEqual([
-			"anthropic/claude-3.7-sonnet:beta",
-			"anthropic/claude-3.7-sonnet:thinking",
-			"anthropic/claude-opus-4",
-			"anthropic/claude-sonnet-4",
-		])
+		).toEqual(Array.from(OPEN_ROUTER_REASONING_BUDGET_MODELS).sort())

 		expect(
 			Object.entries(models)
 				.filter(([_, model]) => model.requiredReasoningBudget)
 				.map(([id, _]) => id)
 				.sort(),
-		).toEqual(["anthropic/claude-3.7-sonnet:thinking"])
+		).toEqual(Array.from(OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS).sort())

 		expect(models["anthropic/claude-3.7-sonnet"]).toEqual({
 			maxTokens: 8192,
@@ -155,6 +147,8 @@ describe("OpenRouter API", () => {
 			supportedParameters: ["max_tokens", "temperature", "reasoning", "include_reasoning"],
 		})

+		expect(models["google/gemini-2.5-flash-preview-05-20"].maxTokens).toEqual(65535)
+
 		const anthropicModels = Object.entries(models)
 			.filter(([id, _]) => id.startsWith("anthropic/claude-3"))
 			.map(([id, model]) => ({ id, maxTokens: model.maxTokens }))
@@ -200,7 +194,6 @@ describe("OpenRouter API", () => {
 			cacheWritesPrice: 1.625,
 			cacheReadsPrice: 0.31,
 			description: undefined,
-			supportsReasoningBudget: false,
 			supportsReasoningEffort: undefined,
 			supportedParameters: undefined,
 		},
@@ -214,7 +207,6 @@ describe("OpenRouter API", () => {
 			cacheWritesPrice: 1.625,
 			cacheReadsPrice: 0.31,
 			description: undefined,
-			supportsReasoningBudget: false,
 			supportsReasoningEffort: undefined,
 			supportedParameters: undefined,
 		},
src/api/providers/fetchers/litellm.ts (+2 -2)
@@ -1,5 +1,5 @@
 import axios from "axios"
-import { COMPUTER_USE_MODELS, ModelRecord } from "../../../shared/api"
+import { OPEN_ROUTER_COMPUTER_USE_MODELS, ModelRecord } from "../../../shared/api"

 /**
  * Fetches available models from a LiteLLM server
@@ -22,7 +22,7 @@ export async function getLiteLLMModels(apiKey: string, baseUrl: string): Promise
 	const response = await axios.get(`${baseUrl}/v1/model/info`, { headers, timeout: 5000 })
 	const models: ModelRecord = {}

-	const computerModels = Array.from(COMPUTER_USE_MODELS)
+	const computerModels = Array.from(OPEN_ROUTER_COMPUTER_USE_MODELS)

 	// Process the model info from the response
 	if (response.data && response.data.data && Array.isArray(response.data.data)) {
src/api/providers/fetchers/openrouter.ts (+22 -11)
@@ -2,8 +2,14 @@ import axios from "axios"
 import { z } from "zod"

 import { isModelParameter } from "../../../schemas"
-import { ANTHROPIC_DEFAULT_MAX_TOKENS } from "../constants"
-import { ApiHandlerOptions, ModelInfo, COMPUTER_USE_MODELS, anthropicModels } from "../../../shared/api"
+import {
+	ApiHandlerOptions,
+	ModelInfo,
+	OPEN_ROUTER_COMPUTER_USE_MODELS,
+	OPEN_ROUTER_REASONING_BUDGET_MODELS,
+	OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS,
+	anthropicModels,
+} from "../../../shared/api"
 import { parseApiPrice } from "../../../utils/cost"

@@ -106,7 +112,7 @@ export async function getOpenRouterModels(options?: ApiHandlerOptions): Promise<
 				id,
 				model,
 				modality: architecture?.modality,
-				maxTokens: id.startsWith("anthropic/") ? top_provider?.max_completion_tokens : 0,
+				maxTokens: top_provider?.max_completion_tokens,
 				supportedParameters: supported_parameters,
 			})
 		}
@@ -146,7 +152,7 @@ export async function getOpenRouterModelEndpoints(
 				id,
 				model: endpoint,
 				modality: architecture?.modality,
-				maxTokens: id.startsWith("anthropic/") ? endpoint.max_completion_tokens : 0,
+				maxTokens: endpoint.max_completion_tokens,
 			})
 		}
 	} catch (error) {
@@ -183,8 +189,10 @@ export const parseOpenRouterModel = ({

 	const supportsPromptCache = typeof cacheWritesPrice !== "undefined" && typeof cacheReadsPrice !== "undefined"

+	const useMaxTokens = OPEN_ROUTER_REASONING_BUDGET_MODELS.has(id) || id.startsWith("anthropic/")
+
 	const modelInfo: ModelInfo = {
-		maxTokens: maxTokens || 0,
+		maxTokens: useMaxTokens ? maxTokens || 0 : 0,
 		contextWindow: model.context_length,
 		supportsImages: modality?.includes("image") ?? false,
 		supportsPromptCache,
@@ -193,20 +201,24 @@ export const parseOpenRouterModel = ({
 		cacheWritesPrice,
 		cacheReadsPrice,
 		description: model.description,
-		supportsReasoningBudget:
-			id.startsWith("anthropic/claude-3.7") ||
-			id.startsWith("anthropic/claude-sonnet-4") ||
-			id.startsWith("anthropic/claude-opus-4"),
 		supportsReasoningEffort: supportedParameters ? supportedParameters.includes("reasoning") : undefined,
 		supportedParameters: supportedParameters ? supportedParameters.filter(isModelParameter) : undefined,
 	}

 	// The OpenRouter model definition doesn't give us any hints about
 	// computer use, so we need to set that manually.
-	if (COMPUTER_USE_MODELS.has(id)) {
+	if (OPEN_ROUTER_COMPUTER_USE_MODELS.has(id)) {
 		modelInfo.supportsComputerUse = true
 	}

+	if (OPEN_ROUTER_REASONING_BUDGET_MODELS.has(id)) {
+		modelInfo.supportsReasoningBudget = true
+	}
+
+	if (OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS.has(id)) {
+		modelInfo.requiredReasoningBudget = true
+	}
+
 	// For backwards compatibility with the old model definitions we will
 	// continue to disable extending thinking for anthropic/claude-3.7-sonnet
 	// and force it for anthropic/claude-3.7-sonnet:thinking.
@@ -219,7 +231,6 @@ export const parseOpenRouterModel = ({

 	if (id === "anthropic/claude-3.7-sonnet:thinking") {
 		modelInfo.maxTokens = anthropicModels["claude-3-7-sonnet-20250219:thinking"].maxTokens
-		modelInfo.requiredReasoningBudget = true
 	}

 	return modelInfo
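To make the new gate concrete, here is a small usage sketch; the set membership and the 65535 figure come from the sets and test assertions above, and "openai/gpt-4o" is just an illustrative id that matches neither condition:

import { OPEN_ROUTER_REASONING_BUDGET_MODELS } from "../../../shared/api"

// Mirrors the gate added in parseOpenRouterModel: reasoning-budget models and
// anthropic/ models keep the provider-reported max_completion_tokens; all
// other models fall back to 0.
const useMaxTokens = (id: string): boolean =>
	OPEN_ROUTER_REASONING_BUDGET_MODELS.has(id) || id.startsWith("anthropic/")

useMaxTokens("google/gemini-2.5-flash-preview-05-20") // true: in the set, so maxTokens stays 65535
useMaxTokens("anthropic/claude-3.5-sonnet") // true: anthropic/ prefix
useMaxTokens("openai/gpt-4o") // false: maxTokens is reported as 0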
src/api/providers/openrouter.ts (+2 -2)
@@ -6,7 +6,7 @@ import {
 	ModelRecord,
 	openRouterDefaultModelId,
 	openRouterDefaultModelInfo,
-	PROMPT_CACHING_MODELS,
+	OPEN_ROUTER_PROMPT_CACHING_MODELS,
 } from "../../shared/api"

 import { convertToOpenAiMessages } from "../transform/openai-format"
@@ -87,7 +87,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH

 		// https://openrouter.ai/docs/features/prompt-caching
 		// TODO: Add a `promptCacheStrategy` field to `ModelInfo`.
-		if (PROMPT_CACHING_MODELS.has(modelId)) {
+		if (OPEN_ROUTER_PROMPT_CACHING_MODELS.has(modelId)) {
 			if (modelId.startsWith("google")) {
 				addGeminiCacheBreakpoints(systemPrompt, openAiMessages)
 			} else {
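As a sketch of that TODO, a hypothetical promptCacheStrategy field could replace the prefix branching; the field and variant names below are illustrative, not part of this PR or the current ModelInfo schema:

// Hypothetical sketch for the TODO above; names are assumptions.
type PromptCacheStrategy = "anthropic" | "gemini"

interface PromptCacheInfo {
	supportsPromptCache: boolean
	promptCacheStrategy?: PromptCacheStrategy
}

// Falls back to today's prefix check until the field is populated.
function cacheStrategyFor(modelId: string, info: PromptCacheInfo): PromptCacheStrategy | undefined {
	if (!info.supportsPromptCache) return undefined
	return info.promptCacheStrategy ?? (modelId.startsWith("google") ? "gemini" : "anthropic")
}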
src/shared/api.ts (+16 -2)
@@ -1836,7 +1836,7 @@ export const chutesModels = {
 */

 // These models support prompt caching.
-export const PROMPT_CACHING_MODELS = new Set([
+export const OPEN_ROUTER_PROMPT_CACHING_MODELS = new Set([
 	"anthropic/claude-3-haiku",
 	"anthropic/claude-3-haiku:beta",
 	"anthropic/claude-3-opus",
@@ -1867,7 +1867,7 @@ export const PROMPT_CACHING_MODELS = new Set([
 ])

 // https://www.anthropic.com/news/3-5-models-and-computer-use
-export const COMPUTER_USE_MODELS = new Set([
+export const OPEN_ROUTER_COMPUTER_USE_MODELS = new Set([
 	"anthropic/claude-3.5-sonnet",
 	"anthropic/claude-3.5-sonnet:beta",
 	"anthropic/claude-3.7-sonnet",
@@ -1877,6 +1877,20 @@ export const COMPUTER_USE_MODELS = new Set([
 	"anthropic/claude-opus-4",
 ])

+export const OPEN_ROUTER_REASONING_BUDGET_MODELS = new Set([
+	"anthropic/claude-3.7-sonnet:beta",
+	"anthropic/claude-3.7-sonnet:thinking",
+	"anthropic/claude-opus-4",
+	"anthropic/claude-sonnet-4",
+	"google/gemini-2.5-flash-preview-05-20",
+	"google/gemini-2.5-flash-preview-05-20:thinking",
+])
+
+export const OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS = new Set([
+	"anthropic/claude-3.7-sonnet:thinking",
+	"google/gemini-2.5-flash-preview-05-20:thinking",
+])
+
 const routerNames = ["openrouter", "requesty", "glama", "unbound", "litellm"] as const

 export type RouterName = (typeof routerNames)[number]
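One property worth keeping an eye on: requiring a reasoning budget should imply supporting one. A quick invariant check over the two new sets (a sanity sketch, import path illustrative):

import {
	OPEN_ROUTER_REASONING_BUDGET_MODELS,
	OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS,
} from "./api"

// Every model that requires a reasoning budget must also support one; both
// sets defined above satisfy this.
for (const id of OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS) {
	if (!OPEN_ROUTER_REASONING_BUDGET_MODELS.has(id)) {
		throw new Error(`${id} requires a reasoning budget but is missing from OPEN_ROUTER_REASONING_BUDGET_MODELS`)
	}
}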
webview-ui/src/components/settings/constants.ts (+0 -2)
@@ -13,8 +13,6 @@ import {
 	chutesModels,
 } from "@roo/shared/api"

-export { PROMPT_CACHING_MODELS } from "@roo/shared/api"
-
 export { AWS_REGIONS } from "@roo/shared/aws_regions"

 export const MODELS_BY_PROVIDER: Partial<Record<ProviderName, Record<string, ModelInfo>>> = {