Skip to content

Commit 1c62ef8

Browse files
committed
Enhance OpenRouter model handling for Claude Sonnet 4 and Claude Opus 4, including support for virtual ":thinking" model variants
1 parent 48ca890 commit 1c62ef8

File tree

4 files changed

+96
-10
lines changed

4 files changed

+96
-10
lines changed

src/api/providers/fetchers/openrouter.ts

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,27 @@ export async function getOpenRouterModels(options?: ApiHandlerOptions): Promise<
104104
modality: architecture?.modality,
105105
maxTokens: id.startsWith("anthropic/") ? top_provider?.max_completion_tokens : 0,
106106
})
107+
108+
// Create virtual :thinking variants for Claude 4 models
109+
if (id === "anthropic/claude-sonnet-4" && models[id]) {
110+
const thinkingId = `${id}:thinking`
111+
models[thinkingId] = parseOpenRouterModel({
112+
id: thinkingId,
113+
model,
114+
modality: architecture?.modality,
115+
maxTokens: id.startsWith("anthropic/") ? top_provider?.max_completion_tokens : 0,
116+
})
117+
}
118+
119+
if (id === "anthropic/claude-opus-4" && models[id]) {
120+
const thinkingId = `${id}:thinking`
121+
models[thinkingId] = parseOpenRouterModel({
122+
id: thinkingId,
123+
model,
124+
modality: architecture?.modality,
125+
maxTokens: id.startsWith("anthropic/") ? top_provider?.max_completion_tokens : 0,
126+
})
127+
}
107128
}
108129
} catch (error) {
109130
console.error(
@@ -186,7 +207,7 @@ export const parseOpenRouterModel = ({
186207
cacheWritesPrice,
187208
cacheReadsPrice,
188209
description: model.description,
189-
thinking: id === "anthropic/claude-3.7-sonnet:thinking",
210+
thinking: id.endsWith(":thinking"),
190211
}
191212

192213
// The OpenRouter model definition doesn't give us any hints about
@@ -205,5 +226,18 @@ export const parseOpenRouterModel = ({
205226
: anthropicModels["claude-3-7-sonnet-20250219"].maxTokens
206227
}
207228

229+
// Claude Sonnet 4 and Opus 4 are also "hybrid" thinking models
230+
if (id.startsWith("anthropic/claude-sonnet-4")) {
231+
modelInfo.maxTokens = id.includes("thinking")
232+
? anthropicModels["claude-sonnet-4-20250514:thinking"].maxTokens
233+
: anthropicModels["claude-sonnet-4-20250514"].maxTokens
234+
}
235+
236+
if (id.startsWith("anthropic/claude-opus-4")) {
237+
modelInfo.maxTokens = id.includes("thinking")
238+
? anthropicModels["claude-opus-4-20250514:thinking"].maxTokens
239+
: anthropicModels["claude-opus-4-20250514"].maxTokens
240+
}
241+
208242
return modelInfo
209243
}

src/api/providers/openrouter.ts

Lines changed: 37 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -84,14 +84,17 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
8484
promptCache,
8585
} = await this.fetchModel()
8686

87+
// For virtual :thinking models, use the base model ID for the API call
88+
const apiModelId = modelId.endsWith(":thinking") ? modelId.replace(":thinking", "") : modelId
89+
8790
// Convert Anthropic messages to OpenAI format.
8891
let openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
8992
{ role: "system", content: systemPrompt },
9093
...convertToOpenAiMessages(messages),
9194
]
9295

9396
// DeepSeek highly recommends using user instead of system role.
94-
if (modelId.startsWith("deepseek/deepseek-r1") || modelId === "perplexity/sonar-reasoning") {
97+
if (apiModelId.startsWith("deepseek/deepseek-r1") || apiModelId === "perplexity/sonar-reasoning") {
9598
openAiMessages = convertToR1Format([{ role: "user", content: systemPrompt }, ...messages])
9699
}
97100

@@ -108,10 +111,20 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
108111

109112
// https://openrouter.ai/docs/transforms
110113
const completionParams: OpenRouterChatCompletionParams = {
111-
model: modelId,
114+
model: apiModelId,
112115
...(maxTokens && maxTokens > 0 && { max_tokens: maxTokens }),
113116
temperature,
114-
thinking, // OpenRouter is temporarily supporting this.
117+
// For virtual :thinking models, use OpenRouter's reasoning tokens instead of Anthropic's thinking
118+
...(modelId.endsWith(":thinking") && thinking
119+
? {
120+
reasoning: thinking?.budget_tokens
121+
? { max_tokens: thinking.budget_tokens }
122+
: { effort: reasoningEffort || "medium" },
123+
}
124+
: {
125+
// For non-thinking models, use Anthropic's thinking parameter if available
126+
thinking,
127+
}),
115128
top_p: topP,
116129
messages: openAiMessages,
117130
stream: true,
@@ -127,7 +140,10 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
127140
}),
128141
// This way, the transforms field will only be included in the parameters when openRouterUseMiddleOutTransform is true.
129142
...((this.options.openRouterUseMiddleOutTransform ?? true) && { transforms: ["middle-out"] }),
130-
...(REASONING_MODELS.has(modelId) && reasoningEffort && { reasoning: { effort: reasoningEffort } }),
143+
// Original reasoning logic for non-virtual thinking models (like Grok)
144+
...(REASONING_MODELS.has(modelId) &&
145+
reasoningEffort &&
146+
!modelId.endsWith(":thinking") && { reasoning: { effort: reasoningEffort } }),
131147
}
132148

133149
const stream = await this.client.chat.completions.create(completionParams)
@@ -144,6 +160,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
144160

145161
const delta = chunk.choices[0]?.delta
146162

163+
// Handle OpenRouter's reasoning tokens (for both virtual :thinking models and other reasoning models)
147164
if ("reasoning" in delta && delta.reasoning && typeof delta.reasoning === "string") {
148165
yield { type: "reasoning", text: delta.reasoning }
149166
}
@@ -215,12 +232,25 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
215232
}
216233

217234
async completePrompt(prompt: string) {
218-
let { id: modelId, maxTokens, thinking, temperature } = await this.fetchModel()
235+
let { id: modelId, maxTokens, thinking, temperature, reasoningEffort } = await this.fetchModel()
236+
237+
// For virtual :thinking models, use the base model ID for the API call
238+
const apiModelId = modelId.endsWith(":thinking") ? modelId.replace(":thinking", "") : modelId
219239

220240
const completionParams: OpenRouterChatCompletionParams = {
221-
model: modelId,
241+
model: apiModelId,
222242
max_tokens: maxTokens,
223-
thinking,
243+
// For virtual :thinking models, use OpenRouter's reasoning tokens instead of Anthropic's thinking
244+
...(modelId.endsWith(":thinking") && thinking
245+
? {
246+
reasoning: thinking?.budget_tokens
247+
? { max_tokens: thinking.budget_tokens }
248+
: { effort: reasoningEffort || "medium" },
249+
}
250+
: {
251+
// For non-thinking models, use Anthropic's thinking parameter if available
252+
thinking,
253+
}),
224254
temperature,
225255
messages: [{ role: "user", content: prompt }],
226256
stream: false,

src/shared/api.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1921,6 +1921,10 @@ export const PROMPT_CACHING_MODELS = new Set([
19211921
"anthropic/claude-3.7-sonnet",
19221922
"anthropic/claude-3.7-sonnet:beta",
19231923
"anthropic/claude-3.7-sonnet:thinking",
1924+
"anthropic/claude-sonnet-4",
1925+
"anthropic/claude-sonnet-4:thinking",
1926+
"anthropic/claude-opus-4",
1927+
"anthropic/claude-opus-4:thinking",
19241928
"anthropic/claude-sonnet-4-20250514",
19251929
"anthropic/claude-sonnet-4-20250514:thinking",
19261930
"anthropic/claude-opus-4-20250514",
@@ -1946,6 +1950,10 @@ export const COMPUTER_USE_MODELS = new Set([
19461950
"anthropic/claude-sonnet-4-20250514:thinking",
19471951
"anthropic/claude-opus-4-20250514",
19481952
"anthropic/claude-opus-4-20250514:thinking",
1953+
"anthropic/claude-sonnet-4",
1954+
"anthropic/claude-sonnet-4:thinking",
1955+
"anthropic/claude-opus-4",
1956+
"anthropic/claude-opus-4:thinking",
19491957
])
19501958

19511959
const routerNames = ["openrouter", "requesty", "glama", "unbound", "litellm"] as const

webview-ui/src/components/ui/hooks/useOpenRouterModelProviders.ts

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ async function getOpenRouterProvidersForModel(modelId: string) {
6565
inputPrice,
6666
outputPrice,
6767
description,
68-
thinking: modelId === "anthropic/claude-3.7-sonnet:thinking",
68+
thinking: modelId.endsWith(":thinking"),
6969
label: providerName,
7070
}
7171

@@ -75,7 +75,21 @@ async function getOpenRouterProvidersForModel(modelId: string) {
7575
modelInfo.supportsPromptCache = true
7676
modelInfo.cacheWritesPrice = 3.75
7777
modelInfo.cacheReadsPrice = 0.3
78-
modelInfo.maxTokens = id === "anthropic/claude-3.7-sonnet:thinking" ? 64_000 : 8192
78+
modelInfo.maxTokens = modelId.endsWith(":thinking") ? 64_000 : 8192
79+
break
80+
case modelId.startsWith("anthropic/claude-sonnet-4"):
81+
modelInfo.supportsComputerUse = true
82+
modelInfo.supportsPromptCache = true
83+
modelInfo.cacheWritesPrice = 3.75
84+
modelInfo.cacheReadsPrice = 0.3
85+
modelInfo.maxTokens = modelId.endsWith(":thinking") ? 64_000 : 8192
86+
break
87+
case modelId.startsWith("anthropic/claude-opus-4"):
88+
modelInfo.supportsComputerUse = true
89+
modelInfo.supportsPromptCache = true
90+
modelInfo.cacheWritesPrice = 18.75
91+
modelInfo.cacheReadsPrice = 1.5
92+
modelInfo.maxTokens = modelId.endsWith(":thinking") ? 64_000 : 8192
7993
break
8094
case modelId.startsWith("anthropic/claude-3.5-sonnet-20240620"):
8195
modelInfo.supportsPromptCache = true

0 commit comments

Comments
 (0)