@@ -84,14 +84,17 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
 			promptCache,
 		} = await this.fetchModel()
 
+		// For virtual :thinking models, use the base model ID for the API call
+		const apiModelId = modelId.endsWith(":thinking") ? modelId.replace(":thinking", "") : modelId
+
 		// Convert Anthropic messages to OpenAI format.
 		let openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
 			{ role: "system", content: systemPrompt },
 			...convertToOpenAiMessages(messages),
 		]
 
 		// DeepSeek highly recommends using user instead of system role.
-		if (modelId.startsWith("deepseek/deepseek-r1") || modelId === "perplexity/sonar-reasoning") {
+		if (apiModelId.startsWith("deepseek/deepseek-r1") || apiModelId === "perplexity/sonar-reasoning") {
 			openAiMessages = convertToR1Format([{ role: "user", content: systemPrompt }, ...messages])
 		}
 
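Per the comment in the hunk above, the `:thinking` suffix marks a virtual model on the client side; the actual API call uses the stripped base ID. A minimal standalone sketch of the same derivation (the helper name `getBaseModelId` is hypothetical, not part of this diff):

```ts
// Hypothetical helper mirroring the suffix-stripping above; not part of the diff.
function getBaseModelId(modelId: string): string {
	return modelId.endsWith(":thinking") ? modelId.replace(":thinking", "") : modelId
}

getBaseModelId("anthropic/claude-3.7-sonnet:thinking") // => "anthropic/claude-3.7-sonnet"
getBaseModelId("deepseek/deepseek-r1") // => "deepseek/deepseek-r1" (unchanged)
```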
@@ -108,10 +111,20 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
 
 		// https://openrouter.ai/docs/transforms
 		const completionParams: OpenRouterChatCompletionParams = {
-			model: modelId,
+			model: apiModelId,
 			...(maxTokens && maxTokens > 0 && { max_tokens: maxTokens }),
 			temperature,
-			thinking, // OpenRouter is temporarily supporting this.
+			// For virtual :thinking models, use OpenRouter's reasoning tokens instead of Anthropic's thinking
+			...(modelId.endsWith(":thinking") && thinking
+				? {
+						reasoning: thinking?.budget_tokens
+							? { max_tokens: thinking.budget_tokens }
+							: { effort: reasoningEffort || "medium" },
+					}
+				: {
+						// For non-thinking models, use Anthropic's thinking parameter if available
+						thinking,
+					}),
 			top_p: topP,
 			messages: openAiMessages,
 			stream: true,
@@ -127,7 +140,10 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
 			}),
 			// This way, the transforms field will only be included in the parameters when openRouterUseMiddleOutTransform is true.
 			...((this.options.openRouterUseMiddleOutTransform ?? true) && { transforms: ["middle-out"] }),
-			...(REASONING_MODELS.has(modelId) && reasoningEffort && { reasoning: { effort: reasoningEffort } }),
+			// Original reasoning logic for non-virtual thinking models (like Grok)
+			...(REASONING_MODELS.has(modelId) &&
+				reasoningEffort &&
+				!modelId.endsWith(":thinking") && { reasoning: { effort: reasoningEffort } }),
 		}
 
 		const stream = await this.client.chat.completions.create(completionParams)
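For a virtual `:thinking` model, the conditional spread above swaps Anthropic's `thinking` block for OpenRouter's unified `reasoning` parameter, preferring an explicit token budget over an effort level. An illustrative sketch of the two request shapes this produces (the model slug and budget value are examples, not taken from the diff):

```ts
// Budget configured: reasoning is capped by thinking.budget_tokens.
const withBudget = {
	model: "anthropic/claude-3.7-sonnet", // ":thinking" suffix already stripped
	reasoning: { max_tokens: 16_384 },
}

// No budget configured: falls back to an effort level.
const withEffort = {
	model: "anthropic/claude-3.7-sonnet",
	reasoning: { effort: "medium" }, // reasoningEffort || "medium"
}
```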
@@ -144,6 +160,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
 
 			const delta = chunk.choices[0]?.delta
 
+			// Handle OpenRouter's reasoning tokens (for both virtual :thinking models and other reasoning models)
 			if ("reasoning" in delta && delta.reasoning && typeof delta.reasoning === "string") {
 				yield { type: "reasoning", text: delta.reasoning }
 			}
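OpenRouter streams reasoning text on a `reasoning` field of the delta that the OpenAI SDK types do not declare, so the guard above narrows it with an `in` check before yielding. A self-contained sketch of that narrowing (the `Delta` type is a stand-in for the SDK's delta shape, not the real type):

```ts
// Stand-in for the SDK delta, which lacks a declared `reasoning` field.
type Delta = { content?: string | null; [key: string]: unknown }

function* emitReasoning(delta: Delta): Generator<{ type: "reasoning"; text: string }> {
	// Same narrowing as the diff: presence check, truthiness, then a string guard.
	if ("reasoning" in delta && delta.reasoning && typeof delta.reasoning === "string") {
		yield { type: "reasoning", text: delta.reasoning }
	}
}
```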
@@ -215,12 +232,25 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
 	}
 
 	async completePrompt(prompt: string) {
-		let { id: modelId, maxTokens, thinking, temperature } = await this.fetchModel()
+		let { id: modelId, maxTokens, thinking, temperature, reasoningEffort } = await this.fetchModel()
+
+		// For virtual :thinking models, use the base model ID for the API call
+		const apiModelId = modelId.endsWith(":thinking") ? modelId.replace(":thinking", "") : modelId
 
 		const completionParams: OpenRouterChatCompletionParams = {
-			model: modelId,
+			model: apiModelId,
 			max_tokens: maxTokens,
-			thinking,
+			// For virtual :thinking models, use OpenRouter's reasoning tokens instead of Anthropic's thinking
+			...(modelId.endsWith(":thinking") && thinking
+				? {
+						reasoning: thinking?.budget_tokens
+							? { max_tokens: thinking.budget_tokens }
+							: { effort: reasoningEffort || "medium" },
+					}
+				: {
+						// For non-thinking models, use Anthropic's thinking parameter if available
+						thinking,
+					}),
 			temperature,
 			messages: [{ role: "user", content: prompt }],
 			stream: false,
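`completePrompt` mirrors the streaming path: the same base-ID derivation and the same `reasoning`/`thinking` split, just with `stream: false` and a single user message. A hedged usage sketch (the constructor option names are assumed, not shown in this diff):

```ts
// Assumed option names; adjust to the handler's actual options type.
const handler = new OpenRouterHandler({
	openRouterApiKey: process.env.OPENROUTER_API_KEY,
	openRouterModelId: "anthropic/claude-3.7-sonnet:thinking",
})

// Non-streaming, single-turn call. The ":thinking" suffix is stripped from the
// model ID sent to the API and a `reasoning` parameter is attached instead.
const text = await handler.completePrompt("Explain middle-out transforms in one paragraph.")
```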