diff --git a/src/api/providers/fetchers/openrouter.ts b/src/api/providers/fetchers/openrouter.ts index f8e605aa08..94c04bfe0a 100644 --- a/src/api/providers/fetchers/openrouter.ts +++ b/src/api/providers/fetchers/openrouter.ts @@ -104,6 +104,27 @@ export async function getOpenRouterModels(options?: ApiHandlerOptions): Promise< modality: architecture?.modality, maxTokens: id.startsWith("anthropic/") ? top_provider?.max_completion_tokens : 0, }) + + // Create virtual :thinking variants for Claude 4 models + if (id === "anthropic/claude-sonnet-4" && models[id]) { + const thinkingId = `${id}:thinking` + models[thinkingId] = parseOpenRouterModel({ + id: thinkingId, + model, + modality: architecture?.modality, + maxTokens: id.startsWith("anthropic/") ? top_provider?.max_completion_tokens : 0, + }) + } + + if (id === "anthropic/claude-opus-4" && models[id]) { + const thinkingId = `${id}:thinking` + models[thinkingId] = parseOpenRouterModel({ + id: thinkingId, + model, + modality: architecture?.modality, + maxTokens: id.startsWith("anthropic/") ? top_provider?.max_completion_tokens : 0, + }) + } } } catch (error) { console.error( @@ -186,7 +207,7 @@ export const parseOpenRouterModel = ({ cacheWritesPrice, cacheReadsPrice, description: model.description, - thinking: id === "anthropic/claude-3.7-sonnet:thinking", + thinking: id.endsWith(":thinking"), } // The OpenRouter model definition doesn't give us any hints about @@ -205,5 +226,18 @@ export const parseOpenRouterModel = ({ : anthropicModels["claude-3-7-sonnet-20250219"].maxTokens } + // Claude Sonnet 4 and Opus 4 are also "hybrid" thinking models + if (id.startsWith("anthropic/claude-sonnet-4")) { + modelInfo.maxTokens = id.includes("thinking") + ? anthropicModels["claude-sonnet-4-20250514:thinking"].maxTokens + : anthropicModels["claude-sonnet-4-20250514"].maxTokens + } + + if (id.startsWith("anthropic/claude-opus-4")) { + modelInfo.maxTokens = id.includes("thinking") + ? anthropicModels["claude-opus-4-20250514:thinking"].maxTokens + : anthropicModels["claude-opus-4-20250514"].maxTokens + } + return modelInfo } diff --git a/src/api/providers/openrouter.ts b/src/api/providers/openrouter.ts index 88b2729d65..9493c5dc5f 100644 --- a/src/api/providers/openrouter.ts +++ b/src/api/providers/openrouter.ts @@ -84,6 +84,12 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH promptCache, } = await this.fetchModel() + // For virtual :thinking models, use the base model ID for the API call + // Only strip :thinking from models that are artificially created virtual variants + const isVirtualThinkingModel = + modelId === "anthropic/claude-sonnet-4:thinking" || modelId === "anthropic/claude-opus-4:thinking" + const apiModelId = isVirtualThinkingModel ? modelId.replace(":thinking", "") : modelId + // Convert Anthropic messages to OpenAI format. let openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [ { role: "system", content: systemPrompt }, @@ -91,7 +97,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH ] // DeepSeek highly recommends using user instead of system role. - if (modelId.startsWith("deepseek/deepseek-r1") || modelId === "perplexity/sonar-reasoning") { + if (apiModelId.startsWith("deepseek/deepseek-r1") || apiModelId === "perplexity/sonar-reasoning") { openAiMessages = convertToR1Format([{ role: "user", content: systemPrompt }, ...messages]) } @@ -108,10 +114,19 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH // https://openrouter.ai/docs/transforms const completionParams: OpenRouterChatCompletionParams = { - model: modelId, + model: apiModelId, ...(maxTokens && maxTokens > 0 && { max_tokens: maxTokens }), temperature, - thinking, // OpenRouter is temporarily supporting this. + // For virtual :thinking models, use OpenRouter's reasoning tokens instead of Anthropic's thinking + ...(isVirtualThinkingModel && thinking + ? { + // Only use max_tokens if budget_tokens is specified, don't use effort for Anthropic models + ...(thinking?.budget_tokens && { reasoning: { max_tokens: thinking.budget_tokens } }), + } + : { + // For non-thinking models, use Anthropic's thinking parameter if available + thinking, + }), top_p: topP, messages: openAiMessages, stream: true, @@ -127,7 +142,10 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH }), // This way, the transforms field will only be included in the parameters when openRouterUseMiddleOutTransform is true. ...((this.options.openRouterUseMiddleOutTransform ?? true) && { transforms: ["middle-out"] }), - ...(REASONING_MODELS.has(modelId) && reasoningEffort && { reasoning: { effort: reasoningEffort } }), + // Original reasoning logic for non-virtual thinking models (like Grok) + ...(REASONING_MODELS.has(modelId) && + reasoningEffort && + !isVirtualThinkingModel && { reasoning: { effort: reasoningEffort } }), } const stream = await this.client.chat.completions.create(completionParams) @@ -144,6 +162,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH const delta = chunk.choices[0]?.delta + // Handle OpenRouter's reasoning tokens (for both virtual :thinking models and other reasoning models) if ("reasoning" in delta && delta.reasoning && typeof delta.reasoning === "string") { yield { type: "reasoning", text: delta.reasoning } } @@ -215,12 +234,27 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH } async completePrompt(prompt: string) { - let { id: modelId, maxTokens, thinking, temperature } = await this.fetchModel() + let { id: modelId, maxTokens, thinking, temperature, reasoningEffort } = await this.fetchModel() + + // For virtual :thinking models, use the base model ID for the API call + // Only strip :thinking from models that are artificially created virtual variants + const isVirtualThinkingModel = + modelId === "anthropic/claude-sonnet-4:thinking" || modelId === "anthropic/claude-opus-4:thinking" + const apiModelId = isVirtualThinkingModel ? modelId.replace(":thinking", "") : modelId const completionParams: OpenRouterChatCompletionParams = { - model: modelId, + model: apiModelId, max_tokens: maxTokens, - thinking, + // For virtual :thinking models, use OpenRouter's reasoning tokens instead of Anthropic's thinking + ...(isVirtualThinkingModel && thinking + ? { + // Only use max_tokens if budget_tokens is specified, don't use effort for Anthropic models + ...(thinking?.budget_tokens && { reasoning: { max_tokens: thinking.budget_tokens } }), + } + : { + // For non-thinking models, use Anthropic's thinking parameter if available + thinking, + }), temperature, messages: [{ role: "user", content: prompt }], stream: false, diff --git a/webview-ui/src/components/ui/hooks/useOpenRouterModelProviders.ts b/webview-ui/src/components/ui/hooks/useOpenRouterModelProviders.ts index 8a881acd59..d309889de9 100644 --- a/webview-ui/src/components/ui/hooks/useOpenRouterModelProviders.ts +++ b/webview-ui/src/components/ui/hooks/useOpenRouterModelProviders.ts @@ -65,7 +65,7 @@ async function getOpenRouterProvidersForModel(modelId: string) { inputPrice, outputPrice, description, - thinking: modelId === "anthropic/claude-3.7-sonnet:thinking", + thinking: modelId.endsWith(":thinking"), label: providerName, } @@ -75,7 +75,21 @@ async function getOpenRouterProvidersForModel(modelId: string) { modelInfo.supportsPromptCache = true modelInfo.cacheWritesPrice = 3.75 modelInfo.cacheReadsPrice = 0.3 - modelInfo.maxTokens = id === "anthropic/claude-3.7-sonnet:thinking" ? 64_000 : 8192 + modelInfo.maxTokens = modelId.endsWith(":thinking") ? 64_000 : 8192 + break + case modelId.startsWith("anthropic/claude-sonnet-4"): + modelInfo.supportsComputerUse = true + modelInfo.supportsPromptCache = true + modelInfo.cacheWritesPrice = 3.75 + modelInfo.cacheReadsPrice = 0.3 + modelInfo.maxTokens = modelId.endsWith(":thinking") ? 64_000 : 8192 + break + case modelId.startsWith("anthropic/claude-opus-4"): + modelInfo.supportsComputerUse = true + modelInfo.supportsPromptCache = true + modelInfo.cacheWritesPrice = 18.75 + modelInfo.cacheReadsPrice = 1.5 + modelInfo.maxTokens = modelId.endsWith(":thinking") ? 64_000 : 8192 break case modelId.startsWith("anthropic/claude-3.5-sonnet-20240620"): modelInfo.supportsPromptCache = true