Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 35 additions & 1 deletion src/api/providers/fetchers/openrouter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,27 @@ export async function getOpenRouterModels(options?: ApiHandlerOptions): Promise<
modality: architecture?.modality,
maxTokens: id.startsWith("anthropic/") ? top_provider?.max_completion_tokens : 0,
})

// Create virtual :thinking variants for Claude 4 models
if (id === "anthropic/claude-sonnet-4" && models[id]) {
const thinkingId = `${id}:thinking`
models[thinkingId] = parseOpenRouterModel({
id: thinkingId,
model,
modality: architecture?.modality,
maxTokens: id.startsWith("anthropic/") ? top_provider?.max_completion_tokens : 0,
})
}

if (id === "anthropic/claude-opus-4" && models[id]) {
const thinkingId = `${id}:thinking`
models[thinkingId] = parseOpenRouterModel({
id: thinkingId,
model,
modality: architecture?.modality,
maxTokens: id.startsWith("anthropic/") ? top_provider?.max_completion_tokens : 0,
})
}
}
} catch (error) {
console.error(
Expand Down Expand Up @@ -186,7 +207,7 @@ export const parseOpenRouterModel = ({
cacheWritesPrice,
cacheReadsPrice,
description: model.description,
thinking: id === "anthropic/claude-3.7-sonnet:thinking",
thinking: id.endsWith(":thinking"),
}

// The OpenRouter model definition doesn't give us any hints about
Expand All @@ -205,5 +226,18 @@ export const parseOpenRouterModel = ({
: anthropicModels["claude-3-7-sonnet-20250219"].maxTokens
}

// Claude Sonnet 4 and Opus 4 are also "hybrid" thinking models
if (id.startsWith("anthropic/claude-sonnet-4")) {
modelInfo.maxTokens = id.includes("thinking")
? anthropicModels["claude-sonnet-4-20250514:thinking"].maxTokens
: anthropicModels["claude-sonnet-4-20250514"].maxTokens
}

if (id.startsWith("anthropic/claude-opus-4")) {
modelInfo.maxTokens = id.includes("thinking")
? anthropicModels["claude-opus-4-20250514:thinking"].maxTokens
: anthropicModels["claude-opus-4-20250514"].maxTokens
}

return modelInfo
}
48 changes: 41 additions & 7 deletions src/api/providers/openrouter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -84,14 +84,20 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
promptCache,
} = await this.fetchModel()

// For virtual :thinking models, use the base model ID for the API call
// Only strip :thinking from models that are artificially created virtual variants
const isVirtualThinkingModel =
modelId === "anthropic/claude-sonnet-4:thinking" || modelId === "anthropic/claude-opus-4:thinking"
const apiModelId = isVirtualThinkingModel ? modelId.replace(":thinking", "") : modelId

// Convert Anthropic messages to OpenAI format.
let openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
{ role: "system", content: systemPrompt },
...convertToOpenAiMessages(messages),
]

// DeepSeek highly recommends using user instead of system role.
if (modelId.startsWith("deepseek/deepseek-r1") || modelId === "perplexity/sonar-reasoning") {
if (apiModelId.startsWith("deepseek/deepseek-r1") || apiModelId === "perplexity/sonar-reasoning") {
openAiMessages = convertToR1Format([{ role: "user", content: systemPrompt }, ...messages])
}

Expand All @@ -108,10 +114,19 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH

// https://openrouter.ai/docs/transforms
const completionParams: OpenRouterChatCompletionParams = {
model: modelId,
model: apiModelId,
...(maxTokens && maxTokens > 0 && { max_tokens: maxTokens }),
temperature,
thinking, // OpenRouter is temporarily supporting this.
// For virtual :thinking models, use OpenRouter's reasoning tokens instead of Anthropic's thinking
...(isVirtualThinkingModel && thinking
? {
// Only set reasoning.max_tokens when budget_tokens is specified; don't use effort for Anthropic models.
...(thinking?.budget_tokens && { reasoning: { max_tokens: thinking.budget_tokens } }),
}
: {
// In every other case (including native :thinking models), pass Anthropic's thinking parameter through if available
thinking,
}),
top_p: topP,
messages: openAiMessages,
stream: true,
Expand All @@ -127,7 +142,10 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
}),
// This way, the transforms field will only be included in the parameters when openRouterUseMiddleOutTransform is true.
...((this.options.openRouterUseMiddleOutTransform ?? true) && { transforms: ["middle-out"] }),
...(REASONING_MODELS.has(modelId) && reasoningEffort && { reasoning: { effort: reasoningEffort } }),
// Original reasoning-effort logic for models listed in REASONING_MODELS (like Grok); skipped for virtual :thinking models
...(REASONING_MODELS.has(modelId) &&
reasoningEffort &&
!isVirtualThinkingModel && { reasoning: { effort: reasoningEffort } }),
}

const stream = await this.client.chat.completions.create(completionParams)
Expand All @@ -144,6 +162,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH

const delta = chunk.choices[0]?.delta

// Handle OpenRouter's reasoning tokens (for both virtual :thinking models and other reasoning models)
if ("reasoning" in delta && delta.reasoning && typeof delta.reasoning === "string") {
yield { type: "reasoning", text: delta.reasoning }
}
Expand Down Expand Up @@ -215,12 +234,27 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
}

async completePrompt(prompt: string) {
let { id: modelId, maxTokens, thinking, temperature } = await this.fetchModel()
let { id: modelId, maxTokens, thinking, temperature, reasoningEffort } = await this.fetchModel()

// For virtual :thinking models, use the base model ID for the API call
// Only strip :thinking from models that are artificially created virtual variants
const isVirtualThinkingModel =
modelId === "anthropic/claude-sonnet-4:thinking" || modelId === "anthropic/claude-opus-4:thinking"
const apiModelId = isVirtualThinkingModel ? modelId.replace(":thinking", "") : modelId

const completionParams: OpenRouterChatCompletionParams = {
model: modelId,
model: apiModelId,
max_tokens: maxTokens,
thinking,
// For virtual :thinking models, use OpenRouter's reasoning tokens instead of Anthropic's thinking
...(isVirtualThinkingModel && thinking
? {
// Only set reasoning.max_tokens when budget_tokens is specified; don't use effort for Anthropic models.
...(thinking?.budget_tokens && { reasoning: { max_tokens: thinking.budget_tokens } }),
}
: {
// In every other case (including native :thinking models), pass Anthropic's thinking parameter through if available
thinking,
}),
temperature,
messages: [{ role: "user", content: prompt }],
stream: false,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ async function getOpenRouterProvidersForModel(modelId: string) {
inputPrice,
outputPrice,
description,
thinking: modelId === "anthropic/claude-3.7-sonnet:thinking",
thinking: modelId.endsWith(":thinking"),
label: providerName,
}

Expand All @@ -75,7 +75,21 @@ async function getOpenRouterProvidersForModel(modelId: string) {
modelInfo.supportsPromptCache = true
modelInfo.cacheWritesPrice = 3.75
modelInfo.cacheReadsPrice = 0.3
modelInfo.maxTokens = id === "anthropic/claude-3.7-sonnet:thinking" ? 64_000 : 8192
modelInfo.maxTokens = modelId.endsWith(":thinking") ? 64_000 : 8192
break
case modelId.startsWith("anthropic/claude-sonnet-4"):
modelInfo.supportsComputerUse = true
modelInfo.supportsPromptCache = true
modelInfo.cacheWritesPrice = 3.75
modelInfo.cacheReadsPrice = 0.3
modelInfo.maxTokens = modelId.endsWith(":thinking") ? 64_000 : 8192
break
case modelId.startsWith("anthropic/claude-opus-4"):
modelInfo.supportsComputerUse = true
modelInfo.supportsPromptCache = true
modelInfo.cacheWritesPrice = 18.75
modelInfo.cacheReadsPrice = 1.5
modelInfo.maxTokens = modelId.endsWith(":thinking") ? 64_000 : 8192
break
case modelId.startsWith("anthropic/claude-3.5-sonnet-20240620"):
modelInfo.supportsPromptCache = true
Expand Down