@@ -85,7 +85,10 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
8585 } = await this . fetchModel ( )
8686
8787 // For virtual :thinking models, use the base model ID for the API call
88- const apiModelId = modelId . endsWith ( ":thinking" ) ? modelId . replace ( ":thinking" , "" ) : modelId
88+ // Only strip :thinking from models that are artificially created virtual variants
89+ const isVirtualThinkingModel =
90+ modelId === "anthropic/claude-sonnet-4:thinking" || modelId === "anthropic/claude-opus-4:thinking"
91+ const apiModelId = isVirtualThinkingModel ? modelId . replace ( ":thinking" , "" ) : modelId
8992
9093 // Convert Anthropic messages to OpenAI format.
9194 let openAiMessages : OpenAI . Chat . ChatCompletionMessageParam [ ] = [
@@ -115,11 +118,10 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
115118 ...( maxTokens && maxTokens > 0 && { max_tokens : maxTokens } ) ,
116119 temperature,
117120 // For virtual :thinking models, use OpenRouter's reasoning tokens instead of Anthropic's thinking
118- ...( modelId . endsWith ( ":thinking" ) && thinking
121+ ...( isVirtualThinkingModel && thinking
119122 ? {
120- reasoning : thinking ?. budget_tokens
121- ? { max_tokens : thinking . budget_tokens }
122- : { effort : reasoningEffort || "medium" } ,
123+ // Only send reasoning.max_tokens when budget_tokens is specified; do not fall back to effort for Anthropic models
124+ ...( thinking ?. budget_tokens && { reasoning : { max_tokens : thinking . budget_tokens } } ) ,
123125 }
124126 : {
125127 // For non-thinking models, use Anthropic's thinking parameter if available
@@ -143,7 +145,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
143145 // Original reasoning logic for non-virtual thinking models (like Grok)
144146 ...( REASONING_MODELS . has ( modelId ) &&
145147 reasoningEffort &&
146- ! modelId . endsWith ( ":thinking" ) && { reasoning : { effort : reasoningEffort } } ) ,
148+ ! isVirtualThinkingModel && { reasoning : { effort : reasoningEffort } } ) ,
147149 }
148150
149151 const stream = await this . client . chat . completions . create ( completionParams )
@@ -235,17 +237,19 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
235237 let { id : modelId , maxTokens, thinking, temperature, reasoningEffort } = await this . fetchModel ( )
236238
237239 // For virtual :thinking models, use the base model ID for the API call
238- const apiModelId = modelId . endsWith ( ":thinking" ) ? modelId . replace ( ":thinking" , "" ) : modelId
240+ // Only strip :thinking from models that are artificially created virtual variants
241+ const isVirtualThinkingModel =
242+ modelId === "anthropic/claude-sonnet-4:thinking" || modelId === "anthropic/claude-opus-4:thinking"
243+ const apiModelId = isVirtualThinkingModel ? modelId . replace ( ":thinking" , "" ) : modelId
239244
240245 const completionParams : OpenRouterChatCompletionParams = {
241246 model : apiModelId ,
242247 max_tokens : maxTokens ,
243248 // For virtual :thinking models, use OpenRouter's reasoning tokens instead of Anthropic's thinking
244- ...( modelId . endsWith ( ":thinking" ) && thinking
249+ ...( isVirtualThinkingModel && thinking
245250 ? {
246- reasoning : thinking ?. budget_tokens
247- ? { max_tokens : thinking . budget_tokens }
248- : { effort : reasoningEffort || "medium" } ,
251+ // Only send reasoning.max_tokens when budget_tokens is specified; do not fall back to effort for Anthropic models
252+ ...( thinking ?. budget_tokens && { reasoning : { max_tokens : thinking . budget_tokens } } ) ,
249253 }
250254 : {
251255 // For non-thinking models, use Anthropic's thinking parameter if available
0 commit comments