@@ -84,14 +84,17 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
 			promptCache,
 		} = await this.fetchModel()
 
+		// For virtual :thinking models, use the base model ID for the API call
+		const apiModelId = modelId.endsWith(":thinking") ? modelId.replace(":thinking", "") : modelId
+
 		// Convert Anthropic messages to OpenAI format.
 		let openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
 			{ role: "system", content: systemPrompt },
 			...convertToOpenAiMessages(messages),
 		]
 
 		// DeepSeek highly recommends using user instead of system role.
-		if (modelId.startsWith("deepseek/deepseek-r1") || modelId === "perplexity/sonar-reasoning") {
+		if (apiModelId.startsWith("deepseek/deepseek-r1") || apiModelId === "perplexity/sonar-reasoning") {
 			openAiMessages = convertToR1Format([{ role: "user", content: systemPrompt }, ...messages])
 		}
 
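Per the comment in the hunk above, the `:thinking` suffix marks a virtual model on the client side; the actual API call uses the stripped base ID. A minimal standalone sketch of the same derivation (the helper name `getBaseModelId` is hypothetical, not part of this diff):

```ts
// Hypothetical helper mirroring the suffix-stripping above; not part of the diff.
function getBaseModelId(modelId: string): string {
	return modelId.endsWith(":thinking") ? modelId.replace(":thinking", "") : modelId
}

getBaseModelId("anthropic/claude-3.7-sonnet:thinking") // => "anthropic/claude-3.7-sonnet"
getBaseModelId("deepseek/deepseek-r1") // => "deepseek/deepseek-r1" (unchanged)
```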
@@ -108,10 +111,20 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
 
 		// https://openrouter.ai/docs/transforms
 		const completionParams: OpenRouterChatCompletionParams = {
-			model: modelId,
+			model: apiModelId,
 			...(maxTokens && maxTokens > 0 && { max_tokens: maxTokens }),
 			temperature,
-			thinking, // OpenRouter is temporarily supporting this.
+			// For virtual :thinking models, use OpenRouter's reasoning tokens instead of Anthropic's thinking
+			...(modelId.endsWith(":thinking") && thinking
+				? {
+						reasoning: thinking?.budget_tokens
+							? { max_tokens: thinking.budget_tokens }
+							: { effort: reasoningEffort || "medium" },
+					}
+				: {
+						// For non-thinking models, use Anthropic's thinking parameter if available
+						thinking,
+					}),
 			top_p: topP,
 			messages: openAiMessages,
 			stream: true,
@@ -127,7 +140,10 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
 			}),
 			// This way, the transforms field will only be included in the parameters when openRouterUseMiddleOutTransform is true.
 			...((this.options.openRouterUseMiddleOutTransform ?? true) && { transforms: ["middle-out"] }),
-			...(REASONING_MODELS.has(modelId) && reasoningEffort && { reasoning: { effort: reasoningEffort } }),
+			// Original reasoning logic for non-virtual thinking models (like Grok)
+			...(REASONING_MODELS.has(modelId) &&
+				reasoningEffort &&
+				!modelId.endsWith(":thinking") && { reasoning: { effort: reasoningEffort } }),
 		}
 
 		const stream = await this.client.chat.completions.create(completionParams)
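For a virtual `:thinking` model, the conditional spread above swaps Anthropic's `thinking` block for OpenRouter's unified `reasoning` parameter, preferring an explicit token budget over an effort level. An illustrative sketch of the two request shapes this produces (the model slug and budget value are examples, not taken from the diff):

```ts
// Budget configured: reasoning is capped by thinking.budget_tokens.
const withBudget = {
	model: "anthropic/claude-3.7-sonnet", // ":thinking" suffix already stripped
	reasoning: { max_tokens: 16_384 },
}

// No budget configured: falls back to an effort level.
const withEffort = {
	model: "anthropic/claude-3.7-sonnet",
	reasoning: { effort: "medium" }, // reasoningEffort || "medium"
}
```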
@@ -144,6 +160,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
 
 			const delta = chunk.choices[0]?.delta
 
+			// Handle OpenRouter's reasoning tokens (for both virtual :thinking models and other reasoning models)
 			if ("reasoning" in delta && delta.reasoning && typeof delta.reasoning === "string") {
 				yield { type: "reasoning", text: delta.reasoning }
 			}
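OpenRouter streams reasoning text on a `reasoning` field of the delta that the OpenAI SDK types do not declare, so the guard above narrows it with an `in` check before yielding. A self-contained sketch of that narrowing (the `Delta` type is a stand-in for the SDK's delta shape, not the real type):

```ts
// Stand-in for the SDK delta, which lacks a declared `reasoning` field.
type Delta = { content?: string | null; [key: string]: unknown }

function* emitReasoning(delta: Delta): Generator<{ type: "reasoning"; text: string }> {
	// Same narrowing as the diff: presence check, truthiness, then a string guard.
	if ("reasoning" in delta && delta.reasoning && typeof delta.reasoning === "string") {
		yield { type: "reasoning", text: delta.reasoning }
	}
}
```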
@@ -215,12 +232,25 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
 	}
 
 	async completePrompt(prompt: string) {
-		let { id: modelId, maxTokens, thinking, temperature } = await this.fetchModel()
+		let { id: modelId, maxTokens, thinking, temperature, reasoningEffort } = await this.fetchModel()
+
+		// For virtual :thinking models, use the base model ID for the API call
+		const apiModelId = modelId.endsWith(":thinking") ? modelId.replace(":thinking", "") : modelId
 
 		const completionParams: OpenRouterChatCompletionParams = {
-			model: modelId,
+			model: apiModelId,
 			max_tokens: maxTokens,
-			thinking,
+			// For virtual :thinking models, use OpenRouter's reasoning tokens instead of Anthropic's thinking
+			...(modelId.endsWith(":thinking") && thinking
+				? {
+						reasoning: thinking?.budget_tokens
+							? { max_tokens: thinking.budget_tokens }
+							: { effort: reasoningEffort || "medium" },
+					}
+				: {
+						// For non-thinking models, use Anthropic's thinking parameter if available
+						thinking,
+					}),
 			temperature,
 			messages: [{ role: "user", content: prompt }],
 			stream: false,
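`completePrompt` mirrors the streaming path: the same base-ID derivation and the same `reasoning`/`thinking` split, just with `stream: false` and a single user message. A hedged usage sketch (the constructor option names are assumed, not shown in this diff):

```ts
// Assumed option names; adjust to the handler's actual options type.
const handler = new OpenRouterHandler({
	openRouterApiKey: process.env.OPENROUTER_API_KEY,
	openRouterModelId: "anthropic/claude-3.7-sonnet:thinking",
})

// Non-streaming, single-turn call. The ":thinking" suffix is stripped from the
// model ID sent to the API and a `reasoning` parameter is attached instead.
const text = await handler.completePrompt("Explain middle-out transforms in one paragraph.")
```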