@@ -52,10 +52,14 @@ export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
       ...convertToOpenAiMessages(messages),
     ]

+    const { id: modelId, info: modelInfo } = this.getModel()
+
     // prompt caching: https://openrouter.ai/docs/prompt-caching
     // this is specifically for claude models (some models may 'support prompt caching' automatically without this)
-    switch (this.getModel().id) {
+    switch (modelId) {
+      case "anthropic/claude-3.7-sonnet:thinking":
       case "anthropic/claude-3.7-sonnet":
+      case "anthropic/claude-3.7-sonnet:beta":
       case "anthropic/claude-3.5-sonnet":
       case "anthropic/claude-3.5-sonnet:beta":
       case "anthropic/claude-3.5-sonnet-20240620":
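
The body of this switch is truncated in the hunk above. Per the linked OpenRouter prompt-caching docs, opting a Claude model in means tagging message content parts with an Anthropic-style `cache_control` marker. A minimal sketch of the idea, assuming the OpenAI-format message shape this handler builds (`systemPrompt` is assumed in scope; this is an illustration, not the PR's exact switch body):

```ts
// Illustration only, not the PR's code: a cache breakpoint on the
// system prompt, which OpenRouter forwards to Anthropic.
const cachedSystemMessage = {
  role: "system" as const,
  content: [
    {
      type: "text" as const,
      text: systemPrompt, // assumed in scope, as in createMessage()
      cache_control: { type: "ephemeral" }, // Anthropic-style cache marker
    },
  ],
}
```
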
@@ -103,31 +107,25 @@ export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
         break
     }

-    // Not sure how openrouter defaults max tokens when no value is provided, but the anthropic api requires this value and since they offer both 4096 and 8192 variants, we should ensure 8192.
-    // (models usually default to max tokens allowed)
-    let maxTokens: number | undefined
-    switch (this.getModel().id) {
-      case "anthropic/claude-3.7-sonnet":
-      case "anthropic/claude-3.5-sonnet":
-      case "anthropic/claude-3.5-sonnet:beta":
-      case "anthropic/claude-3.5-sonnet-20240620":
-      case "anthropic/claude-3.5-sonnet-20240620:beta":
-      case "anthropic/claude-3-5-haiku":
-      case "anthropic/claude-3-5-haiku:beta":
-      case "anthropic/claude-3-5-haiku-20241022":
-      case "anthropic/claude-3-5-haiku-20241022:beta":
-        maxTokens = 8_192
-        break
+    // Not sure how openrouter defaults max tokens when no value is
+    // provided, but the Anthropic API requires this value and since they
+    // offer both 4096 and 8192 variants, we should ensure 8192.
+    // (Models usually default to max tokens allowed.)
+    let maxTokens: number | undefined = undefined
+
+    if (modelId.startsWith("anthropic/claude-3.5")) {
+      maxTokens = modelInfo.maxTokens ?? 8_192
+    }
+
+    if (modelId.startsWith("anthropic/claude-3.7")) {
+      maxTokens = modelInfo.maxTokens ?? 16_384
     }

     let defaultTemperature = OPENROUTER_DEFAULT_TEMPERATURE
     let topP: number | undefined = undefined

     // Handle models based on deepseek-r1
-    if (
-      this.getModel().id.startsWith("deepseek/deepseek-r1") ||
-      this.getModel().id === "perplexity/sonar-reasoning"
-    ) {
+    if (modelId.startsWith("deepseek/deepseek-r1") || modelId === "perplexity/sonar-reasoning") {
       // Recommended temperature for DeepSeek reasoning models
       defaultTemperature = DEEP_SEEK_DEFAULT_TEMPERATURE
       // DeepSeek highly recommends using user instead of system role
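
The `??` fallbacks above mean any `maxTokens` carried on the model metadata wins, and the hard-coded 8_192 / 16_384 values apply only when the metadata leaves it unset. A self-contained sketch with hypothetical values:

```ts
// Hypothetical metadata objects, for illustration only.
const unset: { maxTokens?: number } = {}
const pinned: { maxTokens?: number } = { maxTokens: 4_096 }

console.log(unset.maxTokens ?? 8_192) // 8192: metadata unset, fallback applies
console.log(pinned.maxTokens ?? 8_192) // 4096: metadata wins over the fallback
```
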
@@ -136,24 +134,37 @@ export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
       topP = 0.95
     }

+    let temperature = this.options.modelTemperature ?? defaultTemperature
+
+    if (modelInfo.thinking) {
+      temperature = 1.0
+    }
+
     // https://openrouter.ai/docs/transforms
     let fullResponseText = ""
-    const stream = await this.client.chat.completions.create({
-      model: this.getModel().id,
+
+    const completionParams: OpenRouterChatCompletionParams = {
+      model: modelId,
       max_tokens: maxTokens,
-      temperature: this.options.modelTemperature ?? defaultTemperature,
+      temperature,
       top_p: topP,
       messages: openAiMessages,
       stream: true,
       include_reasoning: true,
       // This way, the transforms field will only be included in the parameters when openRouterUseMiddleOutTransform is true.
       ...(this.options.openRouterUseMiddleOutTransform && { transforms: ["middle-out"] }),
-    } as OpenRouterChatCompletionParams)
+    }
+
+    console.log("OpenRouter completionParams:", completionParams)
+
+    const stream = await this.client.chat.completions.create(completionParams)

     let genId: string | undefined

     for await (const chunk of stream as unknown as AsyncIterable<OpenAI.Chat.Completions.ChatCompletionChunk>) {
-      // openrouter returns an error object instead of the openai sdk throwing an error
+      console.log("OpenRouter chunk:", chunk)
+
+      // OpenRouter returns an error object instead of the OpenAI SDK throwing an error.
       if ("error" in chunk) {
         const error = chunk.error as { message?: string; code?: number }
         console.error(`OpenRouter API Error: ${error?.code} - ${error?.message}`)
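
One note on the new `modelInfo.thinking` branch earlier in this hunk: it pins `temperature` to 1.0 even when the user supplied `modelTemperature`, which matches Anthropic's requirement that extended thinking runs with temperature 1. A hypothetical helper (not in the PR) restating that precedence:

```ts
// Not part of the PR; restates the precedence the hunk implements.
function resolveTemperature(
  userOverride: number | undefined,
  familyDefault: number,
  isThinkingVariant: boolean,
): number {
  // Thinking variants ignore both the user override and the family default.
  return isThinkingVariant ? 1.0 : (userOverride ?? familyDefault)
}

console.log(resolveTemperature(0.2, 0.7, false)) // 0.2: user override wins
console.log(resolveTemperature(0.2, 0.7, true)) // 1: thinking pins temperature
```
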
@@ -165,19 +176,22 @@ export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
       }

       const delta = chunk.choices[0]?.delta
+
       if ("reasoning" in delta && delta.reasoning) {
         yield {
           type: "reasoning",
           text: delta.reasoning,
         } as ApiStreamChunk
       }
+
       if (delta?.content) {
         fullResponseText += delta.content
         yield {
           type: "text",
           text: delta.content,
         } as ApiStreamChunk
       }
+
       // if (chunk.usage) {
       //   yield {
       //     type: "usage",
@@ -187,10 +201,12 @@ export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
       //   }
     }

-    // retry fetching generation details
+    // Retry fetching generation details.
     let attempt = 0
+
     while (attempt++ < 10) {
       await delay(200) // FIXME: necessary delay to ensure generation endpoint is ready
+
       try {
         const response = await axios.get(`https://openrouter.ai/api/v1/generation?id=${genId}`, {
           headers: {
@@ -201,6 +217,7 @@ export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {

         const generation = response.data?.data
         console.log("OpenRouter generation details:", response.data)
+
         yield {
           type: "usage",
           // cacheWriteTokens: 0,
@@ -211,20 +228,21 @@ export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
           totalCost: generation?.total_cost || 0,
           fullResponseText,
         } as OpenRouterApiStreamUsageChunk
+
         return
       } catch (error) {
         // ignore if fails
         console.error("Error fetching OpenRouter generation details:", error)
       }
     }
   }
-  getModel(): { id: string; info: ModelInfo } {
+
+  getModel() {
     const modelId = this.options.openRouterModelId
     const modelInfo = this.options.openRouterModelInfo
-    if (modelId && modelInfo) {
-      return { id: modelId, info: modelInfo }
-    }
-    return { id: openRouterDefaultModelId, info: openRouterDefaultModelInfo }
+    return modelId && modelInfo
+      ? { id: modelId, info: modelInfo }
+      : { id: openRouterDefaultModelId, info: openRouterDefaultModelInfo }
   }

   async completePrompt(prompt: string): Promise<string> {
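
On the `getModel()` change above: only the explicit return annotation and the if/return were dropped; TypeScript infers the same `{ id: string; info: ModelInfo }` shape from both arms of the ternary, so callers see an unchanged type. A self-contained restatement with assumed stand-ins for the imported defaults:

```ts
// Stand-ins for the real imports; shapes and values are assumed.
type ModelInfo = { maxTokens?: number; thinking?: boolean }
const openRouterDefaultModelId = "anthropic/claude-3.7-sonnet"
const openRouterDefaultModelInfo: ModelInfo = { maxTokens: 16_384 }

function getModel(modelId?: string, modelInfo?: ModelInfo) {
  // Inferred return type: { id: string; info: ModelInfo }
  return modelId && modelInfo
    ? { id: modelId, info: modelInfo }
    : { id: openRouterDefaultModelId, info: openRouterDefaultModelInfo }
}
```
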
@@ -247,6 +265,7 @@ export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
       if (error instanceof Error) {
         throw new Error(`OpenRouter completion error: ${error.message}`)
       }
+
       throw error
     }
   }
@@ -268,21 +287,31 @@ export async function getOpenRouterModels() {
       inputPrice: parseApiPrice(rawModel.pricing?.prompt),
       outputPrice: parseApiPrice(rawModel.pricing?.completion),
       description: rawModel.description,
+      thinking: rawModel.id === "anthropic/claude-3.7-sonnet:thinking",
     }

     switch (rawModel.id) {
+      case "anthropic/claude-3.7-sonnet:thinking":
       case "anthropic/claude-3.7-sonnet":
       case "anthropic/claude-3.7-sonnet:beta":
+        modelInfo.maxTokens = 16_384
+        modelInfo.supportsComputerUse = true
+        modelInfo.supportsPromptCache = true
+        modelInfo.cacheWritesPrice = 3.75
+        modelInfo.cacheReadsPrice = 0.3
+        break
       case "anthropic/claude-3.5-sonnet":
       case "anthropic/claude-3.5-sonnet:beta":
         // NOTE: This needs to be synced with api.ts/openrouter default model info.
+        modelInfo.maxTokens = 8_192
         modelInfo.supportsComputerUse = true
         modelInfo.supportsPromptCache = true
         modelInfo.cacheWritesPrice = 3.75
         modelInfo.cacheReadsPrice = 0.3
         break
       case "anthropic/claude-3.5-sonnet-20240620":
       case "anthropic/claude-3.5-sonnet-20240620:beta":
+        modelInfo.maxTokens = 8_192
         modelInfo.supportsPromptCache = true
         modelInfo.cacheWritesPrice = 3.75
         modelInfo.cacheReadsPrice = 0.3
@@ -295,6 +324,7 @@ export async function getOpenRouterModels() {
       case "anthropic/claude-3.5-haiku:beta":
       case "anthropic/claude-3.5-haiku-20241022":
       case "anthropic/claude-3.5-haiku-20241022:beta":
+        modelInfo.maxTokens = 8_192
         modelInfo.supportsPromptCache = true
         modelInfo.cacheWritesPrice = 1.25
         modelInfo.cacheReadsPrice = 0.1