@@ -28,6 +28,25 @@ type OpenRouterChatCompletionParams = OpenAI.Chat.ChatCompletionCreateParams & {
 	}
 }
 
+// See `OpenAI.Chat.Completions.ChatCompletionChunk["usage"]`
+// `CompletionsAPI.CompletionUsage`
+interface CompletionUsage {
+	completion_tokens?: number
+	prompt_tokens?: number
+	total_tokens?: number
+	cost?: number
+
+	/**
+	 * Breakdown of tokens used in a completion.
+	 */
+	// completion_tokens_details?: CompletionUsage.CompletionTokensDetails;
+
+	/**
+	 * Breakdown of tokens used in the prompt.
+	 */
+	// prompt_tokens_details?: CompletionUsage.PromptTokensDetails;
+}
+
 export class OpenRouterHandler extends BaseProvider implements SingleCompletionHandler {
 	protected options: ApiHandlerOptions
 	private client: OpenAI
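
Note: this local `CompletionUsage` mirrors the usage object OpenRouter attaches to the final streamed chunk, with `cost` as an OpenRouter-specific extension on top of the standard OpenAI token counts. A minimal illustration of the payload it models (values are made up):

```ts
// Hypothetical final-chunk usage payload matching the CompletionUsage interface above.
const usage: CompletionUsage = {
	prompt_tokens: 1200, // tokens consumed by the request
	completion_tokens: 350, // tokens generated in the response
	total_tokens: 1550,
	cost: 0.0042, // OpenRouter extension: upstream cost as reported by OpenRouter
}
```
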
@@ -46,7 +65,15 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
 		systemPrompt: string,
 		messages: Anthropic.Messages.MessageParam[],
 	): AsyncGenerator<ApiStreamChunk> {
-		let { id: modelId, maxTokens, thinking, temperature, topP, reasoningEffort } = this.getModel()
+		let {
+			id: modelId,
+			maxTokens,
+			thinking,
+			temperature,
+			supportsPromptCache,
+			topP,
+			reasoningEffort,
+		} = this.getModel()
 
 		// Convert Anthropic messages to OpenAI format.
 		let openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
@@ -59,46 +86,42 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
 			openAiMessages = convertToR1Format([{ role: "user", content: systemPrompt }, ...messages])
 		}
 
-		// prompt caching: https://openrouter.ai/docs/prompt-caching
-		// this is specifically for claude models (some models may 'support prompt caching' automatically without this)
-		switch (true) {
-			case modelId.startsWith("anthropic/"):
-				openAiMessages[0] = {
-					role: "system",
-					content: [
-						{
-							type: "text",
-							text: systemPrompt,
-							// @ts-ignore-next-line
-							cache_control: { type: "ephemeral" },
-						},
-					],
-				}
+		// Prompt caching: https://openrouter.ai/docs/prompt-caching
+		// Now with Gemini support: https://openrouter.ai/docs/features/prompt-caching
+		if (supportsPromptCache) {
+			openAiMessages[0] = {
+				role: "system",
+				content: [
+					{
+						type: "text",
+						text: systemPrompt,
+						// @ts-ignore-next-line
+						cache_control: { type: "ephemeral" },
+					},
+				],
+			}
 
-				// Add cache_control to the last two user messages
-				// (note: this works because we only ever add one user message at a time, but if we added multiple we'd need to mark the user message before the last assistant message)
-				const lastTwoUserMessages = openAiMessages.filter((msg) => msg.role === "user").slice(-2)
+			// Add cache_control to the last two user messages
+			// (note: this works because we only ever add one user message at a time, but if we added multiple we'd need to mark the user message before the last assistant message)
+			const lastTwoUserMessages = openAiMessages.filter((msg) => msg.role === "user").slice(-2)
 
-				lastTwoUserMessages.forEach((msg) => {
-					if (typeof msg.content === "string") {
-						msg.content = [{ type: "text", text: msg.content }]
-					}
+			lastTwoUserMessages.forEach((msg) => {
+				if (typeof msg.content === "string") {
+					msg.content = [{ type: "text", text: msg.content }]
+				}
 
-					if (Array.isArray(msg.content)) {
-						// NOTE: this is fine since env details will always be added at the end. but if it weren't there, and the user added a image_url type message, it would pop a text part before it and then move it after to the end.
-						let lastTextPart = msg.content.filter((part) => part.type === "text").pop()
+				if (Array.isArray(msg.content)) {
+					// NOTE: this is fine since env details will always be added at the end. but if it weren't there, and the user added a image_url type message, it would pop a text part before it and then move it after to the end.
+					let lastTextPart = msg.content.filter((part) => part.type === "text").pop()
 
-						if (!lastTextPart) {
-							lastTextPart = { type: "text", text: "..." }
-							msg.content.push(lastTextPart)
-						}
-						// @ts-ignore-next-line
-						lastTextPart["cache_control"] = { type: "ephemeral" }
+					if (!lastTextPart) {
+						lastTextPart = { type: "text", text: "..." }
+						msg.content.push(lastTextPart)
 					}
-				})
-				break
-			default:
-				break
+					// @ts-ignore-next-line
+					lastTextPart["cache_control"] = { type: "ephemeral" }
+				}
+			})
 		}
 
 		// https://openrouter.ai/docs/transforms
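
After this pass, the last text part of each of the two most recent user messages carries the cache breakpoint. A sketch of the resulting shape (assuming the file's existing `OpenAI` import; the ignore comment is needed because `cache_control` is a provider extension absent from the OpenAI SDK types):

```ts
// Sketch: what a user message looks like after the cache_control pass above.
const marked: OpenAI.Chat.ChatCompletionMessageParam = {
	role: "user",
	content: [
		{ type: "text", text: "earlier context..." },
		{
			type: "text",
			text: "latest request",
			// @ts-ignore-next-line -- provider extension, not in the SDK types
			cache_control: { type: "ephemeral" },
		},
	],
}
```
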
@@ -125,9 +148,9 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
 
 		const stream = await this.client.chat.completions.create(completionParams)
 
-		let lastUsage
+		let lastUsage: CompletionUsage | undefined = undefined
 
-		for await (const chunk of stream as unknown as AsyncIterable<OpenAI.Chat.Completions.ChatCompletionChunk>) {
+		for await (const chunk of stream) {
 			// OpenRouter returns an error object instead of the OpenAI SDK throwing an error.
 			if ("error" in chunk) {
 				const error = chunk.error as { message?: string; code?: number }
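
The in-band error case this loop guards against looks roughly like the following; the shape is inferred from the cast above, and the values are purely illustrative:

```ts
// Illustrative chunk: OpenRouter reports failures as a payload inside the
// stream rather than letting the OpenAI SDK throw.
const errorChunk = {
	error: { message: "Rate limit exceeded", code: 429 },
}
```
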
@@ -152,16 +175,12 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
 		}
 
 		if (lastUsage) {
-			yield this.processUsageMetrics(lastUsage)
-		}
-	}
-
-	processUsageMetrics(usage: any): ApiStreamUsageChunk {
-		return {
-			type: "usage",
-			inputTokens: usage?.prompt_tokens || 0,
-			outputTokens: usage?.completion_tokens || 0,
-			totalCost: usage?.cost || 0,
+			yield {
+				type: "usage",
+				inputTokens: lastUsage.prompt_tokens || 0,
+				outputTokens: lastUsage.completion_tokens || 0,
+				totalCost: lastUsage?.cost || 0,
+			}
 		}
 	}
 
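
With `processUsageMetrics` inlined, callers still receive the same trailing usage chunk. A hypothetical consumer (handler construction elided; `handler`, `systemPrompt`, and `messages` are assumed names, not part of this change):

```ts
// Drain the generator; the usage chunk arrives after the content chunks.
for await (const chunk of handler.createMessage(systemPrompt, messages)) {
	if (chunk.type === "usage") {
		console.log(`tokens in/out: ${chunk.inputTokens}/${chunk.outputTokens}, cost: ${chunk.totalCost}`)
	}
}
```
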
@@ -171,7 +190,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
 
 		let id = modelId ?? openRouterDefaultModelId
 		const info = modelInfo ?? openRouterDefaultModelInfo
-
+		const supportsPromptCache = modelInfo?.supportsPromptCache
 		const isDeepSeekR1 = id.startsWith("deepseek/deepseek-r1") || modelId === "perplexity/sonar-reasoning"
 		const defaultTemperature = isDeepSeekR1 ? DEEP_SEEK_DEFAULT_TEMPERATURE : 0
 		const topP = isDeepSeekR1 ? 0.95 : undefined
@@ -180,6 +199,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
 			id,
 			info,
 			...getModelParams({ options: this.options, model: info, defaultTemperature }),
+			supportsPromptCache,
 			topP,
 		}
 	}
@@ -269,6 +289,24 @@ export async function getOpenRouterModels(options?: ApiHandlerOptions) {
 			modelInfo.cacheReadsPrice = 0.03
 			modelInfo.maxTokens = 8192
 			break
+		// case rawModel.id.startsWith("google/gemini-2.5-flash-preview"):
+		// 	modelInfo.supportsPromptCache = true
+		// 	break
+		case rawModel.id.startsWith("google/gemini-2.5-pro-preview-03-25"):
+			modelInfo.supportsPromptCache = true
+			break
+		case rawModel.id.startsWith("google/gemini-2.0-flash-001"):
+			modelInfo.supportsPromptCache = true
+			break
+		// case rawModel.id.startsWith("google/gemini-2.0-flash-lite-001"):
+		// 	modelInfo.supportsPromptCache = true
+		// 	break
+		case rawModel.id.startsWith("google/gemini-flash-1.5"):
+			modelInfo.supportsPromptCache = true
+			break
+		case rawModel.id.startsWith("google/gemini-pro-1.5"):
+			modelInfo.supportsPromptCache = true
+			break
 		default:
 			break
 	}
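
Since every enabled Gemini case in this switch sets the same flag, the same effect could be expressed as data rather than cases; a sketch, not part of the change:

```ts
// Equivalent, data-driven form of the Gemini cases above.
const cacheablePrefixes = [
	"google/gemini-2.5-pro-preview-03-25",
	"google/gemini-2.0-flash-001",
	"google/gemini-flash-1.5",
	"google/gemini-pro-1.5",
]

if (cacheablePrefixes.some((prefix) => rawModel.id.startsWith(prefix))) {
	modelInfo.supportsPromptCache = true
}
```

The `switch (true)` form in the commit keeps each model's pricing overrides (like the `cacheReadsPrice` case above it) in one place, which is likely why it was kept.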