@@ -6,7 +6,7 @@ import OpenAI from "openai"
 import { ApiHandlerOptions, ModelInfo, openRouterDefaultModelId, openRouterDefaultModelInfo } from "../../shared/api"
 import { parseApiPrice } from "../../utils/cost"
 import { convertToOpenAiMessages } from "../transform/openai-format"
-import { ApiStreamChunk, ApiStreamUsageChunk } from "../transform/stream"
+import { ApiStreamChunk } from "../transform/stream"
 import { convertToR1Format } from "../transform/r1-format"

 import { DEFAULT_HEADERS, DEEP_SEEK_DEFAULT_TEMPERATURE } from "./constants"
@@ -28,6 +28,22 @@ type OpenRouterChatCompletionParams = OpenAI.Chat.ChatCompletionCreateParams & {
 	}
 }

+// See `OpenAI.Chat.Completions.ChatCompletionChunk["usage"]`
+// (`CompletionsAPI.CompletionUsage`).
+// See also: https://openrouter.ai/docs/use-cases/usage-accounting
+interface CompletionUsage {
+	completion_tokens?: number
+	completion_tokens_details?: {
+		reasoning_tokens?: number
+	}
+	prompt_tokens?: number
+	prompt_tokens_details?: {
+		cached_tokens?: number
+	}
+	total_tokens?: number
+	cost?: number
+}
+
 export class OpenRouterHandler extends BaseProvider implements SingleCompletionHandler {
 	protected options: ApiHandlerOptions
 	private client: OpenAI
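For context, a populated usage object on the final stream chunk looks roughly like the following. This is a sketch based on the usage-accounting docs linked above; the field values are illustrative, not taken from a real response:

```typescript
// Illustrative only — shape per https://openrouter.ai/docs/use-cases/usage-accounting
const exampleUsage: CompletionUsage = {
	prompt_tokens: 1250,
	prompt_tokens_details: { cached_tokens: 1024 },
	completion_tokens: 320,
	completion_tokens_details: { reasoning_tokens: 96 },
	total_tokens: 1570,
	cost: 0.00142, // USD credits, as reported by OpenRouter
}
```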
@@ -46,7 +62,15 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
 		systemPrompt: string,
 		messages: Anthropic.Messages.MessageParam[],
 	): AsyncGenerator<ApiStreamChunk> {
-		let { id: modelId, maxTokens, thinking, temperature, topP, reasoningEffort } = this.getModel()
+		let {
+			id: modelId,
+			maxTokens,
+			thinking,
+			temperature,
+			supportsPromptCache,
+			topP,
+			reasoningEffort,
+		} = this.getModel()

 		// Convert Anthropic messages to OpenAI format.
 		let openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
@@ -59,46 +83,42 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
 			openAiMessages = convertToR1Format([{ role: "user", content: systemPrompt }, ...messages])
 		}

-		// prompt caching: https://openrouter.ai/docs/prompt-caching
-		// this is specifically for claude models (some models may 'support prompt caching' automatically without this)
-		switch (true) {
-			case modelId.startsWith("anthropic/"):
-				openAiMessages[0] = {
-					role: "system",
-					content: [
-						{
-							type: "text",
-							text: systemPrompt,
-							// @ts-ignore-next-line
-							cache_control: { type: "ephemeral" },
-						},
-					],
-				}
+		// Prompt caching: https://openrouter.ai/docs/prompt-caching
+		// Now with Gemini support: https://openrouter.ai/docs/features/prompt-caching
+		if (supportsPromptCache) {
+			openAiMessages[0] = {
+				role: "system",
+				content: [
+					{
+						type: "text",
+						text: systemPrompt,
+						// @ts-ignore-next-line
+						cache_control: { type: "ephemeral" },
+					},
+				],
+			}

-				// Add cache_control to the last two user messages
-				// (note: this works because we only ever add one user message at a time, but if we added multiple we'd need to mark the user message before the last assistant message)
-				const lastTwoUserMessages = openAiMessages.filter((msg) => msg.role === "user").slice(-2)
+			// Add cache_control to the last two user messages
+			// (note: this works because we only ever add one user message at a time, but if we added multiple we'd need to mark the user message before the last assistant message)
+			const lastTwoUserMessages = openAiMessages.filter((msg) => msg.role === "user").slice(-2)

-				lastTwoUserMessages.forEach((msg) => {
-					if (typeof msg.content === "string") {
-						msg.content = [{ type: "text", text: msg.content }]
-					}
+			lastTwoUserMessages.forEach((msg) => {
+				if (typeof msg.content === "string") {
+					msg.content = [{ type: "text", text: msg.content }]
+				}

-					if (Array.isArray(msg.content)) {
-						// NOTE: this is fine since env details will always be added at the end. but if it weren't there, and the user added a image_url type message, it would pop a text part before it and then move it after to the end.
-						let lastTextPart = msg.content.filter((part) => part.type === "text").pop()
+				if (Array.isArray(msg.content)) {
+					// NOTE: this is fine since env details will always be added at the end. but if it weren't there, and the user added a image_url type message, it would pop a text part before it and then move it after to the end.
+					let lastTextPart = msg.content.filter((part) => part.type === "text").pop()

-						if (!lastTextPart) {
-							lastTextPart = { type: "text", text: "..." }
-							msg.content.push(lastTextPart)
-						}
-						// @ts-ignore-next-line
-						lastTextPart["cache_control"] = { type: "ephemeral" }
+					if (!lastTextPart) {
+						lastTextPart = { type: "text", text: "..." }
+						msg.content.push(lastTextPart)
 					}
-				})
-				break
-			default:
-				break
+					// @ts-ignore-next-line
+					lastTextPart["cache_control"] = { type: "ephemeral" }
+				}
+			})
 		}

 		// https://openrouter.ai/docs/transforms
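To make the cache-marking concrete: after this branch runs for a model with `supportsPromptCache`, the first message carries an ephemeral `cache_control` marker. A minimal sketch of the resulting shape, with a made-up prompt:

```typescript
// Hypothetical result of the transform above; the prompt text is illustrative.
const markedSystemMessage = {
	role: "system",
	content: [
		{
			type: "text",
			text: "You are a coding assistant...",
			cache_control: { type: "ephemeral" }, // OpenRouter prompt-caching marker
		},
	],
}
```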
@@ -125,9 +145,9 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH

 		const stream = await this.client.chat.completions.create(completionParams)

-		let lastUsage
+		let lastUsage: CompletionUsage | undefined = undefined

-		for await (const chunk of stream as unknown as AsyncIterable<OpenAI.Chat.Completions.ChatCompletionChunk>) {
+		for await (const chunk of stream) {
 			// OpenRouter returns an error object instead of the OpenAI SDK throwing an error.
 			if ("error" in chunk) {
 				const error = chunk.error as { message?: string; code?: number }
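OpenRouter reports provider failures as an in-stream `error` object rather than a thrown exception, which is why the loop checks for it explicitly. A hypothetical type guard (not part of this diff) expressing the same check:

```typescript
// Hypothetical helper: narrows a chunk to the in-stream error
// shape that the loop above tests for with `"error" in chunk`.
function isErrorChunk(chunk: object): chunk is { error: { message?: string; code?: number } } {
	return "error" in chunk
}
```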
@@ -137,13 +157,13 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH

 			const delta = chunk.choices[0]?.delta

-			if ("reasoning" in delta && delta.reasoning) {
-				yield { type: "reasoning", text: delta.reasoning } as ApiStreamChunk
+			if ("reasoning" in delta && delta.reasoning && typeof delta.reasoning === "string") {
+				yield { type: "reasoning", text: delta.reasoning }
 			}

 			if (delta?.content) {
 				fullResponseText += delta.content
-				yield { type: "text", text: delta.content } as ApiStreamChunk
+				yield { type: "text", text: delta.content }
 			}

 			if (chunk.usage) {
@@ -152,16 +172,16 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
 		}

 		if (lastUsage) {
-			yield this.processUsageMetrics(lastUsage)
-		}
-	}
-
-	processUsageMetrics(usage: any): ApiStreamUsageChunk {
-		return {
-			type: "usage",
-			inputTokens: usage?.prompt_tokens || 0,
-			outputTokens: usage?.completion_tokens || 0,
-			totalCost: usage?.cost || 0,
+			yield {
+				type: "usage",
+				inputTokens: lastUsage.prompt_tokens || 0,
+				outputTokens: lastUsage.completion_tokens || 0,
+				// Waiting on OpenRouter to figure out what this represents in the Gemini case
+				// and how to best support it.
+				// cacheReadTokens: lastUsage.prompt_tokens_details?.cached_tokens,
+				reasoningTokens: lastUsage.completion_tokens_details?.reasoning_tokens,
+				totalCost: lastUsage.cost || 0,
+			}
 		}
 	}

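Putting the stream together: a consumer sees reasoning and text chunks as they arrive, followed by a single usage chunk built from `lastUsage` once the stream drains. A minimal consumption sketch — `handler`, `systemPrompt`, and `messages` are assumed to exist, and `createMessage` is an assumed name for the generator method shown above:

```typescript
// Hypothetical consumer of the generator above.
for await (const chunk of handler.createMessage(systemPrompt, messages)) {
	if (chunk.type === "reasoning" || chunk.type === "text") {
		process.stdout.write(chunk.text)
	} else if (chunk.type === "usage") {
		console.log(`in=${chunk.inputTokens} out=${chunk.outputTokens} cost=$${chunk.totalCost}`)
	}
}
```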
@@ -171,7 +191,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH

 		let id = modelId ?? openRouterDefaultModelId
 		const info = modelInfo ?? openRouterDefaultModelInfo
-
+		const supportsPromptCache = modelInfo?.supportsPromptCache
 		const isDeepSeekR1 = id.startsWith("deepseek/deepseek-r1") || modelId === "perplexity/sonar-reasoning"
 		const defaultTemperature = isDeepSeekR1 ? DEEP_SEEK_DEFAULT_TEMPERATURE : 0
 		const topP = isDeepSeekR1 ? 0.95 : undefined
@@ -180,6 +200,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
 			id,
 			info,
 			...getModelParams({ options: this.options, model: info, defaultTemperature }),
+			supportsPromptCache,
 			topP,
 		}
 	}
@@ -269,6 +290,11 @@ export async function getOpenRouterModels(options?: ApiHandlerOptions) {
 				modelInfo.cacheReadsPrice = 0.03
 				modelInfo.maxTokens = 8192
 				break
+			case rawModel.id.startsWith("google/gemini-2.5-pro-preview-03-25"):
+			case rawModel.id.startsWith("google/gemini-2.0-flash-001"):
+			case rawModel.id.startsWith("google/gemini-flash-1.5"):
+				modelInfo.supportsPromptCache = true
+				break
 			default:
 				break
 		}