
Commit 8639da0

Add isCacheAvailable to OpenRouter provider
1 parent fc8e672 commit 8639da0

2 files changed (+45, −29 lines)

src/api/providers/gemini.ts

Lines changed: 37 additions & 27 deletions
@@ -54,7 +54,6 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandler
 
 		let uncachedContent: Content[] | undefined = undefined
 		let cachedContent: string | undefined = undefined
-		let cacheWriteTokens: number | undefined = undefined
 
 		// The minimum input token count for context caching is 4,096.
 		// For a basic approximation we assume 4 characters per token.
@@ -67,6 +66,8 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandler
 			cacheKey &&
 			contentsLength > 4 * CONTEXT_CACHE_TOKEN_MINIMUM
 
+		let cacheWrite = false
+
 		if (isCacheAvailable) {
 			const cacheEntry = this.contentCaches.get<CacheEntry>(cacheKey)
 
@@ -97,9 +98,8 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandler
 
 					if (name) {
 						this.contentCaches.set<CacheEntry>(cacheKey, { key: name, count: contents.length })
-						cacheWriteTokens = usageMetadata?.totalTokenCount ?? 0
 						console.log(
-							`[GeminiHandler] cached ${contents.length} messages (${cacheWriteTokens} tokens) in ${Date.now() - timestamp}ms`,
+							`[GeminiHandler] cached ${contents.length} messages (${usageMetadata?.totalTokenCount ?? "-"} tokens) in ${Date.now() - timestamp}ms`,
 						)
 					}
 				})
@@ -109,6 +109,8 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandler
 				.finally(() => {
 					this.isCacheBusy = false
 				})
+
+				cacheWrite = true
 			}
 		}
 
@@ -146,27 +148,24 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandler
 		if (lastUsageMetadata) {
 			const inputTokens = lastUsageMetadata.promptTokenCount ?? 0
 			const outputTokens = lastUsageMetadata.candidatesTokenCount ?? 0
+			const cacheWriteTokens = cacheWrite ? inputTokens : 0
 			const cacheReadTokens = lastUsageMetadata.cachedContentTokenCount
 			const reasoningTokens = lastUsageMetadata.thoughtsTokenCount
 
-			const totalCost = isCacheUsed
-				? this.calculateCost({
-						info,
-						inputTokens,
-						outputTokens,
-						cacheWriteTokens,
-						cacheReadTokens,
-					})
-				: undefined
-
 			yield {
 				type: "usage",
 				inputTokens,
 				outputTokens,
 				cacheWriteTokens,
 				cacheReadTokens,
 				reasoningTokens,
-				totalCost,
+				totalCost: this.calculateCost({
+					info,
+					inputTokens,
+					outputTokens,
+					cacheWriteTokens,
+					cacheReadTokens,
+				}),
 			}
 		}
 	}
@@ -250,8 +249,8 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandler
 		info,
 		inputTokens,
 		outputTokens,
-		cacheWriteTokens,
-		cacheReadTokens,
+		cacheWriteTokens = 0,
+		cacheReadTokens = 0,
 	}: {
 		info: ModelInfo
 		inputTokens: number
@@ -281,21 +280,32 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandler
 			}
 		}
 
-		let inputTokensCost = inputPrice * (inputTokens / 1_000_000)
-		let outputTokensCost = outputPrice * (outputTokens / 1_000_000)
-		let cacheWriteCost = 0
-		let cacheReadCost = 0
+		// Subtract the cached input tokens from the total input tokens.
+		const uncachedInputTokens = inputTokens - cacheReadTokens
 
-		if (cacheWriteTokens) {
-			cacheWriteCost = cacheWritesPrice * (cacheWriteTokens / 1_000_000) * (CACHE_TTL / 60)
+		let cacheWriteCost =
+			cacheWriteTokens > 0 ? cacheWritesPrice * (cacheWriteTokens / 1_000_000) * (CACHE_TTL / 60) : 0
+		let cacheReadCost = cacheReadTokens > 0 ? cacheReadsPrice * (cacheReadTokens / 1_000_000) : 0
+
+		const inputTokensCost = inputPrice * (uncachedInputTokens / 1_000_000)
+		const outputTokensCost = outputPrice * (outputTokens / 1_000_000)
+		const totalCost = inputTokensCost + outputTokensCost + cacheWriteCost + cacheReadCost
+
+		const trace: Record<string, { price: number; tokens: number; cost: number }> = {
+			input: { price: inputPrice, tokens: uncachedInputTokens, cost: inputTokensCost },
+			output: { price: outputPrice, tokens: outputTokens, cost: outputTokensCost },
 		}
 
-		if (cacheReadTokens) {
-			const uncachedReadTokens = inputTokens - cacheReadTokens
-			cacheReadCost = cacheReadsPrice * (cacheReadTokens / 1_000_000)
-			inputTokensCost = inputPrice * (uncachedReadTokens / 1_000_000)
+		if (cacheWriteTokens > 0) {
+			trace.cacheWrite = { price: cacheWritesPrice, tokens: cacheWriteTokens, cost: cacheWriteCost }
 		}
 
-		return inputTokensCost + outputTokensCost + cacheWriteCost + cacheReadCost
+		if (cacheReadTokens > 0) {
+			trace.cacheRead = { price: cacheReadsPrice, tokens: cacheReadTokens, cost: cacheReadCost }
+		}
+
+		// console.log(`[GeminiHandler] calculateCost -> ${totalCost}`, trace)
+
+		return totalCost
 	}
 }
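
For reference, the revised pricing logic in `calculateCost` can be read as a self-contained sketch. The formulas mirror the diff above; the `ModelInfo`-based price lookup, the debug trace, and the concrete prices and `CACHE_TTL` value are assumptions (prices per million tokens and a TTL in minutes are inferred from the `/ 1_000_000` and `CACHE_TTL / 60` factors).

```ts
// Minimal sketch of the pricing model used by `calculateCost` above, not the
// full handler. Assumptions: prices are USD per 1M tokens and CACHE_TTL is in
// minutes (hence the `CACHE_TTL / 60` scaling for per-hour cache-write pricing).
const CACHE_TTL = 5 // minutes; illustrative value only

interface CostInputs {
	inputPrice: number
	outputPrice: number
	cacheWritesPrice: number
	cacheReadsPrice: number
	inputTokens: number
	outputTokens: number
	cacheWriteTokens?: number
	cacheReadTokens?: number
}

function calculateCost({
	inputPrice,
	outputPrice,
	cacheWritesPrice,
	cacheReadsPrice,
	inputTokens,
	outputTokens,
	cacheWriteTokens = 0,
	cacheReadTokens = 0,
}: CostInputs): number {
	// Cached input tokens are billed at the cache-read rate, so subtract them
	// from the tokens billed at the regular input rate.
	const uncachedInputTokens = inputTokens - cacheReadTokens

	const cacheWriteCost =
		cacheWriteTokens > 0 ? cacheWritesPrice * (cacheWriteTokens / 1_000_000) * (CACHE_TTL / 60) : 0
	const cacheReadCost = cacheReadTokens > 0 ? cacheReadsPrice * (cacheReadTokens / 1_000_000) : 0

	const inputTokensCost = inputPrice * (uncachedInputTokens / 1_000_000)
	const outputTokensCost = outputPrice * (outputTokens / 1_000_000)

	return inputTokensCost + outputTokensCost + cacheWriteCost + cacheReadCost
}

// Example with illustrative prices: 50k prompt tokens, 40k of them read from cache.
console.log(
	calculateCost({
		inputPrice: 1.0,
		outputPrice: 4.0,
		cacheWritesPrice: 1.25,
		cacheReadsPrice: 0.25,
		inputTokens: 50_000,
		outputTokens: 2_000,
		cacheReadTokens: 40_000,
	}),
)
```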

src/api/providers/openrouter.ts

Lines changed: 8 additions & 2 deletions
@@ -65,7 +65,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionHandler
 		systemPrompt: string,
 		messages: Anthropic.Messages.MessageParam[],
 	): AsyncGenerator<ApiStreamChunk> {
-		let { id: modelId, maxTokens, thinking, temperature, topP, reasoningEffort, info } = this.getModel()
+		let { id: modelId, maxTokens, thinking, temperature, topP, reasoningEffort, promptCache } = this.getModel()
 
 		// Convert Anthropic messages to OpenAI format.
 		let openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
@@ -78,11 +78,13 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionHandler
 			openAiMessages = convertToR1Format([{ role: "user", content: systemPrompt }, ...messages])
 		}
 
+		const isCacheAvailable = promptCache.supported && (!promptCache.optional || this.options.promptCachingEnabled)
+
 		// Prompt caching: https://openrouter.ai/docs/prompt-caching
 		// Now with Gemini support: https://openrouter.ai/docs/features/prompt-caching
 		// Note that we don't check the `ModelInfo` object because it is cached
 		// in the settings for OpenRouter and the value could be stale.
-		if (PROMPT_CACHING_MODELS.has(modelId)) {
+		if (isCacheAvailable) {
 			openAiMessages[0] = {
 				role: "system",
 				// @ts-ignore-next-line
@@ -195,6 +197,10 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionHandler
 			info,
 			...getModelParams({ options: this.options, model: info, defaultTemperature }),
 			topP,
+			promptCache: {
+				supported: PROMPT_CACHING_MODELS.has(id),
+				optional: PROMPT_CACHING_MODELS.has(id),
+			},
 		}
 	}
 
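
The caching gate added above can be read in isolation as roughly the following sketch. `PROMPT_CACHING_MODELS`, `promptCache.supported`/`optional`, and `promptCachingEnabled` mirror the diff; the model IDs and helper names used here are illustrative, not part of the commit.

```ts
// Sketch of the isCacheAvailable gate added in this commit, assuming the
// promptCache metadata shape shown in getModel() above.
const PROMPT_CACHING_MODELS = new Set(["anthropic/claude-3.5-sonnet", "google/gemini-2.0-flash-001"]) // example IDs

interface PromptCacheInfo {
	supported: boolean // the model can use prompt caching at all
	optional: boolean // caching must also be switched on by the user
}

// In this commit both flags are derived from the same set, i.e. caching is
// supported and opt-in for exactly the models in PROMPT_CACHING_MODELS.
function getPromptCache(modelId: string): PromptCacheInfo {
	return {
		supported: PROMPT_CACHING_MODELS.has(modelId),
		optional: PROMPT_CACHING_MODELS.has(modelId),
	}
}

// Cache only when the model supports it and, if caching is opt-in, only when
// the user has enabled the promptCachingEnabled setting.
function isCacheAvailable(promptCache: PromptCacheInfo, promptCachingEnabled?: boolean): boolean {
	return promptCache.supported && (!promptCache.optional || Boolean(promptCachingEnabled))
}

// Example: caching is skipped for a supported model until the setting is on.
console.log(isCacheAvailable(getPromptCache("anthropic/claude-3.5-sonnet"), false)) // false
console.log(isCacheAvailable(getPromptCache("anthropic/claude-3.5-sonnet"), true)) // true
```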

0 commit comments
