
Commit 92f39e3

Gemini caching improvements

1 parent b75379b commit 92f39e3

3 files changed: +78 -30 lines

src/api/providers/gemini.ts

Lines changed: 69 additions & 27 deletions
@@ -4,27 +4,38 @@ import {
     type GenerateContentResponseUsageMetadata,
     type GenerateContentParameters,
     type Content,
+    CreateCachedContentConfig,
 } from "@google/genai"
+import NodeCache from "node-cache"
 
 import { SingleCompletionHandler } from "../"
 import type { ApiHandlerOptions, GeminiModelId, ModelInfo } from "../../shared/api"
 import { geminiDefaultModelId, geminiModels } from "../../shared/api"
-import { convertAnthropicContentToGemini, convertAnthropicMessageToGemini } from "../transform/gemini-format"
+import {
+    convertAnthropicContentToGemini,
+    convertAnthropicMessageToGemini,
+    getMessagesLength,
+} from "../transform/gemini-format"
 import type { ApiStream } from "../transform/stream"
 import { BaseProvider } from "./base-provider"
 
 const CACHE_TTL = 5
 
+type CacheEntry = {
+    key: string
+    count: number
+}
+
 export class GeminiHandler extends BaseProvider implements SingleCompletionHandler {
     protected options: ApiHandlerOptions
     private client: GoogleGenAI
-    private contentCaches: Map<string, { key: string; count: number }>
+    private contentCaches: NodeCache
 
     constructor(options: ApiHandlerOptions) {
         super()
         this.options = options
         this.client = new GoogleGenAI({ apiKey: options.geminiApiKey ?? "not-provided" })
-        this.contentCaches = new Map()
+        this.contentCaches = new NodeCache({ stdTTL: 5 * 60, checkperiod: 5 * 60 })
     }
 
     async *createMessage(
@@ -35,36 +46,65 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
         const { id: model, thinkingConfig, maxOutputTokens, info } = this.getModel()
 
         const contents = messages.map(convertAnthropicMessageToGemini)
+        const contentsLength = systemInstruction.length + getMessagesLength(contents)
+
         let uncachedContent: Content[] | undefined = undefined
         let cachedContent: string | undefined = undefined
         let cacheWriteTokens: number | undefined = undefined
 
+        const isCacheAvailable =
+            info.supportsPromptCache && this.options.promptCachingEnabled && cacheKey && contentsLength > 16_384
+
+        console.log(`[GeminiHandler] isCacheAvailable=${isCacheAvailable}, contentsLength=${contentsLength}`)
+
         // https://ai.google.dev/gemini-api/docs/caching?lang=node
-        // if (info.supportsPromptCache && cacheKey) {
-        //     const cacheEntry = this.contentCaches.get(cacheKey)
+        if (isCacheAvailable) {
+            const cacheEntry = this.contentCaches.get<CacheEntry>(cacheKey)
+
+            if (cacheEntry) {
+                uncachedContent = contents.slice(cacheEntry.count, contents.length)
+                cachedContent = cacheEntry.key
+                console.log(
+                    `[GeminiHandler] using ${cacheEntry.count} cached messages (${cacheEntry.key}) and ${uncachedContent.length} uncached messages`,
+                )
+            }
 
-        //     if (cacheEntry) {
-        //         uncachedContent = contents.slice(cacheEntry.count, contents.length)
-        //         cachedContent = cacheEntry.key
-        //     }
+            const timestamp = Date.now()
 
-        //     const newCacheEntry = await this.client.caches.create({
-        //         model,
-        //         config: { contents, systemInstruction, ttl: `${CACHE_TTL * 60}s` },
-        //     })
+            const config: CreateCachedContentConfig = {
+                contents,
+                systemInstruction,
+                ttl: `${CACHE_TTL * 60}s`,
+                httpOptions: { timeout: 10_000 },
+            }
+
+            this.client.caches
+                .create({ model, config })
+                .then((result) => {
+                    console.log(`[GeminiHandler] caches.create result -> ${JSON.stringify(result)}`)
+                    const { name, usageMetadata } = result
+
+                    if (name) {
+                        this.contentCaches.set<CacheEntry>(cacheKey, { key: name, count: contents.length })
+                        cacheWriteTokens = usageMetadata?.totalTokenCount ?? 0
+                        console.log(
+                            `[GeminiHandler] cached ${contents.length} messages (${cacheWriteTokens} tokens) in ${Date.now() - timestamp}ms`,
+                        )
+                    }
+                })
+                .catch((error) => {
+                    console.error(`[GeminiHandler] caches.create error`, error)
+                })
+        }
 
-        //     if (newCacheEntry.name) {
-        //         this.contentCaches.set(cacheKey, { key: newCacheEntry.name, count: contents.length })
-        //         cacheWriteTokens = newCacheEntry.usageMetadata?.totalTokenCount ?? 0
-        //     }
-        // }
+        const isCacheUsed = !!cachedContent
 
         const params: GenerateContentParameters = {
             model,
             contents: uncachedContent ?? contents,
             config: {
                 cachedContent,
-                systemInstruction: cachedContent ? undefined : systemInstruction,
+                systemInstruction: isCacheUsed ? undefined : systemInstruction,
                 httpOptions: this.options.googleGeminiBaseUrl
                     ? { baseUrl: this.options.googleGeminiBaseUrl }
                    : undefined,
@@ -94,13 +134,15 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
         const cacheReadTokens = lastUsageMetadata.cachedContentTokenCount
         const reasoningTokens = lastUsageMetadata.thoughtsTokenCount
 
-        // const totalCost = this.calculateCost({
-        //     info,
-        //     inputTokens,
-        //     outputTokens,
-        //     cacheWriteTokens,
-        //     cacheReadTokens,
-        // })
+        const totalCost = isCacheUsed
+            ? this.calculateCost({
+                  info,
+                  inputTokens,
+                  outputTokens,
+                  cacheWriteTokens,
+                  cacheReadTokens,
+              })
+            : undefined
 
         yield {
             type: "usage",
@@ -109,7 +151,7 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
             cacheWriteTokens,
             cacheReadTokens,
             reasoningTokens,
-            // totalCost,
+            totalCost,
         }
     }
 }
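
For context on the flow this diff introduces: contentCaches now maps a conversation's cacheKey to the Gemini cachedContents name plus the number of messages it covers, so a follow-up request can send only the uncached suffix. Note that caches.create() is fire-and-forget, so cacheWriteTokens may still be unset when the current request's usage chunk is yielded; the entry pays off on later requests within the 5-minute TTL. A minimal sketch of that lookup-then-slice pattern (the cache key, cache name, and message stand-ins below are hypothetical, not from the commit):

    import NodeCache from "node-cache"

    type CacheEntry = { key: string; count: number }

    // Entries expire after 5 minutes, mirroring CACHE_TTL above.
    const contentCaches = new NodeCache({ stdTTL: 5 * 60, checkperiod: 5 * 60 })

    // Stand-ins for a conversation's converted Content[].
    const contents = ["turn-1", "turn-2", "turn-3", "turn-4"]

    // What the .then() handler records once caches.create() resolves:
    // the Gemini cache name and how many messages it covers.
    contentCaches.set<CacheEntry>("task-123", { key: "cachedContents/abc123", count: 3 })

    // A follow-up request within the TTL sends only the uncached suffix
    // and points config.cachedContent at the stored name.
    const entry = contentCaches.get<CacheEntry>("task-123")

    if (entry) {
        const uncached = contents.slice(entry.count) // ["turn-4"]
        console.log(`send ${uncached.length} new message(s) against ${entry.key}`)
    }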

src/api/transform/gemini-format.ts

Lines changed: 6 additions & 0 deletions
@@ -76,3 +76,9 @@ export function convertAnthropicMessageToGemini(message: Anthropic.Messages.Mess
         parts: convertAnthropicContentToGemini(message.content),
     }
 }
+
+const getContentLength = ({ parts }: Content): number =>
+    parts?.reduce((length, { text }) => length + (text?.length ?? 0), 0) ?? 0
+
+export const getMessagesLength = (contents: Content[]): number =>
+    contents.reduce((length, content) => length + getContentLength(content), 0)
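
These helpers measure prompt size in characters of the text parts (non-text parts such as images count as zero), which is what the provider compares against its 16_384-character gate. An illustrative use with made-up contents:

    import type { Content } from "@google/genai"
    import { getMessagesLength } from "./gemini-format"

    // Hypothetical conversation: 10_000 + 7_000 characters of text.
    const contents: Content[] = [
        { role: "user", parts: [{ text: "a".repeat(10_000) }] },
        { role: "model", parts: [{ text: "b".repeat(7_000) }] },
    ]

    console.log(getMessagesLength(contents)) // 17000 -> clears the 16_384 threshold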

src/shared/api.ts

Lines changed: 3 additions & 3 deletions
@@ -679,7 +679,7 @@ export const geminiModels = {
         maxTokens: 65_535,
         contextWindow: 1_048_576,
         supportsImages: true,
-        supportsPromptCache: false,
+        supportsPromptCache: true,
         isPromptCacheOptional: true,
         inputPrice: 2.5, // This is the pricing for prompts above 200k tokens.
         outputPrice: 15,
@@ -704,7 +704,7 @@
         maxTokens: 8192,
         contextWindow: 1_048_576,
         supportsImages: true,
-        supportsPromptCache: false,
+        supportsPromptCache: true,
         isPromptCacheOptional: true,
         inputPrice: 0.1,
         outputPrice: 0.4,
@@ -755,7 +755,7 @@
         maxTokens: 8192,
         contextWindow: 1_048_576,
         supportsImages: true,
-        supportsPromptCache: false,
+        supportsPromptCache: true,
         isPromptCacheOptional: true,
         inputPrice: 0.15, // This is the pricing for prompts above 128k tokens.
         outputPrice: 0.6,
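
Flipping supportsPromptCache back to true is what allows the isCacheAvailable gate in gemini.ts to pass at all; isPromptCacheOptional keeps the feature behind the user's promptCachingEnabled setting. Roughly how the pieces combine (the model id and option values here are illustrative assumptions):

    import { geminiModels } from "../../shared/api"

    // Illustrative inputs; in the handler these come from getModel(),
    // user settings, and the current request.
    const info = geminiModels["gemini-2.0-flash-001"]
    const promptCachingEnabled = true
    const cacheKey = "task-123"
    const contentsLength = 20_000

    const isCacheAvailable =
        info.supportsPromptCache && promptCachingEnabled && !!cacheKey && contentsLength > 16_384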
