Skip to content

Commit a7615bd

Browse files
committed
Gemini caching tweaks
1 parent 8641b21 commit a7615bd

File tree

1 file changed

+84
-35
lines changed

1 file changed

+84
-35
lines changed

src/api/providers/gemini.ts

Lines changed: 84 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,13 @@ import type { ApiStream } from "../transform/stream"
2121
import { BaseProvider } from "./base-provider"
2222

2323
// Lifetime of a Gemini context-cache entry, in minutes
// (passed to caches.create as `${CACHE_TTL * 60}s`).
const CACHE_TTL = 5

// Write a fresh cache entry only after this many messages have been
// appended since the last cache write (see the cacheEntry check in
// createMessage).
const CACHE_WRITE_FREQUENCY = 10

// Token floor below which context caching is not attempted.
// NOTE(review): the visible guard compares a content *length* against
// 4 * CONTEXT_CACHE_TOKEN_MINIMUM — presumably chars-to-tokens at ~4:1;
// confirm against the full createMessage implementation.
const CONTEXT_CACHE_TOKEN_MINIMUM = 4096

// Bookkeeping for one cached conversation prefix.
type CacheEntry = {
	// Resource name returned by client.caches.create (used for later delete).
	key: string
	// Number of `contents` messages captured in the cache entry.
	count: number
	// Total token count reported by the create call's usage metadata, if any.
	tokens?: number
}
3132

3233
type GeminiHandlerOptions = ApiHandlerOptions & {
@@ -96,7 +97,7 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
9697
cacheKey &&
9798
contentsLength > 4 * CONTEXT_CACHE_TOKEN_MINIMUM
9899

99-
let cacheWrite = false
100+
let isCacheWriteQueued = false
100101

101102
if (isCacheAvailable) {
102103
const cacheEntry = this.contentCaches.get<CacheEntry>(cacheKey)
@@ -109,38 +110,10 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
109110
)
110111
}
111112

112-
if (!this.isCacheBusy) {
113-
this.isCacheBusy = true
114-
const timestamp = Date.now()
115-
116-
this.client.caches
117-
.create({
118-
model,
119-
config: {
120-
contents,
121-
systemInstruction,
122-
ttl: `${CACHE_TTL * 60}s`,
123-
httpOptions: { timeout: 120_000 },
124-
},
125-
})
126-
.then((result) => {
127-
const { name, usageMetadata } = result
128-
129-
if (name) {
130-
this.contentCaches.set<CacheEntry>(cacheKey, { key: name, count: contents.length })
131-
console.log(
132-
`[GeminiHandler] cached ${contents.length} messages (${usageMetadata?.totalTokenCount ?? "-"} tokens) in ${Date.now() - timestamp}ms`,
133-
)
134-
}
135-
})
136-
.catch((error) => {
137-
console.error(`[GeminiHandler] caches.create error`, error)
138-
})
139-
.finally(() => {
140-
this.isCacheBusy = false
141-
})
142-
143-
cacheWrite = true
113+
// If `CACHE_WRITE_FREQUENCY` messages have been appended since the
114+
// last cache write then write a new cache entry.
115+
if (!cacheEntry || (uncachedContent && uncachedContent.length >= CACHE_WRITE_FREQUENCY)) {
116+
isCacheWriteQueued = true
144117
}
145118
}
146119

@@ -163,6 +136,10 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
163136

164137
const result = await this.client.models.generateContentStream(params)
165138

139+
if (cacheKey && isCacheWriteQueued) {
140+
this.writeCache({ cacheKey, model, systemInstruction, contents })
141+
}
142+
166143
let lastUsageMetadata: GenerateContentResponseUsageMetadata | undefined
167144

168145
for await (const chunk of result) {
@@ -178,7 +155,7 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
178155
if (lastUsageMetadata) {
179156
const inputTokens = lastUsageMetadata.promptTokenCount ?? 0
180157
const outputTokens = lastUsageMetadata.candidatesTokenCount ?? 0
181-
const cacheWriteTokens = cacheWrite ? inputTokens : undefined
158+
const cacheWriteTokens = isCacheWriteQueued ? inputTokens : undefined
182159
const cacheReadTokens = lastUsageMetadata.cachedContentTokenCount
183160
const reasoningTokens = lastUsageMetadata.thoughtsTokenCount
184161

@@ -338,4 +315,76 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
338315

339316
return totalCost
340317
}
318+
319+
private writeCache({
320+
cacheKey,
321+
model,
322+
systemInstruction,
323+
contents,
324+
}: {
325+
cacheKey: string
326+
model: string
327+
systemInstruction: string
328+
contents: Content[]
329+
}) {
330+
if (this.isCacheBusy) {
331+
return
332+
}
333+
334+
this.isCacheBusy = true
335+
const timestamp = Date.now()
336+
337+
const previousCacheEntry = this.contentCaches.get<CacheEntry>(cacheKey)
338+
339+
this.client.caches
340+
.create({
341+
model,
342+
config: {
343+
contents,
344+
systemInstruction,
345+
ttl: `${CACHE_TTL * 60}s`,
346+
httpOptions: { timeout: 120_000 },
347+
},
348+
})
349+
.then((result) => {
350+
const { name, usageMetadata } = result
351+
352+
if (name) {
353+
const newCacheEntry: CacheEntry = {
354+
key: name,
355+
count: contents.length,
356+
tokens: usageMetadata?.totalTokenCount,
357+
}
358+
359+
this.contentCaches.set<CacheEntry>(cacheKey, newCacheEntry)
360+
361+
console.log(
362+
`[GeminiHandler] created cache entry ${newCacheEntry.key} -> ${newCacheEntry.count} messages, ${newCacheEntry.tokens} tokens (${Date.now() - timestamp}ms)`,
363+
)
364+
365+
if (previousCacheEntry) {
366+
const timestamp = Date.now()
367+
368+
this.client.caches
369+
.delete({ name: previousCacheEntry.key })
370+
.then(() => {
371+
console.log(
372+
`[GeminiHandler] deleted cache entry ${previousCacheEntry.key} -> ${previousCacheEntry.count} messages, ${previousCacheEntry.tokens} tokens (${Date.now() - timestamp}ms)`,
373+
)
374+
})
375+
.catch((error) => {
376+
console.error(
377+
`[GeminiHandler] failed to delete stale cache entry ${previousCacheEntry.key} -> ${error instanceof Error ? error.message : String(error)}`,
378+
)
379+
})
380+
}
381+
}
382+
})
383+
.catch((error) => {
384+
console.error(`[GeminiHandler] caches.create error`, error)
385+
})
386+
.finally(() => {
387+
this.isCacheBusy = false
388+
})
389+
}
341390
}

0 commit comments

Comments (0)