@@ -21,12 +21,13 @@ import type { ApiStream } from "../transform/stream"
2121import { BaseProvider } from "./base-provider"
2222
// Lifetime of a Gemini context-cache entry, in minutes (converted to
// seconds when passed as `ttl` to `caches.create`).
const CACHE_TTL = 5
// Minimum number of new (uncached) messages that must accumulate before a
// fresh cache entry is written.
const CACHE_WRITE_FREQUENCY = 10
// Size threshold below which context caching is not worthwhile; compared
// against content length with a 4x multiplier (presumably a ~4 chars/token
// heuristic — TODO confirm).
const CONTEXT_CACHE_TOKEN_MINIMUM = 4096
2626
// Bookkeeping record for a single Gemini context-cache entry.
type CacheEntry = {
	// Fully-qualified cache name returned by `caches.create` (used later for deletion).
	key: string
	// Number of `contents` (messages) captured in the cached prefix.
	count: number
	// Total token count reported by the API for the cached content, when available.
	tokens?: number
}
3132
3233type GeminiHandlerOptions = ApiHandlerOptions & {
@@ -96,7 +97,7 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
9697 cacheKey &&
9798 contentsLength > 4 * CONTEXT_CACHE_TOKEN_MINIMUM
9899
99- let cacheWrite = false
100+ let isCacheWriteQueued = false
100101
101102 if ( isCacheAvailable ) {
102103 const cacheEntry = this . contentCaches . get < CacheEntry > ( cacheKey )
@@ -109,38 +110,10 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
109110 )
110111 }
111112
112- if ( ! this . isCacheBusy ) {
113- this . isCacheBusy = true
114- const timestamp = Date . now ( )
115-
116- this . client . caches
117- . create ( {
118- model,
119- config : {
120- contents,
121- systemInstruction,
122- ttl : `${ CACHE_TTL * 60 } s` ,
123- httpOptions : { timeout : 120_000 } ,
124- } ,
125- } )
126- . then ( ( result ) => {
127- const { name, usageMetadata } = result
128-
129- if ( name ) {
130- this . contentCaches . set < CacheEntry > ( cacheKey , { key : name , count : contents . length } )
131- console . log (
132- `[GeminiHandler] cached ${ contents . length } messages (${ usageMetadata ?. totalTokenCount ?? "-" } tokens) in ${ Date . now ( ) - timestamp } ms` ,
133- )
134- }
135- } )
136- . catch ( ( error ) => {
137- console . error ( `[GeminiHandler] caches.create error` , error )
138- } )
139- . finally ( ( ) => {
140- this . isCacheBusy = false
141- } )
142-
143- cacheWrite = true
			// If at least `CACHE_WRITE_FREQUENCY` messages have been appended
			// since the last cache write, write a new cache entry.
115+ if ( ! cacheEntry || ( uncachedContent && uncachedContent . length >= CACHE_WRITE_FREQUENCY ) ) {
116+ isCacheWriteQueued = true
144117 }
145118 }
146119
@@ -163,6 +136,10 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
163136
164137 const result = await this . client . models . generateContentStream ( params )
165138
139+ if ( cacheKey && isCacheWriteQueued ) {
140+ this . writeCache ( { cacheKey, model, systemInstruction, contents } )
141+ }
142+
166143 let lastUsageMetadata : GenerateContentResponseUsageMetadata | undefined
167144
168145 for await ( const chunk of result ) {
@@ -178,7 +155,7 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
178155 if ( lastUsageMetadata ) {
179156 const inputTokens = lastUsageMetadata . promptTokenCount ?? 0
180157 const outputTokens = lastUsageMetadata . candidatesTokenCount ?? 0
181- const cacheWriteTokens = cacheWrite ? inputTokens : undefined
158+ const cacheWriteTokens = isCacheWriteQueued ? inputTokens : undefined
182159 const cacheReadTokens = lastUsageMetadata . cachedContentTokenCount
183160 const reasoningTokens = lastUsageMetadata . thoughtsTokenCount
184161
@@ -338,4 +315,76 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
338315
339316 return totalCost
340317 }
318+
319+ private writeCache ( {
320+ cacheKey,
321+ model,
322+ systemInstruction,
323+ contents,
324+ } : {
325+ cacheKey : string
326+ model : string
327+ systemInstruction : string
328+ contents : Content [ ]
329+ } ) {
330+ if ( this . isCacheBusy ) {
331+ return
332+ }
333+
334+ this . isCacheBusy = true
335+ const timestamp = Date . now ( )
336+
337+ const previousCacheEntry = this . contentCaches . get < CacheEntry > ( cacheKey )
338+
339+ this . client . caches
340+ . create ( {
341+ model,
342+ config : {
343+ contents,
344+ systemInstruction,
345+ ttl : `${ CACHE_TTL * 60 } s` ,
346+ httpOptions : { timeout : 120_000 } ,
347+ } ,
348+ } )
349+ . then ( ( result ) => {
350+ const { name, usageMetadata } = result
351+
352+ if ( name ) {
353+ const newCacheEntry : CacheEntry = {
354+ key : name ,
355+ count : contents . length ,
356+ tokens : usageMetadata ?. totalTokenCount ,
357+ }
358+
359+ this . contentCaches . set < CacheEntry > ( cacheKey , newCacheEntry )
360+
361+ console . log (
362+ `[GeminiHandler] created cache entry ${ newCacheEntry . key } -> ${ newCacheEntry . count } messages, ${ newCacheEntry . tokens } tokens (${ Date . now ( ) - timestamp } ms)` ,
363+ )
364+
365+ if ( previousCacheEntry ) {
366+ const timestamp = Date . now ( )
367+
368+ this . client . caches
369+ . delete ( { name : previousCacheEntry . key } )
370+ . then ( ( ) => {
371+ console . log (
372+ `[GeminiHandler] deleted cache entry ${ previousCacheEntry . key } -> ${ previousCacheEntry . count } messages, ${ previousCacheEntry . tokens } tokens (${ Date . now ( ) - timestamp } ms)` ,
373+ )
374+ } )
375+ . catch ( ( error ) => {
376+ console . error (
377+ `[GeminiHandler] failed to delete stale cache entry ${ previousCacheEntry . key } -> ${ error instanceof Error ? error . message : String ( error ) } ` ,
378+ )
379+ } )
380+ }
381+ }
382+ } )
383+ . catch ( ( error ) => {
384+ console . error ( `[GeminiHandler] caches.create error` , error )
385+ } )
386+ . finally ( ( ) => {
387+ this . isCacheBusy = false
388+ } )
389+ }
341390}
0 commit comments