@@ -21,6 +21,8 @@ import { BaseProvider } from "./base-provider"
 
 const CACHE_TTL = 5
 
+const CONTEXT_CACHE_TOKEN_MINIMUM = 4096
+
 type CacheEntry = {
 	key: string
 	count: number
@@ -46,19 +48,25 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 		const { id: model, thinkingConfig, maxOutputTokens, info } = this.getModel()
 
 		const contents = messages.map(convertAnthropicMessageToGemini)
-		// This is just an approximation for now; we can use tiktoken eventually.
 		const contentsLength = systemInstruction.length + getMessagesLength(contents)
 
 		let uncachedContent: Content[] | undefined = undefined
 		let cachedContent: string | undefined = undefined
 		let cacheWriteTokens: number | undefined = undefined
 
+		// The minimum input token count for context caching is 4,096.
+		// For a basic approximation we assume 4 characters per token.
+		// We can use tiktoken eventually to get a more accurate token count.
+		// https://ai.google.dev/gemini-api/docs/caching?lang=node
+		// https://ai.google.dev/gemini-api/docs/tokens?lang=node
 		const isCacheAvailable =
-			info.supportsPromptCache && this.options.promptCachingEnabled && cacheKey && contentsLength > 16_384
+			info.supportsPromptCache &&
+			this.options.promptCachingEnabled &&
+			cacheKey &&
+			contentsLength > 4 * CONTEXT_CACHE_TOKEN_MINIMUM
 
 		console.log(`[GeminiHandler] isCacheAvailable=${isCacheAvailable}, contentsLength=${contentsLength}`)
 
-		// https://ai.google.dev/gemini-api/docs/caching?lang=node
 		if (isCacheAvailable) {
 			const cacheEntry = this.contentCaches.get<CacheEntry>(cacheKey)
 
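
For reference, a minimal standalone sketch of the threshold logic introduced above, assuming the 4-characters-per-token heuristic from the comment. The constant CONTEXT_CACHE_TOKEN_MINIMUM mirrors the diff; isLargeEnoughToCache is a hypothetical helper, not part of the handler. Note that 4 * 4096 = 16,384, which matches the hardcoded character threshold this change replaces.

// Minimum input token count for Gemini context caching (see the docs linked above).
const CONTEXT_CACHE_TOKEN_MINIMUM = 4096

// Rough heuristic: roughly 4 characters per token.
const APPROX_CHARS_PER_TOKEN = 4

// Hypothetical helper: is the prompt large enough to be worth caching?
function isLargeEnoughToCache(contentsLength: number): boolean {
	return contentsLength > APPROX_CHARS_PER_TOKEN * CONTEXT_CACHE_TOKEN_MINIMUM
}

// Example: a 20,000-character prompt clears the threshold; a 10,000-character one does not.
console.log(isLargeEnoughToCache(20_000)) // true
console.log(isLargeEnoughToCache(10_000)) // false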