@@ -4,27 +4,42 @@ import {
44 type GenerateContentResponseUsageMetadata ,
55 type GenerateContentParameters ,
66 type Content ,
7+ CreateCachedContentConfig ,
78} from "@google/genai"
9+ import NodeCache from "node-cache"
810
911import { SingleCompletionHandler } from "../"
1012import type { ApiHandlerOptions , GeminiModelId , ModelInfo } from "../../shared/api"
1113import { geminiDefaultModelId , geminiModels } from "../../shared/api"
12- import { convertAnthropicContentToGemini , convertAnthropicMessageToGemini } from "../transform/gemini-format"
14+ import {
15+ convertAnthropicContentToGemini ,
16+ convertAnthropicMessageToGemini ,
17+ getMessagesLength ,
18+ } from "../transform/gemini-format"
1319import type { ApiStream } from "../transform/stream"
1420import { BaseProvider } from "./base-provider"
1521
1622const CACHE_TTL = 5
1723
24+ const CONTEXT_CACHE_TOKEN_MINIMUM = 4096
25+
26+ type CacheEntry = {
27+ key : string
28+ count : number
29+ }
30+
1831export class GeminiHandler extends BaseProvider implements SingleCompletionHandler {
1932 protected options : ApiHandlerOptions
33+
2034 private client : GoogleGenAI
21- private contentCaches : Map < string , { key : string ; count : number } >
35+ private contentCaches : NodeCache
36+ private isCacheBusy = false
2237
2338 constructor ( options : ApiHandlerOptions ) {
2439 super ( )
2540 this . options = options
2641 this . client = new GoogleGenAI ( { apiKey : options . geminiApiKey ?? "not-provided" } )
27- this . contentCaches = new Map ( )
42+ this . contentCaches = new NodeCache ( { stdTTL : 5 * 60 , checkperiod : 5 * 60 } )
2843 }
2944
3045 async * createMessage (
@@ -35,36 +50,76 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
3550 const { id : model , thinkingConfig, maxOutputTokens, info } = this . getModel ( )
3651
3752 const contents = messages . map ( convertAnthropicMessageToGemini )
53+ const contentsLength = systemInstruction . length + getMessagesLength ( contents )
54+
3855 let uncachedContent : Content [ ] | undefined = undefined
3956 let cachedContent : string | undefined = undefined
4057 let cacheWriteTokens : number | undefined = undefined
4158
59+ // The minimum input token count for context caching is 4,096.
60+ // For a basic approximation we assume 4 characters per token.
61+ // We can use tiktoken eventually to get a more accurate token count.
4262 // https://ai.google.dev/gemini-api/docs/caching?lang=node
43- // if (info.supportsPromptCache && cacheKey) {
44- // const cacheEntry = this.contentCaches.get(cacheKey)
45-
46- // if (cacheEntry) {
47- // uncachedContent = contents.slice(cacheEntry.count, contents.length)
48- // cachedContent = cacheEntry.key
49- // }
63+ // https://ai.google.dev/gemini-api/docs/tokens?lang=node
64+ const isCacheAvailable =
65+ info . supportsPromptCache &&
66+ this . options . promptCachingEnabled &&
67+ cacheKey &&
68+ contentsLength > 4 * CONTEXT_CACHE_TOKEN_MINIMUM
69+
70+ if ( isCacheAvailable ) {
71+ const cacheEntry = this . contentCaches . get < CacheEntry > ( cacheKey )
72+
73+ if ( cacheEntry ) {
74+ uncachedContent = contents . slice ( cacheEntry . count , contents . length )
75+ cachedContent = cacheEntry . key
76+ console . log (
77+ `[GeminiHandler] using ${ cacheEntry . count } cached messages (${ cacheEntry . key } ) and ${ uncachedContent . length } uncached messages` ,
78+ )
79+ }
5080
51- // const newCacheEntry = await this.client.caches.create({
52- // model,
53- // config: { contents, systemInstruction, ttl: `${CACHE_TTL * 60}s` },
54- // })
81+ if ( ! this . isCacheBusy ) {
82+ this . isCacheBusy = true
83+ const timestamp = Date . now ( )
84+
85+ this . client . caches
86+ . create ( {
87+ model,
88+ config : {
89+ contents,
90+ systemInstruction,
91+ ttl : `${ CACHE_TTL * 60 } s` ,
92+ httpOptions : { timeout : 120_000 } ,
93+ } ,
94+ } )
95+ . then ( ( result ) => {
96+ const { name, usageMetadata } = result
97+
98+ if ( name ) {
99+ this . contentCaches . set < CacheEntry > ( cacheKey , { key : name , count : contents . length } )
100+ cacheWriteTokens = usageMetadata ?. totalTokenCount ?? 0
101+ console . log (
102+ `[GeminiHandler] cached ${ contents . length } messages (${ cacheWriteTokens } tokens) in ${ Date . now ( ) - timestamp } ms` ,
103+ )
104+ }
105+ } )
106+ . catch ( ( error ) => {
107+ console . error ( `[GeminiHandler] caches.create error` , error )
108+ } )
109+ . finally ( ( ) => {
110+ this . isCacheBusy = false
111+ } )
112+ }
113+ }
55114
56- // if (newCacheEntry.name) {
57- // this.contentCaches.set(cacheKey, { key: newCacheEntry.name, count: contents.length })
58- // cacheWriteTokens = newCacheEntry.usageMetadata?.totalTokenCount ?? 0
59- // }
60- // }
115+ const isCacheUsed = ! ! cachedContent
61116
62117 const params : GenerateContentParameters = {
63118 model,
64119 contents : uncachedContent ?? contents ,
65120 config : {
66121 cachedContent,
67- systemInstruction : cachedContent ? undefined : systemInstruction ,
122+ systemInstruction : isCacheUsed ? undefined : systemInstruction ,
68123 httpOptions : this . options . googleGeminiBaseUrl
69124 ? { baseUrl : this . options . googleGeminiBaseUrl }
70125 : undefined ,
@@ -94,13 +149,15 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
94149 const cacheReadTokens = lastUsageMetadata . cachedContentTokenCount
95150 const reasoningTokens = lastUsageMetadata . thoughtsTokenCount
96151
97- // const totalCost = this.calculateCost({
98- // info,
99- // inputTokens,
100- // outputTokens,
101- // cacheWriteTokens,
102- // cacheReadTokens,
103- // })
152+ const totalCost = isCacheUsed
153+ ? this . calculateCost ( {
154+ info,
155+ inputTokens,
156+ outputTokens,
157+ cacheWriteTokens,
158+ cacheReadTokens,
159+ } )
160+ : undefined
104161
105162 yield {
106163 type : "usage" ,
@@ -109,7 +166,7 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
109166 cacheWriteTokens,
110167 cacheReadTokens,
111168 reasoningTokens,
112- // totalCost,
169+ totalCost,
113170 }
114171 }
115172 }
0 commit comments