@@ -4,27 +4,38 @@ import {
 	type GenerateContentResponseUsageMetadata,
 	type GenerateContentParameters,
 	type Content,
+	CreateCachedContentConfig,
 } from "@google/genai"
+import NodeCache from "node-cache"
 
 import { SingleCompletionHandler } from "../"
 import type { ApiHandlerOptions, GeminiModelId, ModelInfo } from "../../shared/api"
 import { geminiDefaultModelId, geminiModels } from "../../shared/api"
-import { convertAnthropicContentToGemini, convertAnthropicMessageToGemini } from "../transform/gemini-format"
+import {
+	convertAnthropicContentToGemini,
+	convertAnthropicMessageToGemini,
+	getMessagesLength,
+} from "../transform/gemini-format"
 import type { ApiStream } from "../transform/stream"
 import { BaseProvider } from "./base-provider"
 
 const CACHE_TTL = 5
 
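+// A CacheEntry maps a cache key to the cached-content resource name returned by the Gemini API
+// and the number of messages that cached content covers.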
+type CacheEntry = {
+	key: string
+	count: number
+}
+
 export class GeminiHandler extends BaseProvider implements SingleCompletionHandler {
 	protected options: ApiHandlerOptions
 	private client: GoogleGenAI
-	private contentCaches: Map<string, { key: string; count: number }>
+	private contentCaches: NodeCache
 
 	constructor(options: ApiHandlerOptions) {
 		super()
 		this.options = options
 		this.client = new GoogleGenAI({ apiKey: options.geminiApiKey ?? "not-provided" })
-		this.contentCaches = new Map()
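+		// Local entries expire after five minutes, matching the server-side TTL (CACHE_TTL) used when creating cached content below.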
+		this.contentCaches = new NodeCache({ stdTTL: 5 * 60, checkperiod: 5 * 60 })
 	}
 
 	async *createMessage(
@@ -35,36 +46,65 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 		const { id: model, thinkingConfig, maxOutputTokens, info } = this.getModel()
 
 		const contents = messages.map(convertAnthropicMessageToGemini)
+		const contentsLength = systemInstruction.length + getMessagesLength(contents)
+
 		let uncachedContent: Content[] | undefined = undefined
 		let cachedContent: string | undefined = undefined
 		let cacheWriteTokens: number | undefined = undefined
 
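+		// Only attempt explicit caching when the model supports it, prompt caching is enabled, a cache key
+		// was provided, and the prompt is large enough to be worth caching (contentsLength is measured in
+		// characters, not tokens).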
+		const isCacheAvailable =
+			info.supportsPromptCache && this.options.promptCachingEnabled && cacheKey && contentsLength > 16_384
+
+		console.log(`[GeminiHandler] isCacheAvailable=${isCacheAvailable}, contentsLength=${contentsLength}`)
+
 		// https://ai.google.dev/gemini-api/docs/caching?lang=node
-		// if (info.supportsPromptCache && cacheKey) {
-		// 	const cacheEntry = this.contentCaches.get(cacheKey)
+		if (isCacheAvailable) {
+			const cacheEntry = this.contentCaches.get<CacheEntry>(cacheKey)
+
+			if (cacheEntry) {
+				uncachedContent = contents.slice(cacheEntry.count, contents.length)
+				cachedContent = cacheEntry.key
+				console.log(
+					`[GeminiHandler] using ${cacheEntry.count} cached messages (${cacheEntry.key}) and ${uncachedContent.length} uncached messages`,
+				)
+			}
 
-		// 	if (cacheEntry) {
-		// 		uncachedContent = contents.slice(cacheEntry.count, contents.length)
-		// 		cachedContent = cacheEntry.key
-		// 	}
+			const timestamp = Date.now()
 
-		// 	const newCacheEntry = await this.client.caches.create({
-		// 		model,
-		// 		config: { contents, systemInstruction, ttl: `${CACHE_TTL * 60}s` },
-		// 	})
+			const config: CreateCachedContentConfig = {
+				contents,
+				systemInstruction,
+				ttl: `${CACHE_TTL * 60}s`,
+				httpOptions: { timeout: 10_000 },
+			}
+
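+			// Note that the promise is not awaited: cache creation runs in the background and does not block
+			// the current request.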
+			this.client.caches
+				.create({ model, config })
+				.then((result) => {
+					console.log(`[GeminiHandler] caches.create result -> ${JSON.stringify(result)}`)
+					const { name, usageMetadata } = result
+
+					if (name) {
+						this.contentCaches.set<CacheEntry>(cacheKey, { key: name, count: contents.length })
+						cacheWriteTokens = usageMetadata?.totalTokenCount ?? 0
+						console.log(
+							`[GeminiHandler] cached ${contents.length} messages (${cacheWriteTokens} tokens) in ${Date.now() - timestamp}ms`,
+						)
+					}
+				})
+				.catch((error) => {
+					console.error(`[GeminiHandler] caches.create error`, error)
+				})
+		}
 
-		// 	if (newCacheEntry.name) {
-		// 		this.contentCaches.set(cacheKey, { key: newCacheEntry.name, count: contents.length })
-		// 		cacheWriteTokens = newCacheEntry.usageMetadata?.totalTokenCount ?? 0
-		// 	}
-		// }
+		const isCacheUsed = !!cachedContent
 
 		const params: GenerateContentParameters = {
 			model,
 			contents: uncachedContent ?? contents,
 			config: {
 				cachedContent,
-				systemInstruction: cachedContent ? undefined : systemInstruction,
+				systemInstruction: isCacheUsed ? undefined : systemInstruction,
 				httpOptions: this.options.googleGeminiBaseUrl
 					? { baseUrl: this.options.googleGeminiBaseUrl }
 					: undefined,
@@ -94,13 +134,15 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 			const cacheReadTokens = lastUsageMetadata.cachedContentTokenCount
 			const reasoningTokens = lastUsageMetadata.thoughtsTokenCount
 
-			// const totalCost = this.calculateCost({
-			// 	info,
-			// 	inputTokens,
-			// 	outputTokens,
-			// 	cacheWriteTokens,
-			// 	cacheReadTokens,
-			// })
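+			// A cost is only reported when this request actually used a cached prefix.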
+			const totalCost = isCacheUsed
+				? this.calculateCost({
+						info,
+						inputTokens,
+						outputTokens,
+						cacheWriteTokens,
+						cacheReadTokens,
+					})
+				: undefined
 
 			yield {
 				type: "usage",
@@ -109,7 +151,7 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 				cacheWriteTokens,
 				cacheReadTokens,
 				reasoningTokens,
-				// totalCost,
+				totalCost,
 			}
 		}
 	}