@@ -54,7 +54,6 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
5454
5555 let uncachedContent : Content [ ] | undefined = undefined
5656 let cachedContent : string | undefined = undefined
57- let cacheWriteTokens : number | undefined = undefined
5857
5958 // The minimum input token count for context caching is 4,096.
6059 // For a basic approximation we assume 4 characters per token.
@@ -67,6 +66,8 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
6766 cacheKey &&
6867 contentsLength > 4 * CONTEXT_CACHE_TOKEN_MINIMUM
6968
69+ let cacheWrite = false
70+
7071 if ( isCacheAvailable ) {
7172 const cacheEntry = this . contentCaches . get < CacheEntry > ( cacheKey )
7273
@@ -97,9 +98,8 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
9798
9899 if ( name ) {
99100 this . contentCaches . set < CacheEntry > ( cacheKey , { key : name , count : contents . length } )
100- cacheWriteTokens = usageMetadata ?. totalTokenCount ?? 0
101101 console . log (
102- `[GeminiHandler] cached ${ contents . length } messages (${ cacheWriteTokens } tokens) in ${ Date . now ( ) - timestamp } ms` ,
102+ `[GeminiHandler] cached ${ contents . length } messages (${ usageMetadata ?. totalTokenCount ?? "-" } tokens) in ${ Date . now ( ) - timestamp } ms` ,
103103 )
104104 }
105105 } )
@@ -109,6 +109,8 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
109109 . finally ( ( ) => {
110110 this . isCacheBusy = false
111111 } )
112+
113+ cacheWrite = true
112114 }
113115 }
114116
@@ -146,27 +148,24 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
146148 if ( lastUsageMetadata ) {
147149 const inputTokens = lastUsageMetadata . promptTokenCount ?? 0
148150 const outputTokens = lastUsageMetadata . candidatesTokenCount ?? 0
151+ const cacheWriteTokens = cacheWrite ? inputTokens : 0
149152 const cacheReadTokens = lastUsageMetadata . cachedContentTokenCount
150153 const reasoningTokens = lastUsageMetadata . thoughtsTokenCount
151154
152- const totalCost = isCacheUsed
153- ? this . calculateCost ( {
154- info,
155- inputTokens,
156- outputTokens,
157- cacheWriteTokens,
158- cacheReadTokens,
159- } )
160- : undefined
161-
162155 yield {
163156 type : "usage" ,
164157 inputTokens,
165158 outputTokens,
166159 cacheWriteTokens,
167160 cacheReadTokens,
168161 reasoningTokens,
169- totalCost,
162+ totalCost : this . calculateCost ( {
163+ info,
164+ inputTokens,
165+ outputTokens,
166+ cacheWriteTokens,
167+ cacheReadTokens,
168+ } ) ,
170169 }
171170 }
172171 }
@@ -250,8 +249,8 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
250249 info,
251250 inputTokens,
252251 outputTokens,
253- cacheWriteTokens,
254- cacheReadTokens,
252+ cacheWriteTokens = 0 ,
253+ cacheReadTokens = 0 ,
255254 } : {
256255 info : ModelInfo
257256 inputTokens : number
@@ -281,21 +280,32 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
281280 }
282281 }
283282
284- let inputTokensCost = inputPrice * ( inputTokens / 1_000_000 )
285- let outputTokensCost = outputPrice * ( outputTokens / 1_000_000 )
286- let cacheWriteCost = 0
287- let cacheReadCost = 0
283+ // Subtract the cached input tokens from the total input tokens.
284+ const uncachedInputTokens = inputTokens - cacheReadTokens
288285
289- if ( cacheWriteTokens ) {
290- cacheWriteCost = cacheWritesPrice * ( cacheWriteTokens / 1_000_000 ) * ( CACHE_TTL / 60 )
286+ let cacheWriteCost =
287+ cacheWriteTokens > 0 ? cacheWritesPrice * ( cacheWriteTokens / 1_000_000 ) * ( CACHE_TTL / 60 ) : 0
288+ let cacheReadCost = cacheReadTokens > 0 ? cacheReadsPrice * ( cacheReadTokens / 1_000_000 ) : 0
289+
290+ const inputTokensCost = inputPrice * ( uncachedInputTokens / 1_000_000 )
291+ const outputTokensCost = outputPrice * ( outputTokens / 1_000_000 )
292+ const totalCost = inputTokensCost + outputTokensCost + cacheWriteCost + cacheReadCost
293+
294+ const trace : Record < string , { price : number ; tokens : number ; cost : number } > = {
295+ input : { price : inputPrice , tokens : uncachedInputTokens , cost : inputTokensCost } ,
296+ output : { price : outputPrice , tokens : outputTokens , cost : outputTokensCost } ,
291297 }
292298
293- if ( cacheReadTokens ) {
294- const uncachedReadTokens = inputTokens - cacheReadTokens
295- cacheReadCost = cacheReadsPrice * ( cacheReadTokens / 1_000_000 )
296- inputTokensCost = inputPrice * ( uncachedReadTokens / 1_000_000 )
299+ if ( cacheWriteTokens > 0 ) {
300+ trace . cacheWrite = { price : cacheWritesPrice , tokens : cacheWriteTokens , cost : cacheWriteCost }
297301 }
298302
299- return inputTokensCost + outputTokensCost + cacheWriteCost + cacheReadCost
303+ if ( cacheReadTokens > 0 ) {
304+ trace . cacheRead = { price : cacheReadsPrice , tokens : cacheReadTokens , cost : cacheReadCost }
305+ }
306+
307+ // console.log(`[GeminiHandler] calculateCost -> ${totalCost}`, trace)
308+
309+ return totalCost
300310 }
301311}
0 commit comments