@@ -2,6 +2,7 @@ import type { Anthropic } from "@anthropic-ai/sdk"
 // Restore GenerateContentConfig import and add GenerateContentResponseUsageMetadata
 import { GoogleGenAI, type GenerateContentConfig, type GenerateContentResponseUsageMetadata } from "@google/genai"
 import { withRetry } from "../retry"
+import { Part } from "@google/genai"
 import { ApiHandler } from "../"
 import { ApiHandlerOptions, geminiDefaultModelId, GeminiModelId, geminiModels, ModelInfo } from "@shared/api"
 import { convertAnthropicMessageToGemini } from "../transform/gemini-format"
@@ -96,9 +97,10 @@ export class GeminiHandler implements ApiHandler {
 		}

 		// Add thinking config if the model supports it
-		if (info.thinkingConfig?.outputPrice !== undefined && maxBudget > 0) {
+		if (thinkingBudget > 0) {
 			requestConfig.thinkingConfig = {
 				thinkingBudget: thinkingBudget,
+				includeThoughts: true,
 			}
 		}

@@ -111,6 +113,7 @@ export class GeminiHandler implements ApiHandler {
 		let promptTokens = 0
 		let outputTokens = 0
 		let cacheReadTokens = 0
+		let thoughtsTokenCount = 0 // Initialize thought token counts
 		let lastUsageMetadata: GenerateContentResponseUsageMetadata | undefined

 		try {
@@ -130,6 +133,31 @@ export class GeminiHandler implements ApiHandler {
 					isFirstSdkChunk = false
 				}

+				// Handle thinking content from Gemini's response
+				const candidateForThoughts = chunk?.candidates?.[0]
+				const partsForThoughts = candidateForThoughts?.content?.parts
+				let thoughts = "" // Initialize as empty string
+
+				if (partsForThoughts) {
+					// This ensures partsForThoughts is a Part[] array
+					for (const part of partsForThoughts) {
+						const { thought, text } = part as Part
+						if (thought && text) {
+							// Ensure part.text exists
+							// Handle the thought part
+							thoughts += text + "\n" // Append thought and a newline
+						}
+					}
+				}
+
+				if (thoughts.trim() !== "") {
+					yield {
+						type: "reasoning",
+						reasoning: thoughts.trim(),
+					}
+					thoughts = "" // Reset thoughts after yielding
+				}
+
 				if (chunk.text) {
 					yield {
 						type: "text",
@@ -141,6 +169,7 @@ export class GeminiHandler implements ApiHandler {
 					lastUsageMetadata = chunk.usageMetadata
 					promptTokens = lastUsageMetadata.promptTokenCount ?? promptTokens
 					outputTokens = lastUsageMetadata.candidatesTokenCount ?? outputTokens
+					thoughtsTokenCount = lastUsageMetadata.thoughtsTokenCount ?? thoughtsTokenCount
 					cacheReadTokens = lastUsageMetadata.cachedContentTokenCount ?? cacheReadTokens
 				}
 			}
@@ -151,12 +180,14 @@ export class GeminiHandler implements ApiHandler {
 				info,
 				inputTokens: promptTokens,
 				outputTokens,
+				thoughtsTokenCount,
 				cacheReadTokens,
 			})
 			yield {
 				type: "usage",
 				inputTokens: promptTokens,
 				outputTokens,
+				thoughtsTokenCount,
 				cacheReadTokens,
 				cacheWriteTokens: 0,
 				totalCost,
@@ -239,11 +270,13 @@ export class GeminiHandler implements ApiHandler {
 		info,
 		inputTokens,
 		outputTokens,
+		thoughtsTokenCount = 0,
 		cacheReadTokens = 0,
 	}: {
 		info: ModelInfo
 		inputTokens: number
 		outputTokens: number
+		thoughtsTokenCount: number
 		cacheReadTokens?: number
 	}) {
 		// Exit early if any required pricing information is missing
@@ -275,18 +308,18 @@ export class GeminiHandler implements ApiHandler {
 		const inputTokensCost = inputPrice * (uncachedInputTokens / 1_000_000)

 		// 2. Output token costs
-		const outputTokensCost = outputPrice * (outputTokens / 1_000_000)
+		const responseTokensCost = outputPrice * ((outputTokens + thoughtsTokenCount) / 1_000_000)

 		// 3. Cache read costs (immediate)
 		const cacheReadCost = (cacheReadTokens ?? 0) > 0 ? cacheReadsPrice * ((cacheReadTokens ?? 0) / 1_000_000) : 0

 		// Calculate total immediate cost (excluding cache write/storage costs)
-		const totalCost = inputTokensCost + outputTokensCost + cacheReadCost
+		const totalCost = inputTokensCost + responseTokensCost + cacheReadCost

 		// Create the trace object for debugging
 		const trace: Record<string, { price: number; tokens: number; cost: number }> = {
 			input: { price: inputPrice, tokens: uncachedInputTokens, cost: inputTokensCost },
-			output: { price: outputPrice, tokens: outputTokens, cost: outputTokensCost },
+			output: { price: outputPrice, tokens: outputTokens, cost: responseTokensCost },
 		}

 		// Only include cache read costs in the trace (cache write costs are tracked separately)
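
For context, a minimal worked example (not part of the diff) of what the updated formula computes once thoughtsTokenCount is folded into the output side. The prices and token counts below are made-up placeholders rather than the ModelInfo values the handler actually reads, and the uncachedInputTokens derivation is a simplification:

// Placeholder per-million-token prices (assumed values, not real Gemini rates).
const inputPrice = 1.25
const outputPrice = 10.0
const cacheReadsPrice = 0.31

// Example token counts, as they would arrive via usageMetadata.
const inputTokens = 12_000
const outputTokens = 800
const thoughtsTokenCount = 2_400 // thinking tokens are now billed like output tokens
const cacheReadTokens = 4_000

// Simplification: treat uncached input as total input minus cache reads.
const uncachedInputTokens = inputTokens - cacheReadTokens

const inputTokensCost = inputPrice * (uncachedInputTokens / 1_000_000) // 0.01
const responseTokensCost = outputPrice * ((outputTokens + thoughtsTokenCount) / 1_000_000) // 0.032
const cacheReadCost = cacheReadsPrice * (cacheReadTokens / 1_000_000) // 0.00124

const totalCost = inputTokensCost + responseTokensCost + cacheReadCost
console.log(totalCost) // ~0.04324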
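
And a rough sketch of how a caller might consume the new chunk types. The createMessage signature and the text field on text chunks are assumed from the ApiHandler interface this file implements; they are not shown in this diff:

import type { ApiHandler } from "../"
import type { Anthropic } from "@anthropic-ai/sdk"

// Hypothetical consumer: routes streamed reasoning and answer text separately.
async function printResponse(handler: ApiHandler, systemPrompt: string, messages: Anthropic.Messages.MessageParam[]) {
	for await (const chunk of handler.createMessage(systemPrompt, messages)) {
		if (chunk.type === "reasoning") {
			console.log(`[thinking] ${chunk.reasoning}`) // thought content yielded above
		} else if (chunk.type === "text") {
			process.stdout.write(chunk.text) // visible answer text (field name assumed)
		} else if (chunk.type === "usage") {
			console.log(`\nthought tokens: ${chunk.thoughtsTokenCount}, cost: ${chunk.totalCost}`)
		}
	}
}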