@@ -361,8 +361,20 @@ export class VsCodeLmHandler extends BaseProvider implements SingleCompletionHan
361361 // Calculate input tokens before starting the stream
362362 const totalInputTokens : number = await this . calculateTotalInputTokens ( systemPrompt , vsCodeLmMessages )
363363
364+ // Yield initial usage with input tokens (similar to Anthropic's message_start)
365+ yield {
366+ type : "usage" ,
367+ inputTokens : totalInputTokens ,
368+ outputTokens : 0 ,
369+ // VS Code LM doesn't provide cache token information, so we set them to 0
370+ cacheWriteTokens : 0 ,
371+ cacheReadTokens : 0 ,
372+ }
373+
364374 // Accumulate the text; output tokens are counted periodically while streaming and once more at the end of the stream, rather than per chunk, to reduce token counting overhead.
365375 let accumulatedText : string = ""
376+ let lastTokenCountUpdate : number = 0
377+ const TOKEN_UPDATE_INTERVAL = 500 // Update token count every 500 characters
366378
367379 try {
368380 // Create the response stream with minimal required options
@@ -393,6 +405,19 @@ export class VsCodeLmHandler extends BaseProvider implements SingleCompletionHan
393405 type : "text" ,
394406 text : chunk . value ,
395407 }
408+
409+ // Periodically yield token updates during streaming
410+ if ( accumulatedText . length - lastTokenCountUpdate > TOKEN_UPDATE_INTERVAL ) {
411+ const currentOutputTokens = await this . internalCountTokens ( accumulatedText )
412+ yield {
413+ type : "usage" ,
414+ inputTokens : 0 ,
415+ outputTokens : currentOutputTokens ,
416+ cacheWriteTokens : 0 ,
417+ cacheReadTokens : 0 ,
418+ }
419+ lastTokenCountUpdate = accumulatedText . length
420+ }
396421 } else if ( chunk instanceof vscode . LanguageModelToolCallPart ) {
397422 try {
398423 // Validate tool call parameters
@@ -448,10 +473,14 @@ export class VsCodeLmHandler extends BaseProvider implements SingleCompletionHan
448473 const totalOutputTokens : number = await this . internalCountTokens ( accumulatedText )
449474
450475 // Report final usage after stream completion
476+ // Note: outputTokens here is the final cumulative total for the whole response (not an increment over the periodic updates); inputTokens is 0 because it was already reported at the start
451477 yield {
452478 type : "usage" ,
453- inputTokens : totalInputTokens ,
454- outputTokens : totalOutputTokens ,
479+ inputTokens : 0 , // Already reported at the start
480+ outputTokens : totalOutputTokens , // Report the final total
481+ // VS Code LM doesn't provide cache token information, so we set them to 0
482+ cacheWriteTokens : 0 ,
483+ cacheReadTokens : 0 ,
455484 }
456485 } catch ( error : unknown ) {
457486 this . ensureCleanState ( )
0 commit comments