Commit c9a53c5

fix: improve VS Code LM token usage reporting for context window updates
- Add initial usage yield with input tokens at stream start
- Yield periodic token updates during streaming (every 500 chars)
- Include cache token fields (set to 0) for consistency with other providers
- This ensures the context window progress bar updates properly during streaming
1 parent 714fafd commit c9a53c5
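
To make the new reporting flow easier to follow before reading the diff, here is a minimal TypeScript sketch of the pattern the commit message describes: one usage chunk up front carrying the input tokens, periodic usage chunks while text streams, and a final usage chunk with the total output count. The UsageChunk type, the reportUsage name, and the countTokens callback are illustrative stand-ins, not the project's real types or helpers; the actual change is in src/api/providers/vscode-lm.ts below.

// Sketch of the reporting pattern described above (illustrative names only).
type UsageChunk = {
	type: "usage"
	inputTokens: number
	outputTokens: number
	cacheWriteTokens: number // VS Code LM provides no cache info, so these stay 0
	cacheReadTokens: number
}

async function* reportUsage(
	textChunks: AsyncIterable<string>,
	inputTokens: number,
	countTokens: (text: string) => Promise<number>,
): AsyncGenerator<UsageChunk | { type: "text"; text: string }> {
	// 1. Initial usage chunk so the context window bar gets the input size immediately.
	yield { type: "usage", inputTokens, outputTokens: 0, cacheWriteTokens: 0, cacheReadTokens: 0 }

	let accumulated = ""
	let lastUpdate = 0
	const TOKEN_UPDATE_INTERVAL = 500 // characters between periodic usage updates

	for await (const text of textChunks) {
		accumulated += text
		yield { type: "text", text }

		// 2. Periodic usage chunks carrying the running output-token total.
		if (accumulated.length - lastUpdate > TOKEN_UPDATE_INTERVAL) {
			const outputTokens = await countTokens(accumulated)
			yield { type: "usage", inputTokens: 0, outputTokens, cacheWriteTokens: 0, cacheReadTokens: 0 }
			lastUpdate = accumulated.length
		}
	}

	// 3. Final usage chunk with the total output-token count for the whole stream.
	const outputTokens = await countTokens(accumulated)
	yield { type: "usage", inputTokens: 0, outputTokens, cacheWriteTokens: 0, cacheReadTokens: 0 }
}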

File tree

2 files changed: +37, -2 lines changed

packages/types/src/__tests__/provider-settings.test.ts

Lines changed: 6 additions & 0 deletions
@@ -46,6 +46,12 @@ describe("getApiProtocol", () => {
 		expect(getApiProtocol("litellm", "claude-instant")).toBe("openai")
 		expect(getApiProtocol("ollama", "claude-model")).toBe("openai")
 	})
+
+	it("should return 'openai' for vscode-lm provider", () => {
+		expect(getApiProtocol("vscode-lm")).toBe("openai")
+		expect(getApiProtocol("vscode-lm", "copilot-gpt-4")).toBe("openai")
+		expect(getApiProtocol("vscode-lm", "copilot-gpt-3.5")).toBe("openai")
+	})
 })
 
 describe("Edge cases", () => {

src/api/providers/vscode-lm.ts

Lines changed: 31 additions & 2 deletions
@@ -361,8 +361,20 @@ export class VsCodeLmHandler extends BaseProvider implements SingleCompletionHan
 		// Calculate input tokens before starting the stream
 		const totalInputTokens: number = await this.calculateTotalInputTokens(systemPrompt, vsCodeLmMessages)
 
+		// Yield initial usage with input tokens (similar to Anthropic's message_start)
+		yield {
+			type: "usage",
+			inputTokens: totalInputTokens,
+			outputTokens: 0,
+			// VS Code LM doesn't provide cache token information, so we set them to 0
+			cacheWriteTokens: 0,
+			cacheReadTokens: 0,
+		}
+
 		// Accumulate the text and count at the end of the stream to reduce token counting overhead.
 		let accumulatedText: string = ""
+		let lastTokenCountUpdate: number = 0
+		const TOKEN_UPDATE_INTERVAL = 500 // Update token count every 500 characters
 
 		try {
 			// Create the response stream with minimal required options
@@ -393,6 +405,19 @@ export class VsCodeLmHandler extends BaseProvider implements SingleCompletionHan
 						type: "text",
 						text: chunk.value,
 					}
+
+					// Periodically yield token updates during streaming
+					if (accumulatedText.length - lastTokenCountUpdate > TOKEN_UPDATE_INTERVAL) {
+						const currentOutputTokens = await this.internalCountTokens(accumulatedText)
+						yield {
+							type: "usage",
+							inputTokens: 0,
+							outputTokens: currentOutputTokens,
+							cacheWriteTokens: 0,
+							cacheReadTokens: 0,
+						}
+						lastTokenCountUpdate = accumulatedText.length
+					}
 				} else if (chunk instanceof vscode.LanguageModelToolCallPart) {
 					try {
 						// Validate tool call parameters
@@ -448,10 +473,14 @@ export class VsCodeLmHandler extends BaseProvider implements SingleCompletionHan
 			const totalOutputTokens: number = await this.internalCountTokens(accumulatedText)
 
 			// Report final usage after stream completion
+			// Note: We report the total tokens here, not incremental, as the UI expects the final total
 			yield {
 				type: "usage",
-				inputTokens: totalInputTokens,
-				outputTokens: totalOutputTokens,
+				inputTokens: 0, // Already reported at the start
+				outputTokens: totalOutputTokens, // Report the final total
+				// VS Code LM doesn't provide cache token information, so we set them to 0
+				cacheWriteTokens: 0,
+				cacheReadTokens: 0,
 			}
 		} catch (error: unknown) {
 			this.ensureCleanState()
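
Because usage is now reported in several chunks instead of a single one at the end, a consumer has to combine them. The sketch below is a hypothetical aggregation, not code from this repository: it assumes inputTokens is only non-zero in the initial chunk and that outputTokens carries a running total (so the latest value wins rather than being summed), which matches what the diff above yields.

// Hypothetical consumer-side aggregation of the usage chunks yielded above.
type Usage = { inputTokens: number; outputTokens: number; cacheWriteTokens: number; cacheReadTokens: number }

function foldUsage(chunks: Array<{ type: string } & Partial<Usage>>): Usage {
	const total: Usage = { inputTokens: 0, outputTokens: 0, cacheWriteTokens: 0, cacheReadTokens: 0 }
	for (const chunk of chunks) {
		if (chunk.type !== "usage") continue
		// Input tokens arrive once, in the initial chunk; later chunks report 0.
		total.inputTokens += chunk.inputTokens ?? 0
		// Output tokens are reported as a running total, so keep the latest value.
		total.outputTokens = chunk.outputTokens ?? total.outputTokens
		total.cacheWriteTokens += chunk.cacheWriteTokens ?? 0
		total.cacheReadTokens += chunk.cacheReadTokens ?? 0
	}
	return total
}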
