fix: improve VS Code LM token usage reporting for context window updates

roomote · roomote · commit c9a53c566e79 · 2025-07-23T13:27:38.000Z
- Add initial usage yield with input tokens at stream start
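- Yield periodic token updates during streaming (each time more than 500 new characters of text have accumulated; each update carries the output token count for all text received so far)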
- Include cache token fields (set to 0) for consistency with other providers
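- Report input tokens as 0 in the final usage chunk, since the input count is now yielded at stream start
- Together, these changes ensure the context window progress bar updates properly during streaming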
diff --git a/packages/types/src/__tests__/provider-settings.test.ts b/packages/types/src/__tests__/provider-settings.test.ts
@@ -46,6 +46,12 @@ describe("getApiProtocol", () => {
 			expect(getApiProtocol("litellm", "claude-instant")).toBe("openai")
 			expect(getApiProtocol("ollama", "claude-model")).toBe("openai")
 		})
+
+		it("should return 'openai' for vscode-lm provider", () => {
+			expect(getApiProtocol("vscode-lm")).toBe("openai")
+			expect(getApiProtocol("vscode-lm", "copilot-gpt-4")).toBe("openai")
+			expect(getApiProtocol("vscode-lm", "copilot-gpt-3.5")).toBe("openai")
+		})
 	})
 
 	describe("Edge cases", () => {
diff --git a/src/api/providers/vscode-lm.ts b/src/api/providers/vscode-lm.ts
@@ -361,8 +361,20 @@ export class VsCodeLmHandler extends BaseProvider implements SingleCompletionHan
 		// Calculate input tokens before starting the stream
 		const totalInputTokens: number = await this.calculateTotalInputTokens(systemPrompt, vsCodeLmMessages)
 
+		// Yield initial usage with input tokens (similar to Anthropic's message_start)
+		yield {
+			type: "usage",
+			inputTokens: totalInputTokens,
+			outputTokens: 0,
+			// VS Code LM doesn't provide cache token information, so we set them to 0
+			cacheWriteTokens: 0,
+			cacheReadTokens: 0,
+		}
+
 		// Accumulate the text and count at the end of the stream to reduce token counting overhead.
 		let accumulatedText: string = ""
+		let lastTokenCountUpdate: number = 0
+		const TOKEN_UPDATE_INTERVAL = 500 // Update token count every 500 characters
 
 		try {
 			// Create the response stream with minimal required options
@@ -393,6 +405,19 @@ export class VsCodeLmHandler extends BaseProvider implements SingleCompletionHan
 						type: "text",
 						text: chunk.value,
 					}
+
+					// Periodically yield token updates during streaming
+					if (accumulatedText.length - lastTokenCountUpdate > TOKEN_UPDATE_INTERVAL) {
+						const currentOutputTokens = await this.internalCountTokens(accumulatedText)
+						yield {
+							type: "usage",
+							inputTokens: 0,
+							outputTokens: currentOutputTokens,
+							cacheWriteTokens: 0,
+							cacheReadTokens: 0,
+						}
+						lastTokenCountUpdate = accumulatedText.length
+					}
 				} else if (chunk instanceof vscode.LanguageModelToolCallPart) {
 					try {
 						// Validate tool call parameters
@@ -448,10 +473,14 @@ export class VsCodeLmHandler extends BaseProvider implements SingleCompletionHan
 			const totalOutputTokens: number = await this.internalCountTokens(accumulatedText)
 
 			// Report final usage after stream completion
+			// Note: We report the total tokens here, not incremental, as the UI expects the final total
 			yield {
 				type: "usage",
-				inputTokens: totalInputTokens,
-				outputTokens: totalOutputTokens,
+				inputTokens: 0, // Already reported at the start
+				outputTokens: totalOutputTokens, // Report the final total
+				// VS Code LM doesn't provide cache token information, so we set them to 0
+				cacheWriteTokens: 0,
+				cacheReadTokens: 0,
 			}
 		} catch (error: unknown) {
 			this.ensureCleanState()