
Commit 6722ae2

fix: add tiktoken fallback for VSCode LM API token counting

- Add tiktoken fallback when VSCode API returns 0, negative, or invalid values
- Add tiktoken fallback when VSCode API is unavailable or throws errors
- Update token counting to provide more frequent updates during streaming (every 100 chars)
- Add comprehensive tests for fallback behavior

Fixes #6112

1 parent 342ee70 commit 6722ae2

File tree

2 files changed: +251 -14 lines changed

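The change described in the commit message boils down to a three-tier decision chain in countTokens: trust the VSCode LM API when it returns a sane positive count; fall back to the base provider's tiktoken counter when the API is unavailable, throws, or reports a non-numeric, negative, or zero count for non-empty text; and as a last resort estimate roughly one token per four characters. The following is a condensed, self-contained TypeScript sketch of that chain, not the handler code itself; countViaVsCodeApi and countViaTiktoken are hypothetical stand-ins for the real handler and base-provider methods shown in the diffs below:

// Condensed sketch of the fallback chain from this commit; the two counter callbacks are
// hypothetical stand-ins for the VSCode LM API call and the base provider's tiktoken counter.
type TokenCounter = (text: string) => Promise<number>

async function countTokensWithFallback(
	text: string,
	countViaVsCodeApi: TokenCounter | null, // null when no client or cancellation token is available
	countViaTiktoken: TokenCounter,
): Promise<number> {
	// Tier 3: rough last-resort estimate (~4 characters per token), mirroring fallbackToTiktoken's catch branch.
	const roughEstimate = () => Math.ceil(text.length / 4)

	// Tier 2: tiktoken via the base provider, guarded so a failure still returns something usable.
	const tiktokenFallback = async () => {
		try {
			return await countViaTiktoken(text)
		} catch {
			return roughEstimate()
		}
	}

	// Tier 1: the VSCode LM API, trusted only when it returns a sane positive count.
	if (!countViaVsCodeApi) return tiktokenFallback()
	try {
		const count = await countViaVsCodeApi(text)
		if (typeof count !== "number" || count < 0) return tiktokenFallback()
		if (count === 0 && text.length > 0) return tiktokenFallback()
		return count
	} catch {
		return tiktokenFallback()
	}
}

The intent, as exercised by the tests below, is that a zero, negative, non-numeric, or thrown result from the VSCode API never reaches the caller as-is.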

src/api/providers/__tests__/vscode-lm.spec.ts

Lines changed: 158 additions & 0 deletions
@@ -59,6 +59,19 @@ import { VsCodeLmHandler } from "../vscode-lm"
 import type { ApiHandlerOptions } from "../../../shared/api"
 import type { Anthropic } from "@anthropic-ai/sdk"

+// Mock the base provider's countTokens method
+vi.mock("../base-provider", async () => {
+	const actual = await vi.importActual("../base-provider")
+	return {
+		...actual,
+		BaseProvider: class MockBaseProvider {
+			async countTokens() {
+				return 100 // Mock tiktoken to return 100 tokens
+			}
+		},
+	}
+})
+
 const mockLanguageModelChat = {
 	id: "test-model",
 	name: "Test Model",
@@ -300,4 +313,149 @@ describe("VsCodeLmHandler", () => {
 			await expect(promise).rejects.toThrow("VSCode LM completion error: Completion failed")
 		})
 	})
+
+	describe("countTokens with tiktoken fallback", () => {
+		it("should fall back to tiktoken when VSCode API returns 0 for non-empty content", async () => {
+			const content: Anthropic.Messages.ContentBlockParam[] = [
+				{
+					type: "text",
+					text: "Hello world",
+				},
+			]
+
+			// Mock VSCode API to return 0
+			mockLanguageModelChat.countTokens.mockResolvedValue(0)
+			handler["client"] = mockLanguageModelChat
+			handler["currentRequestCancellation"] = new vscode.CancellationTokenSource()
+
+			const result = await handler.countTokens(content)
+
+			// Should use tiktoken fallback which returns 100
+			expect(result).toBe(100)
+		})
+
+		it("should fall back to tiktoken when VSCode API throws an error", async () => {
+			const content: Anthropic.Messages.ContentBlockParam[] = [
+				{
+					type: "text",
+					text: "Hello world",
+				},
+			]
+
+			// Mock VSCode API to throw an error
+			mockLanguageModelChat.countTokens.mockRejectedValue(new Error("API Error"))
+			handler["client"] = mockLanguageModelChat
+			handler["currentRequestCancellation"] = new vscode.CancellationTokenSource()
+
+			const result = await handler.countTokens(content)
+
+			// Should use tiktoken fallback which returns 100
+			expect(result).toBe(100)
+		})
+
+		it("should use VSCode API when it returns valid token count", async () => {
+			const content: Anthropic.Messages.ContentBlockParam[] = [
+				{
+					type: "text",
+					text: "Hello world",
+				},
+			]
+
+			// Mock VSCode API to return valid count
+			mockLanguageModelChat.countTokens.mockResolvedValue(50)
+			handler["client"] = mockLanguageModelChat
+			handler["currentRequestCancellation"] = new vscode.CancellationTokenSource()
+
+			const result = await handler.countTokens(content)
+
+			// Should use VSCode API result
+			expect(result).toBe(50)
+		})
+
+		it("should fall back to tiktoken when no client is available", async () => {
+			const content: Anthropic.Messages.ContentBlockParam[] = [
+				{
+					type: "text",
+					text: "Hello world",
+				},
+			]
+
+			// No client available
+			handler["client"] = null
+
+			const result = await handler.countTokens(content)
+
+			// Should use tiktoken fallback which returns 100
+			expect(result).toBe(100)
+		})
+
+		it("should fall back to tiktoken when VSCode API returns negative value", async () => {
+			const content: Anthropic.Messages.ContentBlockParam[] = [
+				{
+					type: "text",
+					text: "Hello world",
+				},
+			]
+
+			// Mock VSCode API to return negative value
+			mockLanguageModelChat.countTokens.mockResolvedValue(-1)
+			handler["client"] = mockLanguageModelChat
+			handler["currentRequestCancellation"] = new vscode.CancellationTokenSource()
+
+			const result = await handler.countTokens(content)
+
+			// Should use tiktoken fallback which returns 100
+			expect(result).toBe(100)
+		})
+	})
+
+	describe("createMessage with frequent token updates", () => {
+		beforeEach(() => {
+			const mockModel = { ...mockLanguageModelChat }
+			;(vscode.lm.selectChatModels as Mock).mockResolvedValueOnce([mockModel])
+			mockLanguageModelChat.countTokens.mockResolvedValue(10)
+
+			// Override the default client with our test client
+			handler["client"] = mockLanguageModelChat
+		})
+
+		it("should provide token updates during streaming", async () => {
+			const systemPrompt = "You are a helpful assistant"
+			const messages: Anthropic.Messages.MessageParam[] = [
+				{
+					role: "user" as const,
+					content: "Hello",
+				},
+			]
+
+			// Create a long response to trigger intermediate token updates
+			const longResponse = "a".repeat(150) // 150 characters to trigger at least one update
+			mockLanguageModelChat.sendRequest.mockResolvedValueOnce({
+				stream: (async function* () {
+					// Send response in chunks
+					yield new vscode.LanguageModelTextPart(longResponse.slice(0, 50))
+					yield new vscode.LanguageModelTextPart(longResponse.slice(50, 100))
+					yield new vscode.LanguageModelTextPart(longResponse.slice(100))
+					return
+				})(),
+				text: (async function* () {
+					yield longResponse
+					return
+				})(),
+			})
+
+			const stream = handler.createMessage(systemPrompt, messages)
+			const chunks = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			// Should have text chunks and multiple usage updates
+			const textChunks = chunks.filter((c) => c.type === "text")
+			const usageChunks = chunks.filter((c) => c.type === "usage")
+
+			expect(textChunks).toHaveLength(3) // 3 text chunks
+			expect(usageChunks.length).toBeGreaterThan(1) // At least 2 usage updates (intermediate + final)
+		})
+	})
 })

src/api/providers/vscode-lm.ts

Lines changed: 93 additions & 14 deletions
@@ -183,19 +183,32 @@ export class VsCodeLmHandler extends BaseProvider implements SingleCompletionHan
 	 * @returns A promise resolving to the token count
 	 */
 	override async countTokens(content: Array<Anthropic.Messages.ContentBlockParam>): Promise<number> {
-		// Convert Anthropic content blocks to a string for VSCode LM token counting
-		let textContent = ""
-
-		for (const block of content) {
-			if (block.type === "text") {
-				textContent += block.text || ""
-			} else if (block.type === "image") {
-				// VSCode LM doesn't support images directly, so we'll just use a placeholder
-				textContent += "[IMAGE]"
+		try {
+			// Convert Anthropic content blocks to a string for VSCode LM token counting
+			let textContent = ""
+
+			for (const block of content) {
+				if (block.type === "text") {
+					textContent += block.text || ""
+				} else if (block.type === "image") {
+					// VSCode LM doesn't support images directly, so we'll just use a placeholder
+					textContent += "[IMAGE]"
+				}
 			}
-		}

-		return this.internalCountTokens(textContent)
+			const tokenCount = await this.internalCountTokens(textContent)
+
+			// If VSCode API returns 0 or fails, fall back to tiktoken
+			if (tokenCount === 0 && textContent.length > 0) {
+				console.debug("Roo Code <Language Model API>: Falling back to tiktoken for token counting")
+				return super.countTokens(content)
+			}
+
+			return tokenCount
+		} catch (error) {
+			console.warn("Roo Code <Language Model API>: Error in countTokens, falling back to tiktoken:", error)
+			return super.countTokens(content)
+		}
 	}

 	/**
@@ -204,12 +217,24 @@ export class VsCodeLmHandler extends BaseProvider implements SingleCompletionHan
 	private async internalCountTokens(text: string | vscode.LanguageModelChatMessage): Promise<number> {
 		// Check for required dependencies
 		if (!this.client) {
-			console.warn("Roo Code <Language Model API>: No client available for token counting")
+			console.warn(
+				"Roo Code <Language Model API>: No client available for token counting, using tiktoken fallback",
+			)
+			// Fall back to tiktoken for string inputs
+			if (typeof text === "string") {
+				return this.fallbackToTiktoken(text)
+			}
 			return 0
 		}

 		if (!this.currentRequestCancellation) {
-			console.warn("Roo Code <Language Model API>: No cancellation token available for token counting")
+			console.warn(
+				"Roo Code <Language Model API>: No cancellation token available for token counting, using tiktoken fallback",
+			)
+			// Fall back to tiktoken for string inputs
+			if (typeof text === "string") {
+				return this.fallbackToTiktoken(text)
+			}
 			return 0
 		}

@@ -240,14 +265,30 @@ export class VsCodeLmHandler extends BaseProvider implements SingleCompletionHan
 			// Validate the result
 			if (typeof tokenCount !== "number") {
 				console.warn("Roo Code <Language Model API>: Non-numeric token count received:", tokenCount)
+				// Fall back to tiktoken for string inputs
+				if (typeof text === "string") {
+					return this.fallbackToTiktoken(text)
+				}
 				return 0
 			}

 			if (tokenCount < 0) {
 				console.warn("Roo Code <Language Model API>: Negative token count received:", tokenCount)
+				// Fall back to tiktoken for string inputs
+				if (typeof text === "string") {
+					return this.fallbackToTiktoken(text)
+				}
 				return 0
 			}

+			// If we get 0 tokens but have content, fall back to tiktoken
+			if (tokenCount === 0 && typeof text === "string" && text.length > 0) {
+				console.debug(
+					"Roo Code <Language Model API>: VSCode API returned 0 tokens for non-empty text, using tiktoken fallback",
+				)
+				return this.fallbackToTiktoken(text)
+			}
+
 			return tokenCount
 		} catch (error) {
 			// Handle specific error types
@@ -257,17 +298,42 @@ export class VsCodeLmHandler extends BaseProvider implements SingleCompletionHan
 			}

 			const errorMessage = error instanceof Error ? error.message : "Unknown error"
-			console.warn("Roo Code <Language Model API>: Token counting failed:", errorMessage)
+			console.warn("Roo Code <Language Model API>: Token counting failed, using tiktoken fallback:", errorMessage)

 			// Log additional error details if available
 			if (error instanceof Error && error.stack) {
 				console.debug("Token counting error stack:", error.stack)
 			}

+			// Fall back to tiktoken for string inputs
+			if (typeof text === "string") {
+				return this.fallbackToTiktoken(text)
+			}
+
 			return 0 // Fallback to prevent stream interruption
 		}
 	}

+	/**
+	 * Fallback to tiktoken for token counting when VSCode API is unavailable or returns invalid results
+	 */
+	private async fallbackToTiktoken(text: string): Promise<number> {
+		try {
+			// Convert text to Anthropic content blocks format for base provider
+			const content: Anthropic.Messages.ContentBlockParam[] = [
+				{
+					type: "text",
+					text: text,
+				},
+			]
+			return super.countTokens(content)
+		} catch (error) {
+			console.error("Roo Code <Language Model API>: Tiktoken fallback failed:", error)
+			// Last resort: estimate based on character count (rough approximation)
+			return Math.ceil(text.length / 4)
+		}
+	}
+
 	private async calculateTotalInputTokens(
 		systemPrompt: string,
 		vsCodeLmMessages: vscode.LanguageModelChatMessage[],
@@ -363,6 +429,8 @@ export class VsCodeLmHandler extends BaseProvider implements SingleCompletionHan

 		// Accumulate the text and count at the end of the stream to reduce token counting overhead.
 		let accumulatedText: string = ""
+		let lastTokenUpdateLength: number = 0
+		const TOKEN_UPDATE_INTERVAL = 100 // Update tokens every 100 characters for more responsive UI

 		try {
 			// Create the response stream with minimal required options
@@ -393,6 +461,17 @@ export class VsCodeLmHandler extends BaseProvider implements SingleCompletionHan
 						type: "text",
 						text: chunk.value,
 					}
+
+					// Provide more frequent token updates during streaming
+					if (accumulatedText.length - lastTokenUpdateLength >= TOKEN_UPDATE_INTERVAL) {
+						const currentOutputTokens = await this.internalCountTokens(accumulatedText)
+						yield {
+							type: "usage",
+							inputTokens: totalInputTokens,
+							outputTokens: currentOutputTokens,
+						}
+						lastTokenUpdateLength = accumulatedText.length
+					}
 				} else if (chunk instanceof vscode.LanguageModelToolCallPart) {
 					try {
 						// Validate tool call parameters
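On the streaming side, the diff above keeps a lastTokenUpdateLength cursor and re-counts tokens only once the accumulated text has grown by TOKEN_UPDATE_INTERVAL (100) characters, so the UI receives intermediate usage chunks without counting on every part. Below is a minimal, self-contained sketch of that throttling pattern in isolation; the generator name and chunk shapes are illustrative, not the handler's real stream types:

// Minimal sketch of the "usage update every 100 characters" throttling from the diff above.
// streamWithUsageUpdates and the chunk shapes are illustrative, not the handler's real API.
const TOKEN_UPDATE_INTERVAL = 100

type StreamChunk =
	| { type: "text"; text: string }
	| { type: "usage"; inputTokens: number; outputTokens: number }

async function* streamWithUsageUpdates(
	parts: AsyncIterable<string>,
	inputTokens: number,
	countTokens: (text: string) => Promise<number>,
): AsyncGenerator<StreamChunk> {
	let accumulatedText = ""
	let lastTokenUpdateLength = 0

	for await (const part of parts) {
		accumulatedText += part
		yield { type: "text", text: part }

		// Re-count only after at least 100 new characters, to keep counting overhead low.
		if (accumulatedText.length - lastTokenUpdateLength >= TOKEN_UPDATE_INTERVAL) {
			yield { type: "usage", inputTokens, outputTokens: await countTokens(accumulatedText) }
			lastTokenUpdateLength = accumulatedText.length
		}
	}

	// Final usage update covering the whole response.
	yield { type: "usage", inputTokens, outputTokens: await countTokens(accumulatedText) }
}

With a 150-character response split into three 50-character parts, as in the spec above, this pattern emits three text chunks, one intermediate usage chunk once 100 characters have accumulated, and a final usage chunk, which matches the test's expectation of more than one usage update.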
