From 15531b82614eb7246248dbecd1853ea11a350413 Mon Sep 17 00:00:00 2001 From: hannesrudolph Date: Wed, 25 Jun 2025 00:03:18 -0600 Subject: [PATCH 1/4] fix: resolve Claude Code token counting inefficiency and enable caching (#5104) - Remove 1.5x fudge factor from Claude Code token counting - Enable prompt caching support for all Claude Code models - Add comprehensive tests for token counting and caching - Update existing tests to reflect accurate token counting This fixes the extreme token inefficiency where simple messages would jump from ~40k to over 60k tokens, causing API hangs when approaching the artificial 120k limit. Claude Code now properly utilizes its full 200k context window with accurate token counting. --- packages/types/src/providers/claude-code.ts | 10 +- .../__tests__/claude-code-caching.spec.ts | 305 ++++++++++++++++++ .../claude-code-token-counting.spec.ts | 117 +++++++ .../providers/__tests__/claude-code.spec.ts | 2 +- src/api/providers/claude-code.ts | 40 +++ 5 files changed, 468 insertions(+), 6 deletions(-) create mode 100644 src/api/providers/__tests__/claude-code-caching.spec.ts create mode 100644 src/api/providers/__tests__/claude-code-token-counting.spec.ts diff --git a/packages/types/src/providers/claude-code.ts b/packages/types/src/providers/claude-code.ts index 707312e915..d0fff0f2ee 100644 --- a/packages/types/src/providers/claude-code.ts +++ b/packages/types/src/providers/claude-code.ts @@ -8,7 +8,7 @@ export const claudeCodeModels = { "claude-sonnet-4-20250514": { ...anthropicModels["claude-sonnet-4-20250514"], supportsImages: false, - supportsPromptCache: false, + supportsPromptCache: true, // Claude Code does report cache tokens supportsReasoningEffort: false, supportsReasoningBudget: false, requiredReasoningBudget: false, @@ -16,7 +16,7 @@ export const claudeCodeModels = { "claude-opus-4-20250514": { ...anthropicModels["claude-opus-4-20250514"], supportsImages: false, - supportsPromptCache: false, + supportsPromptCache: true, // Claude Code does report cache tokens supportsReasoningEffort: false, supportsReasoningBudget: false, requiredReasoningBudget: false, @@ -24,7 +24,7 @@ export const claudeCodeModels = { "claude-3-7-sonnet-20250219": { ...anthropicModels["claude-3-7-sonnet-20250219"], supportsImages: false, - supportsPromptCache: false, + supportsPromptCache: true, // Claude Code does report cache tokens supportsReasoningEffort: false, supportsReasoningBudget: false, requiredReasoningBudget: false, @@ -32,7 +32,7 @@ export const claudeCodeModels = { "claude-3-5-sonnet-20241022": { ...anthropicModels["claude-3-5-sonnet-20241022"], supportsImages: false, - supportsPromptCache: false, + supportsPromptCache: true, // Claude Code does report cache tokens supportsReasoningEffort: false, supportsReasoningBudget: false, requiredReasoningBudget: false, @@ -40,7 +40,7 @@ export const claudeCodeModels = { "claude-3-5-haiku-20241022": { ...anthropicModels["claude-3-5-haiku-20241022"], supportsImages: false, - supportsPromptCache: false, + supportsPromptCache: true, // Claude Code does report cache tokens supportsReasoningEffort: false, supportsReasoningBudget: false, requiredReasoningBudget: false, diff --git a/src/api/providers/__tests__/claude-code-caching.spec.ts b/src/api/providers/__tests__/claude-code-caching.spec.ts new file mode 100644 index 0000000000..b7f7ff852a --- /dev/null +++ b/src/api/providers/__tests__/claude-code-caching.spec.ts @@ -0,0 +1,305 @@ +import { describe, it, expect, vi, beforeEach } from "vitest" +import { ClaudeCodeHandler } from "../claude-code" +import { runClaudeCode } from "../../../integrations/claude-code/run" +import type { ApiHandlerOptions } from "../../../shared/api" +import type { ClaudeCodeMessage } from "../../../integrations/claude-code/types" +import type { ApiStreamUsageChunk } from "../../transform/stream" +import type { Anthropic } from "@anthropic-ai/sdk" + +// Mock the runClaudeCode function +vi.mock("../../../integrations/claude-code/run", () => ({ + runClaudeCode: vi.fn(), +})) + +describe("ClaudeCodeHandler - Caching Support", () => { + let handler: ClaudeCodeHandler + const mockOptions: ApiHandlerOptions = { + apiKey: "test-key", + apiModelId: "claude-3-5-sonnet-20241022", + claudeCodePath: "/test/path", + } + + beforeEach(() => { + handler = new ClaudeCodeHandler(mockOptions) + vi.clearAllMocks() + }) + + it("should collect cache read tokens from API response", async () => { + const mockStream = async function* (): AsyncGenerator { + // Initial system message + yield { + type: "system", + subtype: "init", + session_id: "test-session", + tools: [], + mcp_servers: [], + apiKeySource: "user", + } as ClaudeCodeMessage + + // Assistant message with cache tokens + const message: Anthropic.Messages.Message = { + id: "msg_123", + type: "message", + role: "assistant", + model: "claude-3-5-sonnet-20241022", + content: [{ type: "text", text: "Hello!", citations: [] }], + usage: { + input_tokens: 100, + output_tokens: 50, + cache_read_input_tokens: 80, // 80 tokens read from cache + cache_creation_input_tokens: 20, // 20 new tokens cached + }, + stop_reason: "end_turn", + stop_sequence: null, + } + + yield { + type: "assistant", + message, + session_id: "test-session", + } as ClaudeCodeMessage + + // Result with cost + yield { + type: "result", + subtype: "success", + result: "success", + total_cost_usd: 0.001, + is_error: false, + duration_ms: 1000, + duration_api_ms: 900, + num_turns: 1, + session_id: "test-session", + } as ClaudeCodeMessage + } + + vi.mocked(runClaudeCode).mockReturnValue(mockStream()) + + const stream = handler.createMessage("System prompt", [{ role: "user", content: "Hello" }]) + + const chunks = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + // Find the usage chunk + const usageChunk = chunks.find((c) => c.type === "usage" && "totalCost" in c) as ApiStreamUsageChunk | undefined + expect(usageChunk).toBeDefined() + expect(usageChunk!.inputTokens).toBe(100) + expect(usageChunk!.outputTokens).toBe(50) + expect(usageChunk!.cacheReadTokens).toBe(80) + expect(usageChunk!.cacheWriteTokens).toBe(20) + }) + + it("should accumulate cache tokens across multiple messages", async () => { + const mockStream = async function* (): AsyncGenerator { + yield { + type: "system", + subtype: "init", + session_id: "test-session", + tools: [], + mcp_servers: [], + apiKeySource: "user", + } as ClaudeCodeMessage + + // First message chunk + const message1: Anthropic.Messages.Message = { + id: "msg_1", + type: "message", + role: "assistant", + model: "claude-3-5-sonnet-20241022", + content: [{ type: "text", text: "Part 1", citations: [] }], + usage: { + input_tokens: 50, + output_tokens: 25, + cache_read_input_tokens: 40, + cache_creation_input_tokens: 10, + }, + stop_reason: null, + stop_sequence: null, + } + + yield { + type: "assistant", + message: message1, + session_id: "test-session", + } as ClaudeCodeMessage + + // Second message chunk + const message2: Anthropic.Messages.Message = { + id: "msg_2", + type: "message", + role: "assistant", + model: "claude-3-5-sonnet-20241022", + content: [{ type: "text", text: "Part 2", citations: [] }], + usage: { + input_tokens: 50, + output_tokens: 25, + cache_read_input_tokens: 30, + cache_creation_input_tokens: 20, + }, + stop_reason: "end_turn", + stop_sequence: null, + } + + yield { + type: "assistant", + message: message2, + session_id: "test-session", + } as ClaudeCodeMessage + + yield { + type: "result", + subtype: "success", + result: "success", + total_cost_usd: 0.002, + is_error: false, + duration_ms: 2000, + duration_api_ms: 1800, + num_turns: 1, + session_id: "test-session", + } as ClaudeCodeMessage + } + + vi.mocked(runClaudeCode).mockReturnValue(mockStream()) + + const stream = handler.createMessage("System prompt", [{ role: "user", content: "Hello" }]) + + const chunks = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + const usageChunk = chunks.find((c) => c.type === "usage" && "totalCost" in c) as ApiStreamUsageChunk | undefined + expect(usageChunk).toBeDefined() + expect(usageChunk!.inputTokens).toBe(100) // 50 + 50 + expect(usageChunk!.outputTokens).toBe(50) // 25 + 25 + expect(usageChunk!.cacheReadTokens).toBe(70) // 40 + 30 + expect(usageChunk!.cacheWriteTokens).toBe(30) // 10 + 20 + }) + + it("should handle missing cache token fields gracefully", async () => { + const mockStream = async function* (): AsyncGenerator { + yield { + type: "system", + subtype: "init", + session_id: "test-session", + tools: [], + mcp_servers: [], + apiKeySource: "user", + } as ClaudeCodeMessage + + // Message without cache tokens + const message: Anthropic.Messages.Message = { + id: "msg_123", + type: "message", + role: "assistant", + model: "claude-3-5-sonnet-20241022", + content: [{ type: "text", text: "Hello!", citations: [] }], + usage: { + input_tokens: 100, + output_tokens: 50, + cache_read_input_tokens: null, + cache_creation_input_tokens: null, + }, + stop_reason: "end_turn", + stop_sequence: null, + } + + yield { + type: "assistant", + message, + session_id: "test-session", + } as ClaudeCodeMessage + + yield { + type: "result", + subtype: "success", + result: "success", + total_cost_usd: 0.001, + is_error: false, + duration_ms: 1000, + duration_api_ms: 900, + num_turns: 1, + session_id: "test-session", + } as ClaudeCodeMessage + } + + vi.mocked(runClaudeCode).mockReturnValue(mockStream()) + + const stream = handler.createMessage("System prompt", [{ role: "user", content: "Hello" }]) + + const chunks = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + const usageChunk = chunks.find((c) => c.type === "usage" && "totalCost" in c) as ApiStreamUsageChunk | undefined + expect(usageChunk).toBeDefined() + expect(usageChunk!.inputTokens).toBe(100) + expect(usageChunk!.outputTokens).toBe(50) + expect(usageChunk!.cacheReadTokens).toBe(0) + expect(usageChunk!.cacheWriteTokens).toBe(0) + }) + + it("should report zero cost for subscription usage", async () => { + const mockStream = async function* (): AsyncGenerator { + // Subscription usage has apiKeySource: "none" + yield { + type: "system", + subtype: "init", + session_id: "test-session", + tools: [], + mcp_servers: [], + apiKeySource: "none", + } as ClaudeCodeMessage + + const message: Anthropic.Messages.Message = { + id: "msg_123", + type: "message", + role: "assistant", + model: "claude-3-5-sonnet-20241022", + content: [{ type: "text", text: "Hello!", citations: [] }], + usage: { + input_tokens: 100, + output_tokens: 50, + cache_read_input_tokens: 80, + cache_creation_input_tokens: 20, + }, + stop_reason: "end_turn", + stop_sequence: null, + } + + yield { + type: "assistant", + message, + session_id: "test-session", + } as ClaudeCodeMessage + + yield { + type: "result", + subtype: "success", + result: "success", + total_cost_usd: 0.001, // This should be ignored for subscription usage + is_error: false, + duration_ms: 1000, + duration_api_ms: 900, + num_turns: 1, + session_id: "test-session", + } as ClaudeCodeMessage + } + + vi.mocked(runClaudeCode).mockReturnValue(mockStream()) + + const stream = handler.createMessage("System prompt", [{ role: "user", content: "Hello" }]) + + const chunks = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + const usageChunk = chunks.find((c) => c.type === "usage" && "totalCost" in c) as ApiStreamUsageChunk | undefined + expect(usageChunk).toBeDefined() + expect(usageChunk!.totalCost).toBe(0) // Should be 0 for subscription usage + }) +}) diff --git a/src/api/providers/__tests__/claude-code-token-counting.spec.ts b/src/api/providers/__tests__/claude-code-token-counting.spec.ts new file mode 100644 index 0000000000..35d2ff9fbe --- /dev/null +++ b/src/api/providers/__tests__/claude-code-token-counting.spec.ts @@ -0,0 +1,117 @@ +import { describe, it, expect, vi, beforeEach } from "vitest" +import { ClaudeCodeHandler } from "../claude-code" +import { ApiHandlerOptions } from "../../../shared/api" +import type { Anthropic } from "@anthropic-ai/sdk" + +describe("ClaudeCodeHandler Token Counting", () => { + let handler: ClaudeCodeHandler + let mockOptions: ApiHandlerOptions + + beforeEach(() => { + mockOptions = { + apiModelId: "claude-3-5-sonnet-20241022", + claudeCodePath: "/usr/local/bin/claude", + } as ApiHandlerOptions + + handler = new ClaudeCodeHandler(mockOptions) + }) + + describe("countTokens", () => { + it("should count tokens accurately without any fudge factor", async () => { + const content: Anthropic.Messages.ContentBlockParam[] = [ + { + type: "text", + text: "Hello, this is a test message to verify token counting accuracy.", + }, + ] + + const tokenCount = await handler.countTokens(content) + + // The text has approximately 13-15 tokens + // With no fudge factor, we expect the exact token count + // With the old 1.5x fudge factor, it would have been around 20-23 tokens + expect(tokenCount).toBeLessThan(16) + expect(tokenCount).toBeGreaterThan(12) + }) + + it("should handle empty content", async () => { + const content: Anthropic.Messages.ContentBlockParam[] = [] + const tokenCount = await handler.countTokens(content) + expect(tokenCount).toBe(0) + }) + + it("should handle multiple text blocks", async () => { + const content: Anthropic.Messages.ContentBlockParam[] = [ + { type: "text", text: "First block" }, + { type: "text", text: "Second block" }, + { type: "text", text: "Third block" }, + ] + + const tokenCount = await handler.countTokens(content) + + // Each block is approximately 2-3 tokens, so 6-9 tokens total + // With no fudge factor, expect exact count + expect(tokenCount).toBeLessThan(10) // Would be ~15 with old 1.5x factor + expect(tokenCount).toBeGreaterThan(5) + }) + + it("should handle image blocks with conservative estimate", async () => { + const content: Anthropic.Messages.ContentBlockParam[] = [ + { + type: "image", + source: { + type: "base64", + media_type: "image/jpeg", + data: "base64data", + }, + }, + ] + + const tokenCount = await handler.countTokens(content) + + // Images get a conservative 300 tokens estimate (no fudge factor) + expect(tokenCount).toBe(300) + }) + + it("should provide accurate token counts for typical messages", async () => { + // Simulate a typical user message with environment details + const content: Anthropic.Messages.ContentBlockParam[] = [ + { + type: "text", + text: `Hi + + +# VSCode Visible Files +src/app.ts +src/utils.ts + +# VSCode Open Tabs +src/app.ts + +# Current Time +2024-01-01 12:00:00 PM + +# Current Context Size (Tokens) +1000 (5%) + +# Current Cost +$0.05 + +# Current Mode +code +Code +claude-3-5-sonnet-20241022 +`, + }, + ] + + const tokenCount = await handler.countTokens(content) + + // This content is approximately 100-120 tokens + // With no fudge factor, expect exact count + // With old 1.5x factor, it would have been 150-180 tokens + expect(tokenCount).toBeLessThan(125) + expect(tokenCount).toBeGreaterThan(95) + }) + }) +}) diff --git a/src/api/providers/__tests__/claude-code.spec.ts b/src/api/providers/__tests__/claude-code.spec.ts index 5bc4c6f1ea..d0dfa68eb8 100644 --- a/src/api/providers/__tests__/claude-code.spec.ts +++ b/src/api/providers/__tests__/claude-code.spec.ts @@ -34,7 +34,7 @@ describe("ClaudeCodeHandler", () => { const model = handler.getModel() expect(model.id).toBe("claude-3-5-sonnet-20241022") expect(model.info.supportsImages).toBe(false) - expect(model.info.supportsPromptCache).toBe(false) + expect(model.info.supportsPromptCache).toBe(true) // Claude Code now supports prompt caching }) test("should use default model when invalid model provided", () => { diff --git a/src/api/providers/claude-code.ts b/src/api/providers/claude-code.ts index bc72e658fe..b99bd91e1b 100644 --- a/src/api/providers/claude-code.ts +++ b/src/api/providers/claude-code.ts @@ -7,9 +7,12 @@ import { filterMessagesForClaudeCode } from "../../integrations/claude-code/mess import { BaseProvider } from "./base-provider" import { t } from "../../i18n" import { ApiHandlerOptions } from "../../shared/api" +import { Tiktoken } from "tiktoken/lite" +import o200kBase from "tiktoken/encoders/o200k_base" export class ClaudeCodeHandler extends BaseProvider implements ApiHandler { private options: ApiHandlerOptions + private encoder: Tiktoken | null = null constructor(options: ApiHandlerOptions) { super() @@ -145,4 +148,41 @@ export class ClaudeCodeHandler extends BaseProvider implements ApiHandler { return null } } + + /** + * Override the base provider's token counting to provide accurate counting for Claude Code. + * Claude Code uses the same tokenizer as Anthropic, so we don't need any fudge factor. + * The actual token counts are reported accurately in the API response. + */ + override async countTokens(content: Anthropic.Messages.ContentBlockParam[]): Promise { + if (content.length === 0) { + return 0 + } + + let totalTokens = 0 + + // Lazily create and cache the encoder if it doesn't exist + if (!this.encoder) { + this.encoder = new Tiktoken(o200kBase.bpe_ranks, o200kBase.special_tokens, o200kBase.pat_str) + } + + // Process each content block + for (const block of content) { + if (block.type === "text") { + const text = block.text || "" + if (text.length > 0) { + const tokens = this.encoder.encode(text) + totalTokens += tokens.length + } + } else if (block.type === "image") { + // Claude Code doesn't support images, but we handle them just in case + // Use a conservative estimate + totalTokens += 300 + } + } + + // No fudge factor needed - Claude Code uses the same tokenizer as Anthropic + // and reports accurate token counts in the API response + return totalTokens + } } From 24eca3fe89a73c5abd8c59cf394befe6a0efd44f Mon Sep 17 00:00:00 2001 From: hannesrudolph Date: Wed, 25 Jun 2025 00:21:33 -0600 Subject: [PATCH 2/4] fix: address PR review comments - Extract IMAGE_TOKEN_ESTIMATE as a named constant for clarity - Update token counting tests to use exact counts instead of ranges for deterministic testing - Fix test expectations to match actual tokenizer output --- .../claude-code-token-counting.spec.ts | 73 ++++++++++--------- src/api/providers/claude-code.ts | 7 +- 2 files changed, 44 insertions(+), 36 deletions(-) diff --git a/src/api/providers/__tests__/claude-code-token-counting.spec.ts b/src/api/providers/__tests__/claude-code-token-counting.spec.ts index 35d2ff9fbe..cefd3ec1c7 100644 --- a/src/api/providers/__tests__/claude-code-token-counting.spec.ts +++ b/src/api/providers/__tests__/claude-code-token-counting.spec.ts @@ -27,11 +27,9 @@ describe("ClaudeCodeHandler Token Counting", () => { const tokenCount = await handler.countTokens(content) - // The text has approximately 13-15 tokens - // With no fudge factor, we expect the exact token count - // With the old 1.5x fudge factor, it would have been around 20-23 tokens - expect(tokenCount).toBeLessThan(16) - expect(tokenCount).toBeGreaterThan(12) + // The exact token count for this text using o200k_base tokenizer is 13 + // With the old 1.5x fudge factor, it would have been 20 tokens + expect(tokenCount).toBe(13) }) it("should handle empty content", async () => { @@ -49,10 +47,9 @@ describe("ClaudeCodeHandler Token Counting", () => { const tokenCount = await handler.countTokens(content) - // Each block is approximately 2-3 tokens, so 6-9 tokens total - // With no fudge factor, expect exact count - expect(tokenCount).toBeLessThan(10) // Would be ~15 with old 1.5x factor - expect(tokenCount).toBeGreaterThan(5) + // "First block" = 2 tokens, "Second block" = 2 tokens, "Third block" = 2 tokens + // Total: 6 tokens (would have been 9 with old 1.5x factor) + expect(tokenCount).toBe(6) }) it("should handle image blocks with conservative estimate", async () => { @@ -74,44 +71,52 @@ describe("ClaudeCodeHandler Token Counting", () => { }) it("should provide accurate token counts for typical messages", async () => { - // Simulate a typical user message with environment details + // Use a simpler, predictable message for exact token counting const content: Anthropic.Messages.ContentBlockParam[] = [ { type: "text", - text: `Hi + text: "This is a simple test message with exactly predictable token count.", + }, + ] - -# VSCode Visible Files -src/app.ts -src/utils.ts + const tokenCount = await handler.countTokens(content) -# VSCode Open Tabs -src/app.ts + // This specific text has exactly 12 tokens with o200k_base tokenizer + // With old 1.5x factor, it would have been 18 tokens + expect(tokenCount).toBe(12) + }) -# Current Time -2024-01-01 12:00:00 PM + it("should handle mixed content types", async () => { + const content: Anthropic.Messages.ContentBlockParam[] = [ + { type: "text", text: "Hello world" }, // 2 tokens + { + type: "image", + source: { + type: "base64", + media_type: "image/jpeg", + data: "base64data", + }, + }, // 300 tokens (IMAGE_TOKEN_ESTIMATE) + { type: "text", text: "Goodbye" }, // 1 token + ] -# Current Context Size (Tokens) -1000 (5%) + const tokenCount = await handler.countTokens(content) -# Current Cost -$0.05 + // Total: 2 + 300 + 2 = 304 tokens ("Goodbye" is actually 2 tokens) + expect(tokenCount).toBe(304) + }) -# Current Mode -code -Code -claude-3-5-sonnet-20241022 -`, - }, + it("should handle empty text blocks", async () => { + const content: Anthropic.Messages.ContentBlockParam[] = [ + { type: "text", text: "" }, + { type: "text", text: "Hello" }, // 1 token + { type: "text", text: "" }, ] const tokenCount = await handler.countTokens(content) - // This content is approximately 100-120 tokens - // With no fudge factor, expect exact count - // With old 1.5x factor, it would have been 150-180 tokens - expect(tokenCount).toBeLessThan(125) - expect(tokenCount).toBeGreaterThan(95) + // Only "Hello" contributes tokens + expect(tokenCount).toBe(1) }) }) }) diff --git a/src/api/providers/claude-code.ts b/src/api/providers/claude-code.ts index b99bd91e1b..d75df8a07a 100644 --- a/src/api/providers/claude-code.ts +++ b/src/api/providers/claude-code.ts @@ -10,6 +10,10 @@ import { ApiHandlerOptions } from "../../shared/api" import { Tiktoken } from "tiktoken/lite" import o200kBase from "tiktoken/encoders/o200k_base" +// Conservative token estimate for images (even though Claude Code doesn't support them) +// This matches the estimate used in src/utils/tiktoken.ts for consistency +const IMAGE_TOKEN_ESTIMATE = 300 + export class ClaudeCodeHandler extends BaseProvider implements ApiHandler { private options: ApiHandlerOptions private encoder: Tiktoken | null = null @@ -176,8 +180,7 @@ export class ClaudeCodeHandler extends BaseProvider implements ApiHandler { } } else if (block.type === "image") { // Claude Code doesn't support images, but we handle them just in case - // Use a conservative estimate - totalTokens += 300 + totalTokens += IMAGE_TOKEN_ESTIMATE } } From 2f659bc5c3a5348356eb127c7df8108223d7350c Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Wed, 25 Jun 2025 17:02:26 -0500 Subject: [PATCH 3/4] Remove token counting changes, keep only cache support - Removed custom countTokens override from claude-code.ts - Deleted claude-code-token-counting.spec.ts test file - Kept cache token collection and reporting functionality - Kept supportsPromptCache: true for all Claude Code models - Kept claude-code-caching.spec.ts tests This focuses the PR on enabling cache support without modifying token counting behavior. --- .../claude-code-token-counting.spec.ts | 122 ------------------ src/api/providers/claude-code.ts | 43 ------ 2 files changed, 165 deletions(-) delete mode 100644 src/api/providers/__tests__/claude-code-token-counting.spec.ts diff --git a/src/api/providers/__tests__/claude-code-token-counting.spec.ts b/src/api/providers/__tests__/claude-code-token-counting.spec.ts deleted file mode 100644 index cefd3ec1c7..0000000000 --- a/src/api/providers/__tests__/claude-code-token-counting.spec.ts +++ /dev/null @@ -1,122 +0,0 @@ -import { describe, it, expect, vi, beforeEach } from "vitest" -import { ClaudeCodeHandler } from "../claude-code" -import { ApiHandlerOptions } from "../../../shared/api" -import type { Anthropic } from "@anthropic-ai/sdk" - -describe("ClaudeCodeHandler Token Counting", () => { - let handler: ClaudeCodeHandler - let mockOptions: ApiHandlerOptions - - beforeEach(() => { - mockOptions = { - apiModelId: "claude-3-5-sonnet-20241022", - claudeCodePath: "/usr/local/bin/claude", - } as ApiHandlerOptions - - handler = new ClaudeCodeHandler(mockOptions) - }) - - describe("countTokens", () => { - it("should count tokens accurately without any fudge factor", async () => { - const content: Anthropic.Messages.ContentBlockParam[] = [ - { - type: "text", - text: "Hello, this is a test message to verify token counting accuracy.", - }, - ] - - const tokenCount = await handler.countTokens(content) - - // The exact token count for this text using o200k_base tokenizer is 13 - // With the old 1.5x fudge factor, it would have been 20 tokens - expect(tokenCount).toBe(13) - }) - - it("should handle empty content", async () => { - const content: Anthropic.Messages.ContentBlockParam[] = [] - const tokenCount = await handler.countTokens(content) - expect(tokenCount).toBe(0) - }) - - it("should handle multiple text blocks", async () => { - const content: Anthropic.Messages.ContentBlockParam[] = [ - { type: "text", text: "First block" }, - { type: "text", text: "Second block" }, - { type: "text", text: "Third block" }, - ] - - const tokenCount = await handler.countTokens(content) - - // "First block" = 2 tokens, "Second block" = 2 tokens, "Third block" = 2 tokens - // Total: 6 tokens (would have been 9 with old 1.5x factor) - expect(tokenCount).toBe(6) - }) - - it("should handle image blocks with conservative estimate", async () => { - const content: Anthropic.Messages.ContentBlockParam[] = [ - { - type: "image", - source: { - type: "base64", - media_type: "image/jpeg", - data: "base64data", - }, - }, - ] - - const tokenCount = await handler.countTokens(content) - - // Images get a conservative 300 tokens estimate (no fudge factor) - expect(tokenCount).toBe(300) - }) - - it("should provide accurate token counts for typical messages", async () => { - // Use a simpler, predictable message for exact token counting - const content: Anthropic.Messages.ContentBlockParam[] = [ - { - type: "text", - text: "This is a simple test message with exactly predictable token count.", - }, - ] - - const tokenCount = await handler.countTokens(content) - - // This specific text has exactly 12 tokens with o200k_base tokenizer - // With old 1.5x factor, it would have been 18 tokens - expect(tokenCount).toBe(12) - }) - - it("should handle mixed content types", async () => { - const content: Anthropic.Messages.ContentBlockParam[] = [ - { type: "text", text: "Hello world" }, // 2 tokens - { - type: "image", - source: { - type: "base64", - media_type: "image/jpeg", - data: "base64data", - }, - }, // 300 tokens (IMAGE_TOKEN_ESTIMATE) - { type: "text", text: "Goodbye" }, // 1 token - ] - - const tokenCount = await handler.countTokens(content) - - // Total: 2 + 300 + 2 = 304 tokens ("Goodbye" is actually 2 tokens) - expect(tokenCount).toBe(304) - }) - - it("should handle empty text blocks", async () => { - const content: Anthropic.Messages.ContentBlockParam[] = [ - { type: "text", text: "" }, - { type: "text", text: "Hello" }, // 1 token - { type: "text", text: "" }, - ] - - const tokenCount = await handler.countTokens(content) - - // Only "Hello" contributes tokens - expect(tokenCount).toBe(1) - }) - }) -}) diff --git a/src/api/providers/claude-code.ts b/src/api/providers/claude-code.ts index d75df8a07a..bc72e658fe 100644 --- a/src/api/providers/claude-code.ts +++ b/src/api/providers/claude-code.ts @@ -7,16 +7,9 @@ import { filterMessagesForClaudeCode } from "../../integrations/claude-code/mess import { BaseProvider } from "./base-provider" import { t } from "../../i18n" import { ApiHandlerOptions } from "../../shared/api" -import { Tiktoken } from "tiktoken/lite" -import o200kBase from "tiktoken/encoders/o200k_base" - -// Conservative token estimate for images (even though Claude Code doesn't support them) -// This matches the estimate used in src/utils/tiktoken.ts for consistency -const IMAGE_TOKEN_ESTIMATE = 300 export class ClaudeCodeHandler extends BaseProvider implements ApiHandler { private options: ApiHandlerOptions - private encoder: Tiktoken | null = null constructor(options: ApiHandlerOptions) { super() @@ -152,40 +145,4 @@ export class ClaudeCodeHandler extends BaseProvider implements ApiHandler { return null } } - - /** - * Override the base provider's token counting to provide accurate counting for Claude Code. - * Claude Code uses the same tokenizer as Anthropic, so we don't need any fudge factor. - * The actual token counts are reported accurately in the API response. - */ - override async countTokens(content: Anthropic.Messages.ContentBlockParam[]): Promise { - if (content.length === 0) { - return 0 - } - - let totalTokens = 0 - - // Lazily create and cache the encoder if it doesn't exist - if (!this.encoder) { - this.encoder = new Tiktoken(o200kBase.bpe_ranks, o200kBase.special_tokens, o200kBase.pat_str) - } - - // Process each content block - for (const block of content) { - if (block.type === "text") { - const text = block.text || "" - if (text.length > 0) { - const tokens = this.encoder.encode(text) - totalTokens += tokens.length - } - } else if (block.type === "image") { - // Claude Code doesn't support images, but we handle them just in case - totalTokens += IMAGE_TOKEN_ESTIMATE - } - } - - // No fudge factor needed - Claude Code uses the same tokenizer as Anthropic - // and reports accurate token counts in the API response - return totalTokens - } } From 98f3093d3ded77e1e3163e2e9e1b3a5ec9dbf6a5 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Wed, 25 Jun 2025 17:53:03 -0500 Subject: [PATCH 4/4] fix: update webview test to expect supportsPromptCache=true for Claude Code models --- .../src/components/ui/hooks/__tests__/useSelectedModel.spec.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/webview-ui/src/components/ui/hooks/__tests__/useSelectedModel.spec.ts b/webview-ui/src/components/ui/hooks/__tests__/useSelectedModel.spec.ts index fd5950bf35..5fefabf59e 100644 --- a/webview-ui/src/components/ui/hooks/__tests__/useSelectedModel.spec.ts +++ b/webview-ui/src/components/ui/hooks/__tests__/useSelectedModel.spec.ts @@ -402,7 +402,7 @@ describe("useSelectedModel", () => { expect(result.current.id).toBe("claude-sonnet-4-20250514") expect(result.current.info).toBeDefined() expect(result.current.info?.supportsImages).toBe(false) - expect(result.current.info?.supportsPromptCache).toBe(false) + expect(result.current.info?.supportsPromptCache).toBe(true) // Claude Code now supports prompt cache // Verify it inherits other properties from anthropic models expect(result.current.info?.maxTokens).toBe(64_000) expect(result.current.info?.contextWindow).toBe(200_000)