
Commit 05aceca

fix: resolve Claude Code token counting inefficiency and enable caching (#5104) (#5108)
* fix: resolve Claude Code token counting inefficiency and enable caching (#5104)

  - Remove 1.5x fudge factor from Claude Code token counting
  - Enable prompt caching support for all Claude Code models
  - Add comprehensive tests for token counting and caching
  - Update existing tests to reflect accurate token counting

  This fixes the extreme token inefficiency where simple messages would jump from ~40k to over 60k tokens, causing API hangs when approaching the artificial 120k limit. Claude Code now properly utilizes its full 200k context window with accurate token counting.

* fix: address PR review comments

  - Extract IMAGE_TOKEN_ESTIMATE as a named constant for clarity
  - Update token counting tests to use exact counts instead of ranges for deterministic testing
  - Fix test expectations to match actual tokenizer output

* Remove token counting changes, keep only cache support

  - Removed custom countTokens override from claude-code.ts
  - Deleted claude-code-token-counting.spec.ts test file
  - Kept cache token collection and reporting functionality
  - Kept supportsPromptCache: true for all Claude Code models
  - Kept claude-code-caching.spec.ts tests

  This focuses the PR on enabling cache support without modifying token counting behavior.

* fix: update webview test to expect supportsPromptCache=true for Claude Code models

---------

Co-authored-by: Daniel Riccio <[email protected]>
1 parent 34d719e commit 05aceca
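The cache accounting this commit enables is simple at heart: Claude Code streams one usage block per assistant message, and the handler sums those blocks into a single usage report. A minimal sketch of that accumulation, assuming the Anthropic SDK usage field names exercised by the new tests below; AccumulatedUsage and accumulateUsage are illustrative names, not the handler's actual API:

// Illustrative sketch only, not the handler's real implementation.
import type { Anthropic } from "@anthropic-ai/sdk"

interface AccumulatedUsage {
  inputTokens: number
  outputTokens: number
  cacheReadTokens: number
  cacheWriteTokens: number
}

function accumulateUsage(acc: AccumulatedUsage, usage: Anthropic.Messages.Usage): AccumulatedUsage {
  return {
    inputTokens: acc.inputTokens + (usage.input_tokens ?? 0),
    outputTokens: acc.outputTokens + (usage.output_tokens ?? 0),
    // Cache fields may be null or absent; treat them as 0
    // (see the "missing cache token fields" test below).
    cacheReadTokens: acc.cacheReadTokens + (usage.cache_read_input_tokens ?? 0),
    cacheWriteTokens: acc.cacheWriteTokens + (usage.cache_creation_input_tokens ?? 0),
  }
}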

File tree: 4 files changed, +312 −7 lines

packages/types/src/providers/claude-code.ts
Lines changed: 5 additions & 5 deletions

@@ -8,39 +8,39 @@ export const claudeCodeModels = {
   "claude-sonnet-4-20250514": {
     ...anthropicModels["claude-sonnet-4-20250514"],
     supportsImages: false,
-    supportsPromptCache: false,
+    supportsPromptCache: true, // Claude Code does report cache tokens
     supportsReasoningEffort: false,
     supportsReasoningBudget: false,
     requiredReasoningBudget: false,
   },
   "claude-opus-4-20250514": {
     ...anthropicModels["claude-opus-4-20250514"],
     supportsImages: false,
-    supportsPromptCache: false,
+    supportsPromptCache: true, // Claude Code does report cache tokens
     supportsReasoningEffort: false,
     supportsReasoningBudget: false,
     requiredReasoningBudget: false,
   },
   "claude-3-7-sonnet-20250219": {
     ...anthropicModels["claude-3-7-sonnet-20250219"],
     supportsImages: false,
-    supportsPromptCache: false,
+    supportsPromptCache: true, // Claude Code does report cache tokens
     supportsReasoningEffort: false,
     supportsReasoningBudget: false,
     requiredReasoningBudget: false,
   },
   "claude-3-5-sonnet-20241022": {
     ...anthropicModels["claude-3-5-sonnet-20241022"],
     supportsImages: false,
-    supportsPromptCache: false,
+    supportsPromptCache: true, // Claude Code does report cache tokens
     supportsReasoningEffort: false,
     supportsReasoningBudget: false,
     requiredReasoningBudget: false,
   },
   "claude-3-5-haiku-20241022": {
     ...anthropicModels["claude-3-5-haiku-20241022"],
     supportsImages: false,
-    supportsPromptCache: false,
+    supportsPromptCache: true, // Claude Code does report cache tokens
     supportsReasoningEffort: false,
     supportsReasoningBudget: false,
     requiredReasoningBudget: false,
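Flipping supportsPromptCache matters for cost display because metered Anthropic usage prices cache tokens differently from plain input. A rough sketch of that arithmetic, assuming Anthropic's published multipliers at the time of writing (cache writes at roughly 1.25x the input rate, cache reads at roughly 0.1x); note that the Claude Code CLI reports total_cost_usd itself in its result message, so this is background context, not this repo's cost helper:

// Hedged sketch: prices cache tokens separately from plain input.
// Function name and shape are illustrative, not the repo's actual API.
interface ModelPricing {
  inputPricePerMTok: number
  outputPricePerMTok: number
}

interface UsageTotals {
  inputTokens: number
  outputTokens: number
  cacheReadTokens: number
  cacheWriteTokens: number
}

function estimateCostUsd(pricing: ModelPricing, usage: UsageTotals): number {
  const perTok = (perMTok: number) => perMTok / 1_000_000
  return (
    usage.inputTokens * perTok(pricing.inputPricePerMTok) +
    usage.outputTokens * perTok(pricing.outputPricePerMTok) +
    // Cache writes cost more than plain input; cache reads cost far less.
    usage.cacheWriteTokens * perTok(pricing.inputPricePerMTok) * 1.25 +
    usage.cacheReadTokens * perTok(pricing.inputPricePerMTok) * 0.1
  )
}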
src/api/providers/__tests__/claude-code-caching.spec.ts
Lines changed: 305 additions & 0 deletions (new file)

@@ -0,0 +1,305 @@
import { describe, it, expect, vi, beforeEach } from "vitest"
import { ClaudeCodeHandler } from "../claude-code"
import { runClaudeCode } from "../../../integrations/claude-code/run"
import type { ApiHandlerOptions } from "../../../shared/api"
import type { ClaudeCodeMessage } from "../../../integrations/claude-code/types"
import type { ApiStreamUsageChunk } from "../../transform/stream"
import type { Anthropic } from "@anthropic-ai/sdk"

// Mock the runClaudeCode function
vi.mock("../../../integrations/claude-code/run", () => ({
  runClaudeCode: vi.fn(),
}))

describe("ClaudeCodeHandler - Caching Support", () => {
  let handler: ClaudeCodeHandler
  const mockOptions: ApiHandlerOptions = {
    apiKey: "test-key",
    apiModelId: "claude-3-5-sonnet-20241022",
    claudeCodePath: "/test/path",
  }

  beforeEach(() => {
    handler = new ClaudeCodeHandler(mockOptions)
    vi.clearAllMocks()
  })

  it("should collect cache read tokens from API response", async () => {
    const mockStream = async function* (): AsyncGenerator<string | ClaudeCodeMessage> {
      // Initial system message
      yield {
        type: "system",
        subtype: "init",
        session_id: "test-session",
        tools: [],
        mcp_servers: [],
        apiKeySource: "user",
      } as ClaudeCodeMessage

      // Assistant message with cache tokens
      const message: Anthropic.Messages.Message = {
        id: "msg_123",
        type: "message",
        role: "assistant",
        model: "claude-3-5-sonnet-20241022",
        content: [{ type: "text", text: "Hello!", citations: [] }],
        usage: {
          input_tokens: 100,
          output_tokens: 50,
          cache_read_input_tokens: 80, // 80 tokens read from cache
          cache_creation_input_tokens: 20, // 20 new tokens cached
        },
        stop_reason: "end_turn",
        stop_sequence: null,
      }

      yield {
        type: "assistant",
        message,
        session_id: "test-session",
      } as ClaudeCodeMessage

      // Result with cost
      yield {
        type: "result",
        subtype: "success",
        result: "success",
        total_cost_usd: 0.001,
        is_error: false,
        duration_ms: 1000,
        duration_api_ms: 900,
        num_turns: 1,
        session_id: "test-session",
      } as ClaudeCodeMessage
    }

    vi.mocked(runClaudeCode).mockReturnValue(mockStream())

    const stream = handler.createMessage("System prompt", [{ role: "user", content: "Hello" }])

    const chunks = []
    for await (const chunk of stream) {
      chunks.push(chunk)
    }

    // Find the usage chunk
    const usageChunk = chunks.find((c) => c.type === "usage" && "totalCost" in c) as ApiStreamUsageChunk | undefined
    expect(usageChunk).toBeDefined()
    expect(usageChunk!.inputTokens).toBe(100)
    expect(usageChunk!.outputTokens).toBe(50)
    expect(usageChunk!.cacheReadTokens).toBe(80)
    expect(usageChunk!.cacheWriteTokens).toBe(20)
  })

  it("should accumulate cache tokens across multiple messages", async () => {
    const mockStream = async function* (): AsyncGenerator<string | ClaudeCodeMessage> {
      yield {
        type: "system",
        subtype: "init",
        session_id: "test-session",
        tools: [],
        mcp_servers: [],
        apiKeySource: "user",
      } as ClaudeCodeMessage

      // First message chunk
      const message1: Anthropic.Messages.Message = {
        id: "msg_1",
        type: "message",
        role: "assistant",
        model: "claude-3-5-sonnet-20241022",
        content: [{ type: "text", text: "Part 1", citations: [] }],
        usage: {
          input_tokens: 50,
          output_tokens: 25,
          cache_read_input_tokens: 40,
          cache_creation_input_tokens: 10,
        },
        stop_reason: null,
        stop_sequence: null,
      }

      yield {
        type: "assistant",
        message: message1,
        session_id: "test-session",
      } as ClaudeCodeMessage

      // Second message chunk
      const message2: Anthropic.Messages.Message = {
        id: "msg_2",
        type: "message",
        role: "assistant",
        model: "claude-3-5-sonnet-20241022",
        content: [{ type: "text", text: "Part 2", citations: [] }],
        usage: {
          input_tokens: 50,
          output_tokens: 25,
          cache_read_input_tokens: 30,
          cache_creation_input_tokens: 20,
        },
        stop_reason: "end_turn",
        stop_sequence: null,
      }

      yield {
        type: "assistant",
        message: message2,
        session_id: "test-session",
      } as ClaudeCodeMessage

      yield {
        type: "result",
        subtype: "success",
        result: "success",
        total_cost_usd: 0.002,
        is_error: false,
        duration_ms: 2000,
        duration_api_ms: 1800,
        num_turns: 1,
        session_id: "test-session",
      } as ClaudeCodeMessage
    }

    vi.mocked(runClaudeCode).mockReturnValue(mockStream())

    const stream = handler.createMessage("System prompt", [{ role: "user", content: "Hello" }])

    const chunks = []
    for await (const chunk of stream) {
      chunks.push(chunk)
    }

    const usageChunk = chunks.find((c) => c.type === "usage" && "totalCost" in c) as ApiStreamUsageChunk | undefined
    expect(usageChunk).toBeDefined()
    expect(usageChunk!.inputTokens).toBe(100) // 50 + 50
    expect(usageChunk!.outputTokens).toBe(50) // 25 + 25
    expect(usageChunk!.cacheReadTokens).toBe(70) // 40 + 30
    expect(usageChunk!.cacheWriteTokens).toBe(30) // 10 + 20
  })

  it("should handle missing cache token fields gracefully", async () => {
    const mockStream = async function* (): AsyncGenerator<string | ClaudeCodeMessage> {
      yield {
        type: "system",
        subtype: "init",
        session_id: "test-session",
        tools: [],
        mcp_servers: [],
        apiKeySource: "user",
      } as ClaudeCodeMessage

      // Message without cache tokens
      const message: Anthropic.Messages.Message = {
        id: "msg_123",
        type: "message",
        role: "assistant",
        model: "claude-3-5-sonnet-20241022",
        content: [{ type: "text", text: "Hello!", citations: [] }],
        usage: {
          input_tokens: 100,
          output_tokens: 50,
          cache_read_input_tokens: null,
          cache_creation_input_tokens: null,
        },
        stop_reason: "end_turn",
        stop_sequence: null,
      }

      yield {
        type: "assistant",
        message,
        session_id: "test-session",
      } as ClaudeCodeMessage

      yield {
        type: "result",
        subtype: "success",
        result: "success",
        total_cost_usd: 0.001,
        is_error: false,
        duration_ms: 1000,
        duration_api_ms: 900,
        num_turns: 1,
        session_id: "test-session",
      } as ClaudeCodeMessage
    }

    vi.mocked(runClaudeCode).mockReturnValue(mockStream())

    const stream = handler.createMessage("System prompt", [{ role: "user", content: "Hello" }])

    const chunks = []
    for await (const chunk of stream) {
      chunks.push(chunk)
    }

    const usageChunk = chunks.find((c) => c.type === "usage" && "totalCost" in c) as ApiStreamUsageChunk | undefined
    expect(usageChunk).toBeDefined()
    expect(usageChunk!.inputTokens).toBe(100)
    expect(usageChunk!.outputTokens).toBe(50)
    expect(usageChunk!.cacheReadTokens).toBe(0)
    expect(usageChunk!.cacheWriteTokens).toBe(0)
  })

  it("should report zero cost for subscription usage", async () => {
    const mockStream = async function* (): AsyncGenerator<string | ClaudeCodeMessage> {
      // Subscription usage has apiKeySource: "none"
      yield {
        type: "system",
        subtype: "init",
        session_id: "test-session",
        tools: [],
        mcp_servers: [],
        apiKeySource: "none",
      } as ClaudeCodeMessage

      const message: Anthropic.Messages.Message = {
        id: "msg_123",
        type: "message",
        role: "assistant",
        model: "claude-3-5-sonnet-20241022",
        content: [{ type: "text", text: "Hello!", citations: [] }],
        usage: {
          input_tokens: 100,
          output_tokens: 50,
          cache_read_input_tokens: 80,
          cache_creation_input_tokens: 20,
        },
        stop_reason: "end_turn",
        stop_sequence: null,
      }

      yield {
        type: "assistant",
        message,
        session_id: "test-session",
      } as ClaudeCodeMessage

      yield {
        type: "result",
        subtype: "success",
        result: "success",
        total_cost_usd: 0.001, // This should be ignored for subscription usage
        is_error: false,
        duration_ms: 1000,
        duration_api_ms: 900,
        num_turns: 1,
        session_id: "test-session",
      } as ClaudeCodeMessage
    }

    vi.mocked(runClaudeCode).mockReturnValue(mockStream())

    const stream = handler.createMessage("System prompt", [{ role: "user", content: "Hello" }])

    const chunks = []
    for await (const chunk of stream) {
      chunks.push(chunk)
    }

    const usageChunk = chunks.find((c) => c.type === "usage" && "totalCost" in c) as ApiStreamUsageChunk | undefined
    expect(usageChunk).toBeDefined()
    expect(usageChunk!.totalCost).toBe(0) // Should be 0 for subscription usage
  })
})
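One detail the last test pins down: when Claude Code runs on a subscription rather than an API key, the CLI's init message reports apiKeySource as "none", and the total_cost_usd from its result message should not be billed to the user. A one-line sketch of that rule (the function name is illustrative, not the handler's actual code):

// Sketch of the zero-cost rule for subscription usage, as exercised above.
function resolveCost(apiKeySource: string, totalCostUsd: number): number {
  return apiKeySource === "none" ? 0 : totalCostUsd
}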

src/api/providers/__tests__/claude-code.spec.ts
Lines changed: 1 addition & 1 deletion

@@ -34,7 +34,7 @@ describe("ClaudeCodeHandler", () => {
     const model = handler.getModel()
     expect(model.id).toBe("claude-3-5-sonnet-20241022")
     expect(model.info.supportsImages).toBe(false)
-    expect(model.info.supportsPromptCache).toBe(false)
+    expect(model.info.supportsPromptCache).toBe(true) // Claude Code now supports prompt caching
   })

   test("should use default model when invalid model provided", () => {

webview-ui/src/components/ui/hooks/__tests__/useSelectedModel.spec.ts
Lines changed: 1 addition & 1 deletion

@@ -402,7 +402,7 @@ describe("useSelectedModel", () => {
     expect(result.current.id).toBe("claude-sonnet-4-20250514")
     expect(result.current.info).toBeDefined()
     expect(result.current.info?.supportsImages).toBe(false)
-    expect(result.current.info?.supportsPromptCache).toBe(false)
+    expect(result.current.info?.supportsPromptCache).toBe(true) // Claude Code now supports prompt cache
     // Verify it inherits other properties from anthropic models
     expect(result.current.info?.maxTokens).toBe(64_000)
     expect(result.current.info?.contextWindow).toBe(200_000)