Report prompt-cache token usage for the Claude Code provider.

Summary of the hunks below:
  * packages/types/src/providers/claude-code.ts
      Flip `supportsPromptCache` from false to true on the five model
      entries touched here.
  * src/api/providers/__tests__/claude-code-caching.spec.ts (new file)
      Vitest spec that mocks `runClaudeCode` and checks the usage chunk
      emitted by `ClaudeCodeHandler.createMessage`:
        - cache read/creation token counts are surfaced,
        - counts are summed across several assistant messages,
        - null cache fields are reported as 0,
        - `totalCost` is 0 when the init message has apiKeySource "none".
  * src/api/providers/__tests__/claude-code.spec.ts
  * webview-ui/src/components/ui/hooks/__tests__/useSelectedModel.spec.ts
      Update the existing `supportsPromptCache` expectations to true.

NOTE(review): this patch was recovered from a whitespace-collapsed copy.
Line structure was rebuilt from the hunk headers (the line counts match);
leading indentation is reconstructed as tabs and should be confirmed
against the target tree before applying (or apply with
`git apply --ignore-whitespace`).

diff --git a/packages/types/src/providers/claude-code.ts b/packages/types/src/providers/claude-code.ts
index 707312e915..d0fff0f2ee 100644
--- a/packages/types/src/providers/claude-code.ts
+++ b/packages/types/src/providers/claude-code.ts
@@ -8,7 +8,7 @@ export const claudeCodeModels = {
 	"claude-sonnet-4-20250514": {
 		...anthropicModels["claude-sonnet-4-20250514"],
 		supportsImages: false,
-		supportsPromptCache: false,
+		supportsPromptCache: true, // Claude Code does report cache tokens
 		supportsReasoningEffort: false,
 		supportsReasoningBudget: false,
 		requiredReasoningBudget: false,
@@ -16,7 +16,7 @@ export const claudeCodeModels = {
 	"claude-opus-4-20250514": {
 		...anthropicModels["claude-opus-4-20250514"],
 		supportsImages: false,
-		supportsPromptCache: false,
+		supportsPromptCache: true, // Claude Code does report cache tokens
 		supportsReasoningEffort: false,
 		supportsReasoningBudget: false,
 		requiredReasoningBudget: false,
@@ -24,7 +24,7 @@ export const claudeCodeModels = {
 	"claude-3-7-sonnet-20250219": {
 		...anthropicModels["claude-3-7-sonnet-20250219"],
 		supportsImages: false,
-		supportsPromptCache: false,
+		supportsPromptCache: true, // Claude Code does report cache tokens
 		supportsReasoningEffort: false,
 		supportsReasoningBudget: false,
 		requiredReasoningBudget: false,
@@ -32,7 +32,7 @@ export const claudeCodeModels = {
 	"claude-3-5-sonnet-20241022": {
 		...anthropicModels["claude-3-5-sonnet-20241022"],
 		supportsImages: false,
-		supportsPromptCache: false,
+		supportsPromptCache: true, // Claude Code does report cache tokens
 		supportsReasoningEffort: false,
 		supportsReasoningBudget: false,
 		requiredReasoningBudget: false,
@@ -40,7 +40,7 @@ export const claudeCodeModels = {
 	"claude-3-5-haiku-20241022": {
 		...anthropicModels["claude-3-5-haiku-20241022"],
 		supportsImages: false,
-		supportsPromptCache: false,
+		supportsPromptCache: true, // Claude Code does report cache tokens
 		supportsReasoningEffort: false,
 		supportsReasoningBudget: false,
 		requiredReasoningBudget: false,
diff --git a/src/api/providers/__tests__/claude-code-caching.spec.ts b/src/api/providers/__tests__/claude-code-caching.spec.ts
new file mode 100644
index 0000000000..b7f7ff852a
--- /dev/null
+++ b/src/api/providers/__tests__/claude-code-caching.spec.ts
@@ -0,0 +1,305 @@
+import { describe, it, expect, vi, beforeEach } from "vitest"
+import { ClaudeCodeHandler } from "../claude-code"
+import { runClaudeCode } from "../../../integrations/claude-code/run"
+import type { ApiHandlerOptions } from "../../../shared/api"
+import type { ClaudeCodeMessage } from "../../../integrations/claude-code/types"
+import type { ApiStreamUsageChunk } from "../../transform/stream"
+import type { Anthropic } from "@anthropic-ai/sdk"
+
+// Mock the runClaudeCode function
+vi.mock("../../../integrations/claude-code/run", () => ({
+	runClaudeCode: vi.fn(),
+}))
+
+describe("ClaudeCodeHandler - Caching Support", () => {
+	let handler: ClaudeCodeHandler
+	const mockOptions: ApiHandlerOptions = {
+		apiKey: "test-key",
+		apiModelId: "claude-3-5-sonnet-20241022",
+		claudeCodePath: "/test/path",
+	}
+
+	beforeEach(() => {
+		handler = new ClaudeCodeHandler(mockOptions)
+		vi.clearAllMocks()
+	})
+
+	it("should collect cache read tokens from API response", async () => {
+		const mockStream = async function* (): AsyncGenerator<ClaudeCodeMessage> {
+			// Initial system message
+			yield {
+				type: "system",
+				subtype: "init",
+				session_id: "test-session",
+				tools: [],
+				mcp_servers: [],
+				apiKeySource: "user",
+			} as ClaudeCodeMessage
+
+			// Assistant message with cache tokens
+			const message: Anthropic.Messages.Message = {
+				id: "msg_123",
+				type: "message",
+				role: "assistant",
+				model: "claude-3-5-sonnet-20241022",
+				content: [{ type: "text", text: "Hello!", citations: [] }],
+				usage: {
+					input_tokens: 100,
+					output_tokens: 50,
+					cache_read_input_tokens: 80, // 80 tokens read from cache
+					cache_creation_input_tokens: 20, // 20 new tokens cached
+				},
+				stop_reason: "end_turn",
+				stop_sequence: null,
+			}
+
+			yield {
+				type: "assistant",
+				message,
+				session_id: "test-session",
+			} as ClaudeCodeMessage
+
+			// Result with cost
+			yield {
+				type: "result",
+				subtype: "success",
+				result: "success",
+				total_cost_usd: 0.001,
+				is_error: false,
+				duration_ms: 1000,
+				duration_api_ms: 900,
+				num_turns: 1,
+				session_id: "test-session",
+			} as ClaudeCodeMessage
+		}
+
+		vi.mocked(runClaudeCode).mockReturnValue(mockStream())
+
+		const stream = handler.createMessage("System prompt", [{ role: "user", content: "Hello" }])
+
+		const chunks = []
+		for await (const chunk of stream) {
+			chunks.push(chunk)
+		}
+
+		// Find the usage chunk
+		const usageChunk = chunks.find((c) => c.type === "usage" && "totalCost" in c) as ApiStreamUsageChunk | undefined
+		expect(usageChunk).toBeDefined()
+		expect(usageChunk!.inputTokens).toBe(100)
+		expect(usageChunk!.outputTokens).toBe(50)
+		expect(usageChunk!.cacheReadTokens).toBe(80)
+		expect(usageChunk!.cacheWriteTokens).toBe(20)
+	})
+
+	it("should accumulate cache tokens across multiple messages", async () => {
+		const mockStream = async function* (): AsyncGenerator<ClaudeCodeMessage> {
+			yield {
+				type: "system",
+				subtype: "init",
+				session_id: "test-session",
+				tools: [],
+				mcp_servers: [],
+				apiKeySource: "user",
+			} as ClaudeCodeMessage
+
+			// First message chunk
+			const message1: Anthropic.Messages.Message = {
+				id: "msg_1",
+				type: "message",
+				role: "assistant",
+				model: "claude-3-5-sonnet-20241022",
+				content: [{ type: "text", text: "Part 1", citations: [] }],
+				usage: {
+					input_tokens: 50,
+					output_tokens: 25,
+					cache_read_input_tokens: 40,
+					cache_creation_input_tokens: 10,
+				},
+				stop_reason: null,
+				stop_sequence: null,
+			}
+
+			yield {
+				type: "assistant",
+				message: message1,
+				session_id: "test-session",
+			} as ClaudeCodeMessage
+
+			// Second message chunk
+			const message2: Anthropic.Messages.Message = {
+				id: "msg_2",
+				type: "message",
+				role: "assistant",
+				model: "claude-3-5-sonnet-20241022",
+				content: [{ type: "text", text: "Part 2", citations: [] }],
+				usage: {
+					input_tokens: 50,
+					output_tokens: 25,
+					cache_read_input_tokens: 30,
+					cache_creation_input_tokens: 20,
+				},
+				stop_reason: "end_turn",
+				stop_sequence: null,
+			}
+
+			yield {
+				type: "assistant",
+				message: message2,
+				session_id: "test-session",
+			} as ClaudeCodeMessage
+
+			yield {
+				type: "result",
+				subtype: "success",
+				result: "success",
+				total_cost_usd: 0.002,
+				is_error: false,
+				duration_ms: 2000,
+				duration_api_ms: 1800,
+				num_turns: 1,
+				session_id: "test-session",
+			} as ClaudeCodeMessage
+		}
+
+		vi.mocked(runClaudeCode).mockReturnValue(mockStream())
+
+		const stream = handler.createMessage("System prompt", [{ role: "user", content: "Hello" }])
+
+		const chunks = []
+		for await (const chunk of stream) {
+			chunks.push(chunk)
+		}
+
+		const usageChunk = chunks.find((c) => c.type === "usage" && "totalCost" in c) as ApiStreamUsageChunk | undefined
+		expect(usageChunk).toBeDefined()
+		expect(usageChunk!.inputTokens).toBe(100) // 50 + 50
+		expect(usageChunk!.outputTokens).toBe(50) // 25 + 25
+		expect(usageChunk!.cacheReadTokens).toBe(70) // 40 + 30
+		expect(usageChunk!.cacheWriteTokens).toBe(30) // 10 + 20
+	})
+
+	it("should handle missing cache token fields gracefully", async () => {
+		const mockStream = async function* (): AsyncGenerator<ClaudeCodeMessage> {
+			yield {
+				type: "system",
+				subtype: "init",
+				session_id: "test-session",
+				tools: [],
+				mcp_servers: [],
+				apiKeySource: "user",
+			} as ClaudeCodeMessage
+
+			// Message without cache tokens
+			const message: Anthropic.Messages.Message = {
+				id: "msg_123",
+				type: "message",
+				role: "assistant",
+				model: "claude-3-5-sonnet-20241022",
+				content: [{ type: "text", text: "Hello!", citations: [] }],
+				usage: {
+					input_tokens: 100,
+					output_tokens: 50,
+					cache_read_input_tokens: null,
+					cache_creation_input_tokens: null,
+				},
+				stop_reason: "end_turn",
+				stop_sequence: null,
+			}
+
+			yield {
+				type: "assistant",
+				message,
+				session_id: "test-session",
+			} as ClaudeCodeMessage
+
+			yield {
+				type: "result",
+				subtype: "success",
+				result: "success",
+				total_cost_usd: 0.001,
+				is_error: false,
+				duration_ms: 1000,
+				duration_api_ms: 900,
+				num_turns: 1,
+				session_id: "test-session",
+			} as ClaudeCodeMessage
+		}
+
+		vi.mocked(runClaudeCode).mockReturnValue(mockStream())
+
+		const stream = handler.createMessage("System prompt", [{ role: "user", content: "Hello" }])
+
+		const chunks = []
+		for await (const chunk of stream) {
+			chunks.push(chunk)
+		}
+
+		const usageChunk = chunks.find((c) => c.type === "usage" && "totalCost" in c) as ApiStreamUsageChunk | undefined
+		expect(usageChunk).toBeDefined()
+		expect(usageChunk!.inputTokens).toBe(100)
+		expect(usageChunk!.outputTokens).toBe(50)
+		expect(usageChunk!.cacheReadTokens).toBe(0)
+		expect(usageChunk!.cacheWriteTokens).toBe(0)
+	})
+
+	it("should report zero cost for subscription usage", async () => {
+		const mockStream = async function* (): AsyncGenerator<ClaudeCodeMessage> {
+			// Subscription usage has apiKeySource: "none"
+			yield {
+				type: "system",
+				subtype: "init",
+				session_id: "test-session",
+				tools: [],
+				mcp_servers: [],
+				apiKeySource: "none",
+			} as ClaudeCodeMessage
+
+			const message: Anthropic.Messages.Message = {
+				id: "msg_123",
+				type: "message",
+				role: "assistant",
+				model: "claude-3-5-sonnet-20241022",
+				content: [{ type: "text", text: "Hello!", citations: [] }],
+				usage: {
+					input_tokens: 100,
+					output_tokens: 50,
+					cache_read_input_tokens: 80,
+					cache_creation_input_tokens: 20,
+				},
+				stop_reason: "end_turn",
+				stop_sequence: null,
+			}
+
+			yield {
+				type: "assistant",
+				message,
+				session_id: "test-session",
+			} as ClaudeCodeMessage
+
+			yield {
+				type: "result",
+				subtype: "success",
+				result: "success",
+				total_cost_usd: 0.001, // This should be ignored for subscription usage
+				is_error: false,
+				duration_ms: 1000,
+				duration_api_ms: 900,
+				num_turns: 1,
+				session_id: "test-session",
+			} as ClaudeCodeMessage
+		}
+
+		vi.mocked(runClaudeCode).mockReturnValue(mockStream())
+
+		const stream = handler.createMessage("System prompt", [{ role: "user", content: "Hello" }])
+
+		const chunks = []
+		for await (const chunk of stream) {
+			chunks.push(chunk)
+		}
+
+		const usageChunk = chunks.find((c) => c.type === "usage" && "totalCost" in c) as ApiStreamUsageChunk | undefined
+		expect(usageChunk).toBeDefined()
+		expect(usageChunk!.totalCost).toBe(0) // Should be 0 for subscription usage
+	})
+})
diff --git a/src/api/providers/__tests__/claude-code.spec.ts b/src/api/providers/__tests__/claude-code.spec.ts
index 5bc4c6f1ea..d0dfa68eb8 100644
--- a/src/api/providers/__tests__/claude-code.spec.ts
+++ b/src/api/providers/__tests__/claude-code.spec.ts
@@ -34,7 +34,7 @@ describe("ClaudeCodeHandler", () => {
 		const model = handler.getModel()
 		expect(model.id).toBe("claude-3-5-sonnet-20241022")
 		expect(model.info.supportsImages).toBe(false)
-		expect(model.info.supportsPromptCache).toBe(false)
+		expect(model.info.supportsPromptCache).toBe(true) // Claude Code now supports prompt caching
 	})
 
 	test("should use default model when invalid model provided", () => {
diff --git a/webview-ui/src/components/ui/hooks/__tests__/useSelectedModel.spec.ts b/webview-ui/src/components/ui/hooks/__tests__/useSelectedModel.spec.ts
index fd5950bf35..5fefabf59e 100644
--- a/webview-ui/src/components/ui/hooks/__tests__/useSelectedModel.spec.ts
+++ b/webview-ui/src/components/ui/hooks/__tests__/useSelectedModel.spec.ts
@@ -402,7 +402,7 @@ describe("useSelectedModel", () => {
 			expect(result.current.id).toBe("claude-sonnet-4-20250514")
 			expect(result.current.info).toBeDefined()
 			expect(result.current.info?.supportsImages).toBe(false)
-			expect(result.current.info?.supportsPromptCache).toBe(false)
+			expect(result.current.info?.supportsPromptCache).toBe(true) // Claude Code now supports prompt caching
 			// Verify it inherits other properties from anthropic models
 			expect(result.current.info?.maxTokens).toBe(64_000)
 			expect(result.current.info?.contextWindow).toBe(200_000)