diff --git a/src/api/providers/__tests__/unbound.spec.ts b/src/api/providers/__tests__/unbound.spec.ts index 68d2190c44..654b480475 100644 --- a/src/api/providers/__tests__/unbound.spec.ts +++ b/src/api/providers/__tests__/unbound.spec.ts @@ -37,6 +37,32 @@ vitest.mock("../fetchers/modelCache", () => ({ thinking: false, supportsComputerUse: true, }, + "anthropic/claude-sonnet-4-20250514": { + maxTokens: 8192, + contextWindow: 200000, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 3, + outputPrice: 15, + cacheWritesPrice: 3.75, + cacheReadsPrice: 0.3, + description: "Claude Sonnet 4", + thinking: false, + supportsComputerUse: true, + }, + "anthropic/claude-opus-4-20250514": { + maxTokens: 8192, + contextWindow: 200000, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 15, + outputPrice: 75, + cacheWritesPrice: 18.75, + cacheReadsPrice: 1.5, + description: "Claude Opus 4", + thinking: false, + supportsComputerUse: true, + }, "openai/gpt-4o": { maxTokens: 4096, contextWindow: 128000, @@ -46,6 +72,32 @@ vitest.mock("../fetchers/modelCache", () => ({ outputPrice: 15, description: "GPT-4o", }, + "anthropic/claude-3-haiku-20240307": { + maxTokens: 4096, + contextWindow: 200000, + supportsImages: true, + supportsPromptCache: true, // Haiku supports caching and should receive it + inputPrice: 0.25, + outputPrice: 1.25, + cacheWritesPrice: 0.3, + cacheReadsPrice: 0.03, + description: "Claude 3 Haiku", + thinking: false, + supportsComputerUse: false, + }, + "anthropic/claude-3-5-haiku-20241022": { + maxTokens: 8192, + contextWindow: 200000, + supportsImages: false, + supportsPromptCache: true, // 3.5 Haiku supports caching and should receive it + inputPrice: 0.8, + outputPrice: 4.0, + cacheWritesPrice: 1.0, + cacheReadsPrice: 0.08, + description: "Claude 3.5 Haiku", + thinking: false, + supportsComputerUse: false, + }, "openai/o3-mini": { maxTokens: 4096, contextWindow: 128000, @@ -319,4 +371,226 @@ describe("UnboundHandler", () => { 
expect(modelInfo.info).toBeDefined() }) }) + + describe("cache breakpoints", () => { + const systemPrompt = "You are a helpful assistant." + const messages: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: "Hello!", + }, + ] + + it("should apply cache breakpoints for Claude 3 models", async () => { + const claude3Handler = new UnboundHandler({ + ...mockOptions, + unboundModelId: "anthropic/claude-3-5-sonnet-20241022", + }) + + mockCreate.mockClear() + const stream = claude3Handler.createMessage(systemPrompt, messages) + const chunks: Array<{ type: string } & Record<string, unknown>> = [] + + for await (const chunk of stream) { + chunks.push(chunk) + } + + // Verify that cache control was added to system message and user message + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + messages: expect.arrayContaining([ + expect.objectContaining({ + role: "system", + content: expect.arrayContaining([ + expect.objectContaining({ cache_control: { type: "ephemeral" } }), + ]), + }), + expect.objectContaining({ + role: "user", + content: expect.arrayContaining([ + expect.objectContaining({ cache_control: { type: "ephemeral" } }), + ]), + }), + ]), + }), + expect.any(Object), + ) + }) + + it("should apply cache breakpoints for Claude 4 models", async () => { + const claude4Handler = new UnboundHandler({ + ...mockOptions, + unboundModelId: "anthropic/claude-sonnet-4-20250514", + }) + + mockCreate.mockClear() + const stream = claude4Handler.createMessage(systemPrompt, messages) + const chunks: Array<{ type: string } & Record<string, unknown>> = [] + + for await (const chunk of stream) { + chunks.push(chunk) + } + + // Verify that cache control was added to system message and user message + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + messages: expect.arrayContaining([ + expect.objectContaining({ + role: "system", + content: expect.arrayContaining([ + expect.objectContaining({ cache_control: { type: "ephemeral" } }), + ]), + }), + 
expect.objectContaining({ + role: "user", + content: expect.arrayContaining([ + expect.objectContaining({ cache_control: { type: "ephemeral" } }), + ]), + }), + ]), + }), + expect.any(Object), + ) + }) + + it("should apply cache breakpoints for Claude Opus 4 models", async () => { + const claudeOpus4Handler = new UnboundHandler({ + ...mockOptions, + unboundModelId: "anthropic/claude-opus-4-20250514", + }) + + mockCreate.mockClear() + const stream = claudeOpus4Handler.createMessage(systemPrompt, messages) + const chunks: Array<{ type: string } & Record<string, unknown>> = [] + + for await (const chunk of stream) { + chunks.push(chunk) + } + + // Verify that cache control was added to system message and user message + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + messages: expect.arrayContaining([ + expect.objectContaining({ + role: "system", + content: expect.arrayContaining([ + expect.objectContaining({ cache_control: { type: "ephemeral" } }), + ]), + }), + expect.objectContaining({ + role: "user", + content: expect.arrayContaining([ + expect.objectContaining({ cache_control: { type: "ephemeral" } }), + ]), + }), + ]), + }), + expect.any(Object), + ) + }) + + it("should apply cache breakpoints for Claude 3 Haiku models", async () => { + const haikuHandler = new UnboundHandler({ + ...mockOptions, + unboundModelId: "anthropic/claude-3-haiku-20240307", + }) + + mockCreate.mockClear() + const stream = haikuHandler.createMessage(systemPrompt, messages) + const chunks: Array<{ type: string } & Record<string, unknown>> = [] + + for await (const chunk of stream) { + chunks.push(chunk) + } + + // Verify that cache control was added to system message and user message + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + messages: expect.arrayContaining([ + expect.objectContaining({ + role: "system", + content: expect.arrayContaining([ + expect.objectContaining({ cache_control: { type: "ephemeral" } }), + ]), + }), + expect.objectContaining({ + role: "user", + 
content: expect.arrayContaining([ + expect.objectContaining({ cache_control: { type: "ephemeral" } }), + ]), + }), + ]), + }), + expect.any(Object), + ) + }) + + it("should apply cache breakpoints for Claude 3.5 Haiku models", async () => { + const haiku35Handler = new UnboundHandler({ + ...mockOptions, + unboundModelId: "anthropic/claude-3-5-haiku-20241022", + }) + + mockCreate.mockClear() + const stream = haiku35Handler.createMessage(systemPrompt, messages) + const chunks: Array<{ type: string } & Record<string, unknown>> = [] + + for await (const chunk of stream) { + chunks.push(chunk) + } + + // Verify that cache control was added to system message and user message + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + messages: expect.arrayContaining([ + expect.objectContaining({ + role: "system", + content: expect.arrayContaining([ + expect.objectContaining({ cache_control: { type: "ephemeral" } }), + ]), + }), + expect.objectContaining({ + role: "user", + content: expect.arrayContaining([ + expect.objectContaining({ cache_control: { type: "ephemeral" } }), + ]), + }), + ]), + }), + expect.any(Object), + ) + }) + + it("should NOT apply cache breakpoints for non-Claude models", async () => { + const openaiHandler = new UnboundHandler({ + ...mockOptions, + unboundModelId: "openai/gpt-4o", + }) + + mockCreate.mockClear() + const stream = openaiHandler.createMessage(systemPrompt, messages) + const chunks: Array<{ type: string } & Record<string, unknown>> = [] + + for await (const chunk of stream) { + chunks.push(chunk) + } + + // Verify that cache control was NOT added - messages should be strings, not arrays with cache_control + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + messages: expect.arrayContaining([ + expect.objectContaining({ + role: "system", + content: systemPrompt, // Should be string, not array with cache_control + }), + expect.objectContaining({ + role: "user", + content: "Hello!", // Should be string, not array with cache_control + }), + 
]), + }), + expect.any(Object), + ) + }) + }) })