Commit 9b267e9

Add Vertex AI prompt caching support and enhance streaming handling
- Implemented comprehensive prompt caching strategy for Vertex AI models
- Added support for caching system prompts and user message text blocks
- Enhanced stream processing to handle cache-related usage metrics
- Updated model configurations to enable prompt caching
- Improved type definitions for Vertex AI message handling
1 parent 30c1c25 commit 9b267e9
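
The test diff below pins down the request shape this caching strategy produces. As a rough sketch of the idea (the actual vertex.ts change is not part of the diff shown here), the transformation amounts to wrapping the system prompt and user text in blocks marked with an ephemeral cache_control. The helper name withPromptCaching and its local types are illustrative only, not code from this commit.

type CacheControl = { type: "ephemeral" }
type TextBlock = { type: "text"; text: string; cache_control?: CacheControl }
type ChatMessage = { role: "user" | "assistant"; content: string | TextBlock[] }

// Illustrative only: shape a request the way the updated tests expect it, with
// ephemeral cache_control markers on the system prompt and on user text blocks.
function withPromptCaching(systemPrompt: string, messages: ChatMessage[]): { system: TextBlock[]; messages: ChatMessage[] } {
	// The system prompt is sent as a single text block flagged for caching.
	const system: TextBlock[] = [{ type: "text", text: systemPrompt, cache_control: { type: "ephemeral" } }]
	// User turns become text blocks with cache_control; assistant turns pass through unchanged.
	const cached: ChatMessage[] = messages.map((message) =>
		message.role === "user" && typeof message.content === "string"
			? { ...message, content: [{ type: "text", text: message.content, cache_control: { type: "ephemeral" } }] }
			: message,
	)
	return { system, messages: cached }
}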

3 files changed (+435, -35 lines)


src/api/providers/__tests__/vertex.test.ts

Lines changed: 210 additions & 5 deletions
@@ -4,6 +4,7 @@ import { Anthropic } from "@anthropic-ai/sdk"
 import { AnthropicVertex } from "@anthropic-ai/vertex-sdk"
 
 import { VertexHandler } from "../vertex"
+import { ApiStreamChunk } from "../../transform/stream"
 
 // Mock Vertex SDK
 jest.mock("@anthropic-ai/vertex-sdk", () => ({
@@ -128,7 +129,7 @@ describe("VertexHandler", () => {
 			;(handler["client"].messages as any).create = mockCreate
 
 			const stream = handler.createMessage(systemPrompt, mockMessages)
-			const chunks = []
+			const chunks: ApiStreamChunk[] = []
 
 			for await (const chunk of stream) {
 				chunks.push(chunk)
@@ -158,8 +159,29 @@ describe("VertexHandler", () => {
 				model: "claude-3-5-sonnet-v2@20241022",
 				max_tokens: 8192,
 				temperature: 0,
-				system: systemPrompt,
-				messages: mockMessages,
+				system: [
+					{
+						type: "text",
+						text: "You are a helpful assistant",
+						cache_control: { type: "ephemeral" },
+					},
+				],
+				messages: [
+					{
+						role: "user",
+						content: [
+							{
+								type: "text",
+								text: "Hello",
+								cache_control: { type: "ephemeral" },
+							},
+						],
+					},
+					{
+						role: "assistant",
+						content: "Hi there!",
+					},
+				],
 				stream: true,
 			})
 		})
@@ -196,7 +218,7 @@ describe("VertexHandler", () => {
 			;(handler["client"].messages as any).create = mockCreate
 
 			const stream = handler.createMessage(systemPrompt, mockMessages)
-			const chunks = []
+			const chunks: ApiStreamChunk[] = []
 
 			for await (const chunk of stream) {
 				chunks.push(chunk)
@@ -230,6 +252,183 @@ describe("VertexHandler", () => {
 				}
 			}).rejects.toThrow("Vertex API error")
 		})
+
+		it("should handle prompt caching for supported models", async () => {
+			const mockStream = [
+				{
+					type: "message_start",
+					message: {
+						usage: {
+							input_tokens: 10,
+							output_tokens: 0,
+							cache_creation_input_tokens: 3,
+							cache_read_input_tokens: 2,
+						},
+					},
+				},
+				{
+					type: "content_block_start",
+					index: 0,
+					content_block: {
+						type: "text",
+						text: "Hello",
+					},
+				},
+				{
+					type: "content_block_delta",
+					delta: {
+						type: "text_delta",
+						text: " world!",
+					},
+				},
+				{
+					type: "message_delta",
+					usage: {
+						output_tokens: 5,
+					},
+				},
+			]
+
+			const asyncIterator = {
+				async *[Symbol.asyncIterator]() {
+					for (const chunk of mockStream) {
+						yield chunk
+					}
+				},
+			}
+
+			const mockCreate = jest.fn().mockResolvedValue(asyncIterator)
+			;(handler["client"].messages as any).create = mockCreate
+
+			const stream = handler.createMessage(systemPrompt, [
+				{
+					role: "user",
+					content: "First message",
+				},
+				{
+					role: "assistant",
+					content: "Response",
+				},
+				{
+					role: "user",
+					content: "Second message",
+				},
+			])
+
+			const chunks: ApiStreamChunk[] = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			// Verify usage information
+			const usageChunks = chunks.filter((chunk) => chunk.type === "usage")
+			expect(usageChunks).toHaveLength(2)
+			expect(usageChunks[0]).toEqual({
+				type: "usage",
+				inputTokens: 10,
+				outputTokens: 0,
+				cacheWriteTokens: 3,
+				cacheReadTokens: 2,
+			})
+			expect(usageChunks[1]).toEqual({
+				type: "usage",
+				inputTokens: 0,
+				outputTokens: 5,
+			})
+
+			// Verify text content
+			const textChunks = chunks.filter((chunk) => chunk.type === "text")
+			expect(textChunks).toHaveLength(2)
+			expect(textChunks[0].text).toBe("Hello")
+			expect(textChunks[1].text).toBe(" world!")
+
+			// Verify cache control was added correctly
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					system: [
+						{
+							type: "text",
+							text: "You are a helpful assistant",
+							cache_control: { type: "ephemeral" },
+						},
+					],
+					messages: [
+						expect.objectContaining({
+							role: "user",
+							content: [
+								{
+									type: "text",
+									text: "First message",
+									cache_control: { type: "ephemeral" },
+								},
+							],
+						}),
+						expect.objectContaining({
+							role: "assistant",
+							content: "Response",
+						}),
+						expect.objectContaining({
+							role: "user",
+							content: [
+								{
+									type: "text",
+									text: "Second message",
+									cache_control: { type: "ephemeral" },
+								},
+							],
+						}),
+					],
+				}),
+			)
+		})
+
+		it("should handle cache-related usage metrics", async () => {
+			const mockStream = [
+				{
+					type: "message_start",
+					message: {
+						usage: {
+							input_tokens: 10,
+							output_tokens: 0,
+							cache_creation_input_tokens: 5,
+							cache_read_input_tokens: 3,
+						},
+					},
+				},
+				{
+					type: "content_block_start",
+					index: 0,
+					content_block: {
+						type: "text",
+						text: "Hello",
+					},
+				},
+			]
+
+			const asyncIterator = {
+				async *[Symbol.asyncIterator]() {
+					for (const chunk of mockStream) {
+						yield chunk
+					}
+				},
+			}
+
+			const mockCreate = jest.fn().mockResolvedValue(asyncIterator)
+			;(handler["client"].messages as any).create = mockCreate
+
+			const stream = handler.createMessage(systemPrompt, mockMessages)
+			const chunks: ApiStreamChunk[] = []
+
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			// Check for cache-related metrics in usage chunk
+			const usageChunks = chunks.filter((chunk) => chunk.type === "usage")
+			expect(usageChunks.length).toBeGreaterThan(0)
+			expect(usageChunks[0]).toHaveProperty("cacheWriteTokens", 5)
+			expect(usageChunks[0]).toHaveProperty("cacheReadTokens", 3)
+		})
 	})
 
 	describe("completePrompt", () => {
@@ -240,7 +439,13 @@ describe("VertexHandler", () => {
 				model: "claude-3-5-sonnet-v2@20241022",
 				max_tokens: 8192,
 				temperature: 0,
-				messages: [{ role: "user", content: "Test prompt" }],
+				system: "",
+				messages: [
+					{
+						role: "user",
+						content: [{ type: "text", text: "Test prompt", cache_control: { type: "ephemeral" } }],
+					},
+				],
 				stream: false,
 			})
 		})
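
Taken together, the new assertions describe the ApiStreamChunk stream a caller sees: "text" chunks carrying the streamed completion, plus "usage" chunks that may include cacheWriteTokens and cacheReadTokens alongside inputTokens and outputTokens. A hedged consumption sketch, assuming a handler constructed elsewhere (the constructor options are not shown in this diff) and using the same relative import path as the test file:

import { VertexHandler } from "../vertex"

// Illustrative only: tally the usage chunks the tests above assert on.
async function collectUsage(handler: VertexHandler) {
	const totals = { input: 0, output: 0, cacheWrites: 0, cacheReads: 0 }
	const stream = handler.createMessage("You are a helpful assistant", [{ role: "user", content: "Hello" }])
	for await (const chunk of stream) {
		if (chunk.type === "usage") {
			// cacheWriteTokens / cacheReadTokens are only present when the model reports cache activity.
			totals.input += chunk.inputTokens
			totals.output += chunk.outputTokens
			totals.cacheWrites += chunk.cacheWriteTokens ?? 0
			totals.cacheReads += chunk.cacheReadTokens ?? 0
		}
	}
	return totals
}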
