diff --git a/packages/types/src/provider-settings.ts b/packages/types/src/provider-settings.ts
index 3fa7094d87..95a9af97ec 100644
--- a/packages/types/src/provider-settings.ts
+++ b/packages/types/src/provider-settings.ts
@@ -266,6 +266,7 @@ const xaiSchema = apiModelIdProviderModelSchema.extend({
 
 const groqSchema = apiModelIdProviderModelSchema.extend({
 	groqApiKey: z.string().optional(),
+	groqUsePromptCache: z.boolean().optional(),
 })
 
 const huggingFaceSchema = baseProviderSettingsSchema.extend({
diff --git a/packages/types/src/providers/groq.ts b/packages/types/src/providers/groq.ts
index cab0c69900..a977a66654 100644
--- a/packages/types/src/providers/groq.ts
+++ b/packages/types/src/providers/groq.ts
@@ -22,90 +22,100 @@ export const groqModels = {
 		maxTokens: 8192,
 		contextWindow: 131072,
 		supportsImages: false,
-		supportsPromptCache: false,
+		supportsPromptCache: true,
 		inputPrice: 0.05,
 		outputPrice: 0.08,
+		cacheReadsPrice: 0.01, // 80% discount on cached tokens
 		description: "Meta Llama 3.1 8B Instant model, 128K context.",
 	},
 	"llama-3.3-70b-versatile": {
 		maxTokens: 8192,
 		contextWindow: 131072,
 		supportsImages: false,
-		supportsPromptCache: false,
+		supportsPromptCache: true,
 		inputPrice: 0.59,
 		outputPrice: 0.79,
+		cacheReadsPrice: 0.118, // 80% discount on cached tokens
 		description: "Meta Llama 3.3 70B Versatile model, 128K context.",
 	},
 	"meta-llama/llama-4-scout-17b-16e-instruct": {
 		maxTokens: 8192,
 		contextWindow: 131072,
 		supportsImages: false,
-		supportsPromptCache: false,
+		supportsPromptCache: true,
 		inputPrice: 0.11,
 		outputPrice: 0.34,
+		cacheReadsPrice: 0.022, // 80% discount on cached tokens
 		description: "Meta Llama 4 Scout 17B Instruct model, 128K context.",
 	},
 	"meta-llama/llama-4-maverick-17b-128e-instruct": {
 		maxTokens: 8192,
 		contextWindow: 131072,
 		supportsImages: false,
-		supportsPromptCache: false,
+		supportsPromptCache: true,
 		inputPrice: 0.2,
 		outputPrice: 0.6,
+		cacheReadsPrice: 0.04, // 80% discount on cached tokens
 		description: "Meta Llama 4 Maverick 17B Instruct model, 128K context.",
 	},
 	"mistral-saba-24b": {
 		maxTokens: 8192,
 		contextWindow: 32768,
 		supportsImages: false,
-		supportsPromptCache: false,
+		supportsPromptCache: true,
 		inputPrice: 0.79,
 		outputPrice: 0.79,
+		cacheReadsPrice: 0.158, // 80% discount on cached tokens
 		description: "Mistral Saba 24B model, 32K context.",
 	},
 	"qwen-qwq-32b": {
 		maxTokens: 8192,
 		contextWindow: 131072,
 		supportsImages: false,
-		supportsPromptCache: false,
+		supportsPromptCache: true,
 		inputPrice: 0.29,
 		outputPrice: 0.39,
+		cacheReadsPrice: 0.058, // 80% discount on cached tokens
 		description: "Alibaba Qwen QwQ 32B model, 128K context.",
 	},
 	"qwen/qwen3-32b": {
 		maxTokens: 8192,
 		contextWindow: 131072,
 		supportsImages: false,
-		supportsPromptCache: false,
+		supportsPromptCache: true,
 		inputPrice: 0.29,
 		outputPrice: 0.59,
+		cacheReadsPrice: 0.058, // 80% discount on cached tokens
 		description: "Alibaba Qwen 3 32B model, 128K context.",
 	},
 	"deepseek-r1-distill-llama-70b": {
 		maxTokens: 8192,
 		contextWindow: 131072,
 		supportsImages: false,
-		supportsPromptCache: false,
+		supportsPromptCache: true,
 		inputPrice: 0.75,
 		outputPrice: 0.99,
+		cacheReadsPrice: 0.15, // 80% discount on cached tokens
 		description: "DeepSeek R1 Distill Llama 70B model, 128K context.",
 	},
 	"moonshotai/kimi-k2-instruct": {
 		maxTokens: 16384,
 		contextWindow: 131072,
 		supportsImages: false,
-		supportsPromptCache: false,
+		supportsPromptCache: true,
 		inputPrice: 1.0,
 		outputPrice: 3.0,
+		cacheReadsPrice: 0.2, // 80% discount on cached tokens
 		description: "Moonshot AI Kimi K2 Instruct 1T model, 128K context.",
 	},
 	"openai/gpt-oss-120b": {
 		maxTokens: 32766,
 		contextWindow: 131072,
 		supportsImages: false,
-		supportsPromptCache: false,
+		supportsPromptCache: true,
 		inputPrice: 0.15,
 		outputPrice: 0.75,
+		cacheReadsPrice: 0.03, // 80% discount on cached tokens
 		description:
 			"GPT-OSS 120B is OpenAI's flagship open source model, built on a Mixture-of-Experts (MoE) architecture with 20 billion parameters and 128 experts.",
 	},
@@ -113,9 +123,10 @@ export const groqModels = {
 		maxTokens: 32768,
 		contextWindow: 131072,
 		supportsImages: false,
-		supportsPromptCache: false,
+		supportsPromptCache: true,
 		inputPrice: 0.1,
 		outputPrice: 0.5,
+		cacheReadsPrice: 0.02, // 80% discount on cached tokens
 		description:
 			"GPT-OSS 20B is OpenAI's flagship open source model, built on a Mixture-of-Experts (MoE) architecture with 20 billion parameters and 32 experts.",
 	},
diff --git a/src/api/providers/__tests__/groq.spec.ts b/src/api/providers/__tests__/groq.spec.ts
index a943e84daa..80609382b6 100644
--- a/src/api/providers/__tests__/groq.spec.ts
+++ b/src/api/providers/__tests__/groq.spec.ts
@@ -42,6 +42,8 @@ describe("GroqHandler", () => {
 		const model = handler.getModel()
 		expect(model.id).toBe(groqDefaultModelId)
 		expect(model.info).toEqual(groqModels[groqDefaultModelId])
+		// Verify prompt caching is enabled
+		expect(model.info.supportsPromptCache).toBe(true)
 	})
 
 	it("should return specified model when valid model is provided", () => {
@@ -50,6 +52,8 @@ describe("GroqHandler", () => {
 		const model = handlerWithModel.getModel()
 		expect(model.id).toBe(testModelId)
 		expect(model.info).toEqual(groqModels[testModelId])
+		// Verify prompt caching is enabled
+		expect(model.info.supportsPromptCache).toBe(true)
 	})
 
 	it("completePrompt method should return text from Groq API", async () => {
@@ -108,7 +112,13 @@ describe("GroqHandler", () => {
 		const firstChunk = await stream.next()
 
 		expect(firstChunk.done).toBe(false)
-		expect(firstChunk.value).toEqual({ type: "usage", inputTokens: 10, outputTokens: 20 })
+		expect(firstChunk.value).toEqual({
+			type: "usage",
+			inputTokens: 10,
+			outputTokens: 20,
+			cacheWriteTokens: 0,
+			cacheReadTokens: 0,
+		})
 	})
 
 	it("createMessage should pass correct parameters to Groq client", async () => {
@@ -221,4 +231,373 @@ describe("GroqHandler", () => {
 			undefined,
 		)
 	})
+
+	it("createMessage should handle cached tokens from Groq API", async () => {
+		const testContent = "This is test content from Groq stream"
+		const cachedTokens = 50
+
+		mockCreate.mockImplementationOnce(() => {
+			return {
+				[Symbol.asyncIterator]: () => ({
+					next: vitest
+						.fn()
+						.mockResolvedValueOnce({
+							done: false,
+							value: { choices: [{ delta: { content: testContent } }] },
+						})
+						.mockResolvedValueOnce({
+							done: false,
+							value: {
+								choices: [{ delta: {} }],
+								usage: {
+									prompt_tokens: 100,
+									completion_tokens: 20,
+									prompt_tokens_details: {
+										cached_tokens: cachedTokens,
+									},
+								},
+							},
+						})
+						.mockResolvedValueOnce({ done: true }),
+				}),
+			}
+		})
+
+		const stream = handler.createMessage("system prompt", [])
+		const chunks = []
+		for await (const chunk of stream) {
+			chunks.push(chunk)
+		}
+
+		// Should have text chunk and usage chunk
+		expect(chunks).toHaveLength(2)
+		expect(chunks[0]).toEqual({ type: "text", text: testContent })
+
+		// Usage chunk should properly handle cached tokens
+		expect(chunks[1]).toEqual({
+			type: "usage",
+			inputTokens: 50, // 100 total - 50 cached = 50 non-cached
+			outputTokens: 20,
+			cacheWriteTokens: 0, // Groq doesn't track cache writes
+			cacheReadTokens: 50,
+		})
+	})
+
+	it("createMessage should handle missing cache information gracefully", async () => {
+		mockCreate.mockImplementationOnce(() => {
+			return {
+				[Symbol.asyncIterator]: () => ({
+					next: vitest
+						.fn()
+						.mockResolvedValueOnce({
+							done: false,
+							value: {
+								choices: [{ delta: {} }],
+								usage: {
+									prompt_tokens: 100,
+									completion_tokens: 20,
+									// No prompt_tokens_details
+								},
+							},
+						})
+						.mockResolvedValueOnce({ done: true }),
+				}),
+			}
+		})
+
+		const stream = handler.createMessage("system prompt", [])
+		const chunks = []
+		for await (const chunk of stream) {
+			chunks.push(chunk)
+		}
+
+		// Should handle missing cache information gracefully
+		expect(chunks).toHaveLength(1)
+		expect(chunks[0]).toEqual({
+			type: "usage",
+			inputTokens: 100, // No cached tokens, so all are non-cached
+			outputTokens: 20,
+			cacheWriteTokens: 0,
+			cacheReadTokens: 0, // Default to 0 when not provided
+		})
+
+		describe("Prompt Caching", () => {
+			it("should use caching strategy when groqUsePromptCache is enabled", async () => {
+				const handlerWithCache = new GroqHandler({
+					groqApiKey: "test-groq-api-key",
+					groqUsePromptCache: true,
+				})
+
+				mockCreate.mockImplementationOnce(() => {
+					return {
+						[Symbol.asyncIterator]: () => ({
+							async next() {
+								return { done: true }
+							},
+						}),
+					}
+				})
+
+				const systemPrompt = "Test system prompt for caching"
+				const messages: Anthropic.Messages.MessageParam[] = [
+					{ role: "user", content: "First message" },
+					{ role: "assistant", content: "First response" },
+					{ role: "user", content: "Second message" },
+				]
+
+				const messageGenerator = handlerWithCache.createMessage(systemPrompt, messages)
+				await messageGenerator.next()
+
+				// Verify that the messages were formatted with the system prompt
+				expect(mockCreate).toHaveBeenCalledWith(
+					expect.objectContaining({
+						messages: expect.arrayContaining([
+							{ role: "system", content: systemPrompt },
+							{ role: "user", content: "First message" },
+							{ role: "assistant", content: "First response" },
+							{ role: "user", content: "Second message" },
+						]),
+					}),
+					undefined,
+				)
+			})
+
+			it("should not use caching strategy when groqUsePromptCache is disabled", async () => {
+				const handlerWithoutCache = new GroqHandler({
+					groqApiKey: "test-groq-api-key",
+					groqUsePromptCache: false,
+				})
+
+				mockCreate.mockImplementationOnce(() => {
+					return {
+						[Symbol.asyncIterator]: () => ({
+							async next() {
+								return { done: true }
+							},
+						}),
+					}
+				})
+
+				const systemPrompt = "Test system prompt without caching"
+				const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Test message" }]
+
+				const messageGenerator = handlerWithoutCache.createMessage(systemPrompt, messages)
+				await messageGenerator.next()
+
+				// Verify standard formatting is used
+				expect(mockCreate).toHaveBeenCalledWith(
+					expect.objectContaining({
+						messages: expect.arrayContaining([
+							{ role: "system", content: systemPrompt },
+							{ role: "user", content: "Test message" },
+						]),
+					}),
+					undefined,
+				)
+			})
+
+			it("should handle multiple cache read token field names", async () => {
+				const testContent = "Test content"
+
+				// Test different field names that Groq might use for cached tokens
+				const cacheFieldVariations = [
+					{ cached_tokens: 30 },
+					{ cache_read_input_tokens: 40 },
+					{ cache_tokens: 50 },
+				]
+
+				for (const cacheFields of cacheFieldVariations) {
+					vitest.clearAllMocks()
+
+					mockCreate.mockImplementationOnce(() => {
+						return {
+							[Symbol.asyncIterator]: () => ({
+								next: vitest
+									.fn()
+									.mockResolvedValueOnce({
+										done: false,
+										value: { choices: [{ delta: { content: testContent } }] },
+									})
+									.mockResolvedValueOnce({
+										done: false,
+										value: {
+											choices: [{ delta: {} }],
+											usage: {
+												prompt_tokens: 100,
+												completion_tokens: 20,
+												prompt_tokens_details: cacheFields,
+											},
+										},
+									})
+									.mockResolvedValueOnce({ done: true }),
+							}),
+						}
+					})
+
+					const stream = handler.createMessage("system prompt", [])
+					const chunks = []
+					for await (const chunk of stream) {
+						chunks.push(chunk)
+					}
+
+					// Get the expected cached tokens value
+					const expectedCachedTokens = Object.values(cacheFields)[0]
+
+					// Should properly extract cached tokens from any of the field names
+					expect(chunks[1]).toEqual({
+						type: "usage",
+						inputTokens: 100 - expectedCachedTokens,
+						outputTokens: 20,
+						cacheWriteTokens: 0,
+						cacheReadTokens: expectedCachedTokens,
+					})
+				}
+			})
+
+			it("should maintain conversation cache state across multiple messages", async () => {
+				const handlerWithCache = new GroqHandler({
+					groqApiKey: "test-groq-api-key",
+					groqUsePromptCache: true,
+				})
+
+				mockCreate.mockImplementation(() => {
+					return {
+						[Symbol.asyncIterator]: () => ({
+							async next() {
+								return { done: true }
+							},
+						}),
+					}
+				})
+
+				const systemPrompt = "System prompt for conversation"
+				const firstMessages: Anthropic.Messages.MessageParam[] = [
+					{ role: "user", content: "First user message" },
+				]
+
+				// First call
+				const firstGenerator = handlerWithCache.createMessage(systemPrompt, firstMessages)
+				await firstGenerator.next()
+
+				// Add more messages for second call
+				const secondMessages: Anthropic.Messages.MessageParam[] = [
+					...firstMessages,
+					{ role: "assistant", content: "First assistant response" },
+					{ role: "user", content: "Second user message" },
+				]
+
+				// Second call with extended conversation
+				const secondGenerator = handlerWithCache.createMessage(systemPrompt, secondMessages)
+				await secondGenerator.next()
+
+				// Both calls should have been made
+				expect(mockCreate).toHaveBeenCalledTimes(2)
+
+				// Verify the second call has all messages
+				const secondCallArgs = mockCreate.mock.calls[1][0]
+				expect(secondCallArgs.messages).toHaveLength(4) // system + 3 messages
+			})
+
+			it("should handle complex message content with caching", async () => {
+				const handlerWithCache = new GroqHandler({
+					groqApiKey: "test-groq-api-key",
+					groqUsePromptCache: true,
+				})
+
+				mockCreate.mockImplementationOnce(() => {
+					return {
+						[Symbol.asyncIterator]: () => ({
+							async next() {
+								return { done: true }
+							},
+						}),
+					}
+				})
+
+				const systemPrompt = "System prompt"
+				const messages: Anthropic.Messages.MessageParam[] = [
+					{
+						role: "user",
+						content: [
+							{ type: "text", text: "Part 1" },
+							{ type: "text", text: "Part 2" },
+						],
+					},
+					{
+						role: "assistant",
+						content: [
+							{ type: "text", text: "Response part 1" },
+							{ type: "text", text: "Response part 2" },
+						],
+					},
+				]
+
+				const messageGenerator = handlerWithCache.createMessage(systemPrompt, messages)
+				await messageGenerator.next()
+
+				// Verify that complex content is properly converted
+				expect(mockCreate).toHaveBeenCalledWith(
+					expect.objectContaining({
+						messages: expect.arrayContaining([
+							{ role: "system", content: systemPrompt },
+							{ role: "user", content: "Part 1\nPart 2" },
+							{ role: "assistant", content: "Response part 1\nResponse part 2" },
+						]),
+					}),
+					undefined,
+				)
+			})
+
+			it("should respect model's supportsPromptCache flag", async () => {
+				// Mock the getModel method to return a model without cache support
+				const modelId: GroqModelId = "llama-3.1-8b-instant"
+
+				const handlerWithCache = new GroqHandler({
+					apiModelId: modelId,
+					groqApiKey: "test-groq-api-key",
+					groqUsePromptCache: true, // Enabled but we'll mock the model to not support it
+				})
+
+				// Override getModel to return a model without cache support
+				const originalGetModel = handlerWithCache.getModel.bind(handlerWithCache)
+				handlerWithCache.getModel = () => {
+					const model = originalGetModel()
+					return {
+						...model,
+						info: {
+							...model.info,
+							supportsPromptCache: false, // Override to false for this test
+						},
+					}
+				}
+
+				mockCreate.mockImplementationOnce(() => {
+					return {
+						[Symbol.asyncIterator]: () => ({
+							async next() {
+								return { done: true }
+							},
+						}),
+					}
+				})
+
+				const systemPrompt = "Test system prompt"
+				const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Test message" }]
+
+				const messageGenerator = handlerWithCache.createMessage(systemPrompt, messages)
+				await messageGenerator.next()
+
+				// Should use standard formatting when model doesn't support caching
+				expect(mockCreate).toHaveBeenCalledWith(
+					expect.objectContaining({
+						messages: expect.arrayContaining([
+							{ role: "system", content: systemPrompt },
+							{ role: "user", content: "Test message" },
+						]),
+					}),
+					undefined,
+				)
+			})
+		})
+	})
 })
diff --git a/src/api/providers/groq.ts b/src/api/providers/groq.ts
index 7583edc51c..f750c8b781 100644
--- a/src/api/providers/groq.ts
+++ b/src/api/providers/groq.ts
@@ -1,10 +1,21 @@
+import { Anthropic } from "@anthropic-ai/sdk"
+import OpenAI from "openai"
+
 import { type GroqModelId, groqDefaultModelId, groqModels } from "@roo-code/types"
 
 import type { ApiHandlerOptions } from "../../shared/api"
+import type { ApiHandlerCreateMessageMetadata } from "../index"
+import { ApiStream } from "../transform/stream"
+import { GroqCacheStrategy } from "../transform/cache-strategy/groq"
+import { ModelInfo as CacheModelInfo } from "../transform/cache-strategy/types"
+import { convertToOpenAiMessages } from "../transform/openai-format"
 
 import { BaseOpenAiCompatibleProvider } from "./base-openai-compatible-provider"
 
 export class GroqHandler extends BaseOpenAiCompatibleProvider<GroqModelId> {
+	// Store conversation cache state for maintaining consistency
+	private conversationCacheState: Map<string, any> = new Map()
+
 	constructor(options: ApiHandlerOptions) {
 		super({
 			...options,
@@ -16,4 +27,156 @@ export class GroqHandler extends BaseOpenAiCompatibleProvider<GroqModelId> {
 			defaultTemperature: 0.5,
 		})
 	}
+
+	// Override createStream to apply caching strategy
+	protected override createStream(
+		systemPrompt: string,
+		messages: Anthropic.Messages.MessageParam[],
+		metadata?: ApiHandlerCreateMessageMetadata,
+		requestOptions?: OpenAI.RequestOptions,
+	) {
+		const { id: model, info: modelInfo } = this.getModel()
+
+		// Check if prompt caching is enabled for this model
+		const usePromptCache = Boolean(this.options.groqUsePromptCache && modelInfo.supportsPromptCache)
+
+		let formattedMessages: OpenAI.Chat.Completions.ChatCompletionMessageParam[]
+
+		if (usePromptCache) {
+			// Use cache strategy to format messages optimally
+			const cacheModelInfo: CacheModelInfo = {
+				maxTokens: modelInfo.maxTokens || 8192,
+				contextWindow: modelInfo.contextWindow || 131072,
+				supportsPromptCache: modelInfo.supportsPromptCache || false,
+				maxCachePoints: 4, // Groq doesn't use explicit cache points, but we set a reasonable default
+				minTokensPerCachePoint: 1024, // Groq caches automatically, but we use this for tracking
+				cachableFields: ["system", "messages"], // Groq can cache both
+			}
+
+			// Generate a conversation ID for cache tracking
+			const conversationId = this.generateConversationId(messages)
+
+			const cacheStrategy = new GroqCacheStrategy({
+				modelInfo: cacheModelInfo,
+				systemPrompt,
+				messages,
+				usePromptCache,
+				previousCachePointPlacements: this.conversationCacheState.get(conversationId),
+			})
+
+			const cacheResult = cacheStrategy.determineOptimalCachePoints()
+
+			// Store cache state for next request
+			if (cacheResult.messageCachePointPlacements) {
+				this.conversationCacheState.set(conversationId, cacheResult.messageCachePointPlacements)
+			}
+
+			// Convert to OpenAI format using the cache strategy
+			formattedMessages = cacheStrategy.convertToOpenAIFormat(systemPrompt, messages)
+		} else {
+			// Use default formatting without caching
+			formattedMessages = this.formatMessagesDefault(systemPrompt, messages)
+		}
+
+		const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
+			model,
+			max_tokens: modelInfo.maxTokens || 8192,
+			messages: formattedMessages,
+			stream: true,
+			stream_options: { include_usage: true },
+		}
+
+		// Only include temperature if explicitly set
+		if (this.options.modelTemperature !== undefined) {
+			params.temperature = this.options.modelTemperature
+		}
+
+		return this.client.chat.completions.create(params, requestOptions)
+	}
+
+	// Helper method to format messages without caching
+	private formatMessagesDefault(
+		systemPrompt: string,
+		messages: Anthropic.Messages.MessageParam[],
+	): OpenAI.Chat.Completions.ChatCompletionMessageParam[] {
+		const result: OpenAI.Chat.Completions.ChatCompletionMessageParam[] = []
+
+		if (systemPrompt) {
+			result.push({ role: "system", content: systemPrompt })
+		}
+
+		// Use the imported convertToOpenAiMessages function
+		result.push(...convertToOpenAiMessages(messages))
+
+		return result
+	}
+
+	// Generate a stable conversation ID for cache tracking
+	private generateConversationId(messages: Anthropic.Messages.MessageParam[]): string {
+		if (messages.length === 0) {
+			return "empty_conversation"
+		}
+
+		// Use first message content as basis for ID (truncated for efficiency)
+		const firstMessage = messages[0]
+		const content = typeof firstMessage.content === "string" ? firstMessage.content : "complex_content"
+
+		return `conv_${firstMessage.role}_${content.substring(0, 20).replace(/\s+/g, "_")}`
+	}
+
+	// Override to handle Groq's usage metrics, including caching
+	override async *createMessage(
+		systemPrompt: string,
+		messages: Anthropic.Messages.MessageParam[],
+		metadata?: ApiHandlerCreateMessageMetadata,
+	): ApiStream {
+		const stream = await this.createStream(systemPrompt, messages, metadata)
+
+		for await (const chunk of stream) {
+			const delta = chunk.choices[0]?.delta
+
+			if (delta?.content) {
+				yield {
+					type: "text",
+					text: delta.content,
+				}
+			}
+
+			if (chunk.usage) {
+				// Groq includes cached token information in prompt_tokens_details
+				const promptTokens = chunk.usage.prompt_tokens || 0
+				const completionTokens = chunk.usage.completion_tokens || 0
+
+				// Check multiple possible locations for cached tokens
+				// Groq may return cached tokens in different fields depending on API version
+				const promptDetails = (chunk.usage as any).prompt_tokens_details || {}
+				const cachedTokens =
+					promptDetails.cached_tokens ||
+					promptDetails.cache_read_input_tokens ||
+					promptDetails.cache_tokens ||
+					0
+
+				// Calculate non-cached input tokens
+				const nonCachedInputTokens = Math.max(0, promptTokens - cachedTokens)
+
+				yield {
+					type: "usage",
+					inputTokens: nonCachedInputTokens,
+					outputTokens: completionTokens,
+					cacheWriteTokens: 0, // Groq doesn't track cache writes separately
+					cacheReadTokens: cachedTokens,
+				}
+			}
+		}
+	}
+
+	// Clean up old conversation cache entries periodically
+	private cleanupCacheState() {
+		// Keep only the last 100 conversations to prevent memory growth
+		if (this.conversationCacheState.size > 100) {
+			const entries = Array.from(this.conversationCacheState.entries())
+			const toKeep = entries.slice(-50) // Keep the last 50
+			this.conversationCacheState = new Map(toKeep)
+		}
+	}
 }
diff --git a/src/api/transform/cache-strategy/__tests__/groq.spec.ts b/src/api/transform/cache-strategy/__tests__/groq.spec.ts
new file mode 100644
index 0000000000..f3ff9f21fe
--- /dev/null
+++ b/src/api/transform/cache-strategy/__tests__/groq.spec.ts
@@ -0,0 +1,173 @@
+// npx vitest run src/api/transform/cache-strategy/__tests__/groq.spec.ts
+
+import { Anthropic } from "@anthropic-ai/sdk"
+import { GroqCacheStrategy } from "../groq"
+import { CacheStrategyConfig } from "../types"
+
+describe("GroqCacheStrategy", () => {
+	const createConfig = (overrides?: Partial<CacheStrategyConfig>): CacheStrategyConfig => ({
+		modelInfo: {
+			maxTokens: 8192,
+			contextWindow: 131072,
+			supportsPromptCache: true,
+			maxCachePoints: 4,
+			minTokensPerCachePoint: 1024,
+			cachableFields: ["system", "messages"],
+		},
+		systemPrompt: "Test system prompt",
+		messages: [],
+		usePromptCache: true,
+		...overrides,
+	})
+
+	describe("determineOptimalCachePoints", () => {
+		it("should return formatted messages without explicit cache points", () => {
+			const messages: Anthropic.Messages.MessageParam[] = [
+				{ role: "user", content: "Hello" },
+				{ role: "assistant", content: "Hi there" },
+			]
+
+			const config = createConfig({ messages })
+			const strategy = new GroqCacheStrategy(config)
+			const result = strategy.determineOptimalCachePoints()
+
+			// Should have system blocks
+			expect(result.system).toHaveLength(1)
+			expect(result.system[0]).toHaveProperty("text", "Test system prompt")
+
+			// Should have messages
+			expect(result.messages).toHaveLength(2)
+		})
+
+		it("should track virtual cache points for monitoring", () => {
+			// Create a message that's long enough to meet the 1024 token threshold
+			// Approximately 4 characters per token, so we need ~4096 characters
+			const longMessage = "This is a very long message that needs to meet the token threshold. ".repeat(100)
+
+			const messages: Anthropic.Messages.MessageParam[] = [
+				{ role: "user", content: "Short first message" },
+				{ role: "assistant", content: "Response" },
+				{ role: "user", content: longMessage }, // This should meet the threshold
+			]
+
+			const config = createConfig({ messages })
+			const strategy = new GroqCacheStrategy(config)
+			const result = strategy.determineOptimalCachePoints()
+
+			// Should track the last user message as a virtual cache point if it meets threshold
+			expect(result.messageCachePointPlacements).toBeDefined()
+			expect(result.messageCachePointPlacements).toHaveLength(1)
+			expect(result.messageCachePointPlacements![0]).toMatchObject({
+				index: 2, // Last user message
+				type: "message",
+			})
+		})
+
+		it("should not add cache points when caching is disabled", () => {
+			const longMessage = "This is a very long message that needs to meet the token threshold. ".repeat(100)
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: longMessage }]
+
+			const config = createConfig({ messages, usePromptCache: false })
+			const strategy = new GroqCacheStrategy(config)
+			const result = strategy.determineOptimalCachePoints()
+
+			// Should not track any cache points when caching is disabled
+			expect(result.messageCachePointPlacements).toHaveLength(0)
+		})
+	})
+
+	describe("convertToOpenAIFormat", () => {
+		it("should convert simple messages correctly", () => {
+			const systemPrompt = "System prompt"
+			const messages: Anthropic.Messages.MessageParam[] = [
+				{ role: "user", content: "Hello" },
+				{ role: "assistant", content: "Hi there" },
+			]
+
+			const config = createConfig({ messages })
+			const strategy = new GroqCacheStrategy(config)
+			const result = strategy.convertToOpenAIFormat(systemPrompt, messages)
+
+			expect(result).toHaveLength(3)
+			expect(result[0]).toEqual({ role: "system", content: systemPrompt })
+			expect(result[1]).toEqual({ role: "user", content: "Hello" })
+			expect(result[2]).toEqual({ role: "assistant", content: "Hi there" })
+		})
+
+		it("should handle multi-part content", () => {
+			const messages: Anthropic.Messages.MessageParam[] = [
+				{
+					role: "user",
+					content: [
+						{ type: "text", text: "Part 1" },
+						{ type: "text", text: "Part 2" },
+					],
+				},
+				{
+					role: "assistant",
+					content: [
+						{ type: "text", text: "Response 1" },
+						{ type: "text", text: "Response 2" },
+					],
+				},
+			]
+
+			const config = createConfig({ messages })
+			const strategy = new GroqCacheStrategy(config)
+			const result = strategy.convertToOpenAIFormat(undefined, messages)
+
+			expect(result).toHaveLength(2)
+			expect(result[0]).toEqual({ role: "user", content: "Part 1\nPart 2" })
+			expect(result[1]).toEqual({ role: "assistant", content: "Response 1\nResponse 2" })
+		})
+
+		it("should include empty messages", () => {
+			const messages: Anthropic.Messages.MessageParam[] = [
+				{ role: "user", content: "" },
+				{ role: "assistant", content: "Response" },
+				{ role: "user", content: [] }, // Empty array
+			]
+
+			const config = createConfig({ messages })
+			const strategy = new GroqCacheStrategy(config)
+			const result = strategy.convertToOpenAIFormat(undefined, messages)
+
+			// Groq strategy includes empty messages (OpenAI API will handle them)
+			expect(result).toHaveLength(2)
+			expect(result[0]).toEqual({ role: "user", content: "" })
+			expect(result[1]).toEqual({ role: "assistant", content: "Response" })
+		})
+
+		it("should handle system prompt correctly", () => {
+			const systemPrompt = "System instructions"
+			const messages: Anthropic.Messages.MessageParam[] = []
+
+			const config = createConfig({ messages })
+			const strategy = new GroqCacheStrategy(config)
+			const result = strategy.convertToOpenAIFormat(systemPrompt, messages)
+
+			expect(result).toHaveLength(1)
+			expect(result[0]).toEqual({ role: "system", content: systemPrompt })
+		})
+
+		it("should filter out non-text content types", () => {
+			const messages: Anthropic.Messages.MessageParam[] = [
+				{
+					role: "user",
+					content: [
+						{ type: "text", text: "Text content" },
+						{ type: "image", source: { type: "base64", media_type: "image/png", data: "..." } } as any,
+					],
+				},
+			]
+
+			const config = createConfig({ messages })
+			const strategy = new GroqCacheStrategy(config)
+			const result = strategy.convertToOpenAIFormat(undefined, messages)
+
+			// Should only include text content
+			expect(result).toHaveLength(1)
+			expect(result[0]).toEqual({ role: "user", content: "Text content" })
+		})
+	})
+})
diff --git a/src/api/transform/cache-strategy/groq.ts b/src/api/transform/cache-strategy/groq.ts
new file mode 100644
index 0000000000..135c367be0
--- /dev/null
+++ b/src/api/transform/cache-strategy/groq.ts
@@ -0,0 +1,129 @@
+import { Anthropic } from "@anthropic-ai/sdk"
+import OpenAI from "openai"
+import { CacheStrategy } from "./base-strategy"
+import { CacheResult, CachePointPlacement, CacheStrategyConfig } from "./types"
+import { SystemContentBlock, Message } from "@aws-sdk/client-bedrock-runtime"
+
+/**
+ * Groq-specific cache strategy implementation.
+ *
+ * Groq's caching works differently from Anthropic/Bedrock:
+ * - Groq automatically caches message prefixes based on exact matches
+ * - No explicit cache points are needed in the API request
+ * - The API returns cache hit information in the usage response
+ * - Caching is automatic for repeated message prefixes
+ *
+ * This strategy formats messages for optimal caching with Groq's automatic system.
+ */
+export class GroqCacheStrategy extends CacheStrategy {
+	/**
+	 * Determine optimal cache point placements for Groq.
+	 * Since Groq handles caching automatically, we don't add explicit cache points.
+	 * Instead, we ensure messages are formatted consistently for optimal cache hits.
+	 */
+	public determineOptimalCachePoints(): CacheResult {
+		// Groq doesn't use explicit cache points, so we just return formatted messages
+		const systemBlocks: SystemContentBlock[] = this.config.systemPrompt
+			? [{ text: this.config.systemPrompt } as unknown as SystemContentBlock]
+			: []
+
+		const messages = this.messagesToContentBlocks(this.config.messages)
+
+		// Track placements for consistency (even though Groq doesn't use them)
+		const placements: CachePointPlacement[] = []
+
+		// For Groq, we track which messages would be cached based on the prefix matching
+		// This helps with monitoring and debugging
+		if (this.config.usePromptCache && this.config.messages.length > 0) {
+			// Groq caches message prefixes automatically
+			// We can track the last user message as a "virtual" cache point for monitoring
+			for (let i = this.config.messages.length - 1; i >= 0; i--) {
+				if (this.config.messages[i].role === "user") {
+					const tokenCount = this.estimateTokenCount(this.config.messages[i])
+					if (this.meetsMinTokenThreshold(tokenCount)) {
+						placements.push({
+							index: i,
+							type: "message",
+							tokensCovered: tokenCount,
+						})
+					}
+					break // Only track the last user message for Groq
+				}
+			}
+		}
+
+		return {
+			system: systemBlocks,
+			messages,
+			messageCachePointPlacements: placements,
+		}
+	}
+
+	/**
+	 * Convert messages to OpenAI format for Groq.
+	 * Groq uses OpenAI-compatible format.
+	 */
+	public convertToOpenAIFormat(
+		systemPrompt: string | undefined,
+		messages: Anthropic.Messages.MessageParam[],
+	): OpenAI.Chat.Completions.ChatCompletionMessageParam[] {
+		const result: OpenAI.Chat.Completions.ChatCompletionMessageParam[] = []
+
+		// Add system message if present
+		if (systemPrompt) {
+			result.push({
+				role: "system",
+				content: systemPrompt,
+			})
+		}
+
+		// Convert messages to OpenAI format
+		for (const message of messages) {
+			if (message.role === "user") {
+				// Handle user messages
+				if (typeof message.content === "string") {
+					result.push({
+						role: "user",
+						content: message.content,
+					})
+				} else if (Array.isArray(message.content)) {
+					// Handle multi-part content
+					const textParts = message.content
+						.filter((part) => part.type === "text")
+						.map((part) => part.text)
+						.join("\n")
+
+					if (textParts) {
+						result.push({
+							role: "user",
+							content: textParts,
+						})
+					}
+				}
+			} else if (message.role === "assistant") {
+				// Handle assistant messages
+				if (typeof message.content === "string") {
+					result.push({
+						role: "assistant",
+						content: message.content,
+					})
+				} else if (Array.isArray(message.content)) {
+					// Handle multi-part content
+					const textParts = message.content
+						.filter((part) => part.type === "text")
+						.map((part) => part.text)
+						.join("\n")
+
+					if (textParts) {
+						result.push({
+							role: "assistant",
+							content: textParts,
+						})
+					}
+				}
+			}
+		}
+
+		return result
+	}
+}