Skip to content

Commit b0337f3

Browse files
committed
Allow prompt caching to be enabled / disabled for models on OpenRouter
1 parent a3f1a3f commit b0337f3

File tree

4 files changed

+79
-32
lines changed

4 files changed

+79
-32
lines changed

src/api/providers/fetchers/__tests__/fixtures/openrouter-models.json

Lines changed: 3 additions & 3 deletions
Large diffs are not rendered by default.

src/api/providers/fetchers/__tests__/openrouter.test.ts

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,31 @@ describe("OpenRouter API", () => {
6666
supportsComputerUse: true,
6767
})
6868

69+
expect(
70+
Object.entries(models)
71+
.filter(([id, _]) => id.startsWith("anthropic/claude-3"))
72+
.map(([id, model]) => ({ id, maxTokens: model.maxTokens }))
73+
.sort(({ id: a }, { id: b }) => a.localeCompare(b)),
74+
).toEqual([
75+
{ id: "anthropic/claude-3-haiku", maxTokens: 4096 },
76+
{ id: "anthropic/claude-3-haiku:beta", maxTokens: 4096 },
77+
{ id: "anthropic/claude-3-opus", maxTokens: 4096 },
78+
{ id: "anthropic/claude-3-opus:beta", maxTokens: 4096 },
79+
{ id: "anthropic/claude-3-sonnet", maxTokens: 4096 },
80+
{ id: "anthropic/claude-3-sonnet:beta", maxTokens: 4096 },
81+
{ id: "anthropic/claude-3.5-haiku", maxTokens: 8192 },
82+
{ id: "anthropic/claude-3.5-haiku-20241022", maxTokens: 8192 },
83+
{ id: "anthropic/claude-3.5-haiku-20241022:beta", maxTokens: 8192 },
84+
{ id: "anthropic/claude-3.5-haiku:beta", maxTokens: 8192 },
85+
{ id: "anthropic/claude-3.5-sonnet", maxTokens: 8192 },
86+
{ id: "anthropic/claude-3.5-sonnet-20240620", maxTokens: 8192 },
87+
{ id: "anthropic/claude-3.5-sonnet-20240620:beta", maxTokens: 8192 },
88+
{ id: "anthropic/claude-3.5-sonnet:beta", maxTokens: 8192 },
89+
{ id: "anthropic/claude-3.7-sonnet", maxTokens: 8192 },
90+
{ id: "anthropic/claude-3.7-sonnet:beta", maxTokens: 8192 },
91+
{ id: "anthropic/claude-3.7-sonnet:thinking", maxTokens: 128000 },
92+
])
93+
6994
nockDone()
7095
})
7196
})

src/api/providers/fetchers/openrouter.ts

Lines changed: 27 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,13 @@
11
import axios from "axios"
22
import { z } from "zod"
33

4-
import { ApiHandlerOptions, ModelInfo } from "../../../shared/api"
4+
import {
5+
ApiHandlerOptions,
6+
ModelInfo,
7+
anthropicModels,
8+
COMPUTER_USE_MODELS,
9+
OPTIONAL_PROMPT_CACHING_MODELS,
10+
} from "../../../shared/api"
511
import { parseApiPrice } from "../../../utils/cost"
612

713
// https://openrouter.ai/api/v1/models
@@ -62,8 +68,7 @@ export async function getOpenRouterModels(options?: ApiHandlerOptions) {
6268
? parseApiPrice(rawModel.pricing?.input_cache_read)
6369
: undefined
6470

65-
// Disable prompt caching for Gemini models for now.
66-
const supportsPromptCache = !!cacheWritesPrice && !!cacheReadsPrice && !rawModel.id.startsWith("google")
71+
const supportsPromptCache = !!cacheWritesPrice && !!cacheReadsPrice
6772

6873
const modelInfo: ModelInfo = {
6974
maxTokens: rawModel.top_provider?.max_completion_tokens,
@@ -78,29 +83,25 @@ export async function getOpenRouterModels(options?: ApiHandlerOptions) {
7883
thinking: rawModel.id === "anthropic/claude-3.7-sonnet:thinking",
7984
}
8085

81-
// Then OpenRouter model definition doesn't give us any hints about computer use,
82-
// so we need to set that manually.
83-
// The ideal `maxTokens` values are model dependent, but we should probably DRY
84-
// this up and use the values defined for the Anthropic providers.
85-
switch (true) {
86-
case rawModel.id.startsWith("anthropic/claude-3.7-sonnet"):
87-
modelInfo.supportsComputerUse = true
88-
modelInfo.maxTokens = rawModel.id === "anthropic/claude-3.7-sonnet:thinking" ? 128_000 : 8192
89-
break
90-
case rawModel.id.startsWith("anthropic/claude-3.5-sonnet-20240620"):
91-
modelInfo.maxTokens = 8192
92-
break
93-
case rawModel.id.startsWith("anthropic/claude-3.5-sonnet"):
94-
modelInfo.supportsComputerUse = true
95-
modelInfo.maxTokens = 8192
96-
break
97-
case rawModel.id.startsWith("anthropic/claude-3-5-haiku"):
98-
case rawModel.id.startsWith("anthropic/claude-3-opus"):
99-
case rawModel.id.startsWith("anthropic/claude-3-haiku"):
100-
modelInfo.maxTokens = 8192
101-
break
102-
default:
103-
break
86+
// The OpenRouter model definition doesn't give us any hints about
87+
// computer use, so we need to set that manually.
88+
if (COMPUTER_USE_MODELS.has(rawModel.id)) {
89+
modelInfo.supportsComputerUse = true
90+
}
91+
92+
// We want to treat prompt caching as "experimental" for these models.
93+
if (OPTIONAL_PROMPT_CACHING_MODELS.has(rawModel.id)) {
94+
modelInfo.isPromptCacheOptional = true
95+
}
96+
97+
// Claude 3.7 Sonnet is a "hybrid" thinking model, and the `maxTokens`
98+
// values can be configured. For the non-thinking variant we want to
99+
// use 8k. The `thinking` variant can be run in 64k and 128k modes,
100+
// and we want to use 128k.
101+
if (rawModel.id.startsWith("anthropic/claude-3.7-sonnet")) {
102+
modelInfo.maxTokens = rawModel.id.includes("thinking")
103+
? anthropicModels["claude-3-7-sonnet-20250219:thinking"].maxTokens
104+
: anthropicModels["claude-3-7-sonnet-20250219"].maxTokens
104105
}
105106

106107
models[rawModel.id] = modelInfo

src/shared/api.ts

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1401,8 +1401,10 @@ export const vscodeLlmModels = {
14011401
* Constants
14021402
*/
14031403

1404+
// These models support reasoning efforts.
14041405
export const REASONING_MODELS = new Set(["x-ai/grok-3-mini-beta", "grok-3-mini-beta", "grok-3-mini-fast-beta"])
14051406

1407+
// These models support prompt caching.
14061408
export const PROMPT_CACHING_MODELS = new Set([
14071409
"anthropic/claude-3-haiku",
14081410
"anthropic/claude-3-haiku:beta",
@@ -1421,7 +1423,26 @@ export const PROMPT_CACHING_MODELS = new Set([
14211423
"anthropic/claude-3.7-sonnet",
14221424
"anthropic/claude-3.7-sonnet:beta",
14231425
"anthropic/claude-3.7-sonnet:thinking",
1424-
// "google/gemini-2.0-flash-001",
1425-
// "google/gemini-flash-1.5",
1426-
// "google/gemini-flash-1.5-8b",
1426+
// "google/gemini-2.5-pro-preview-03-25",
1427+
"google/gemini-2.0-flash-001",
1428+
"google/gemini-flash-1.5",
1429+
"google/gemini-flash-1.5-8b",
1430+
])
1431+
1432+
// These models don't have prompt caching enabled by default (you can turn it on
1433+
// in settings).
1434+
export const OPTIONAL_PROMPT_CACHING_MODELS = new Set([
1435+
// "google/gemini-2.5-pro-preview-03-25",
1436+
"google/gemini-2.0-flash-001",
1437+
"google/gemini-flash-1.5",
1438+
"google/gemini-flash-1.5-8b",
1439+
])
1440+
1441+
// https://www.anthropic.com/news/3-5-models-and-computer-use
1442+
export const COMPUTER_USE_MODELS = new Set([
1443+
"anthropic/claude-3.5-sonnet",
1444+
"anthropic/claude-3.5-sonnet:beta",
1445+
"anthropic/claude-3.7-sonnet",
1446+
"anthropic/claude-3.7-sonnet:beta",
1447+
"anthropic/claude-3.7-sonnet:thinking",
14271448
])

0 commit comments

Comments (0)