Skip to content

Commit b0337f3

Browse files
committed
Allow prompt caching to be enabled / disabled for models on OpenRouter
1 parent a3f1a3f commit b0337f3

File tree

4 files changed

+79
-32
lines changed

4 files changed

+79
-32
lines changed

src/api/providers/fetchers/__tests__/fixtures/openrouter-models.json

Lines changed: 3 additions & 3 deletions
Large diffs are not rendered by default.

src/api/providers/fetchers/__tests__/openrouter.test.ts

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,31 @@ describe("OpenRouter API", () => {
6666
supportsComputerUse: true,
6767
})
6868

69+
expect(
70+
Object.entries(models)
71+
.filter(([id, _]) => id.startsWith("anthropic/claude-3"))
72+
.map(([id, model]) => ({ id, maxTokens: model.maxTokens }))
73+
.sort(({ id: a }, { id: b }) => a.localeCompare(b)),
74+
).toEqual([
75+
{ id: "anthropic/claude-3-haiku", maxTokens: 4096 },
76+
{ id: "anthropic/claude-3-haiku:beta", maxTokens: 4096 },
77+
{ id: "anthropic/claude-3-opus", maxTokens: 4096 },
78+
{ id: "anthropic/claude-3-opus:beta", maxTokens: 4096 },
79+
{ id: "anthropic/claude-3-sonnet", maxTokens: 4096 },
80+
{ id: "anthropic/claude-3-sonnet:beta", maxTokens: 4096 },
81+
{ id: "anthropic/claude-3.5-haiku", maxTokens: 8192 },
82+
{ id: "anthropic/claude-3.5-haiku-20241022", maxTokens: 8192 },
83+
{ id: "anthropic/claude-3.5-haiku-20241022:beta", maxTokens: 8192 },
84+
{ id: "anthropic/claude-3.5-haiku:beta", maxTokens: 8192 },
85+
{ id: "anthropic/claude-3.5-sonnet", maxTokens: 8192 },
86+
{ id: "anthropic/claude-3.5-sonnet-20240620", maxTokens: 8192 },
87+
{ id: "anthropic/claude-3.5-sonnet-20240620:beta", maxTokens: 8192 },
88+
{ id: "anthropic/claude-3.5-sonnet:beta", maxTokens: 8192 },
89+
{ id: "anthropic/claude-3.7-sonnet", maxTokens: 8192 },
90+
{ id: "anthropic/claude-3.7-sonnet:beta", maxTokens: 8192 },
91+
{ id: "anthropic/claude-3.7-sonnet:thinking", maxTokens: 128000 },
92+
])
93+
6994
nockDone()
7095
})
7196
})

src/api/providers/fetchers/openrouter.ts

Lines changed: 27 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,13 @@
11
import axios from "axios"
22
import { z } from "zod"
33

4-
import { ApiHandlerOptions, ModelInfo } from "../../../shared/api"
4+
import {
5+
ApiHandlerOptions,
6+
ModelInfo,
7+
anthropicModels,
8+
COMPUTER_USE_MODELS,
9+
OPTIONAL_PROMPT_CACHING_MODELS,
10+
} from "../../../shared/api"
511
import { parseApiPrice } from "../../../utils/cost"
612

713
// https://openrouter.ai/api/v1/models
@@ -62,8 +68,7 @@ export async function getOpenRouterModels(options?: ApiHandlerOptions) {
6268
? parseApiPrice(rawModel.pricing?.input_cache_read)
6369
: undefined
6470

65-
// Disable prompt caching for Gemini models for now.
66-
const supportsPromptCache = !!cacheWritesPrice && !!cacheReadsPrice && !rawModel.id.startsWith("google")
71+
const supportsPromptCache = !!cacheWritesPrice && !!cacheReadsPrice
6772

6873
const modelInfo: ModelInfo = {
6974
maxTokens: rawModel.top_provider?.max_completion_tokens,
@@ -78,29 +83,25 @@ export async function getOpenRouterModels(options?: ApiHandlerOptions) {
7883
thinking: rawModel.id === "anthropic/claude-3.7-sonnet:thinking",
7984
}
8085

81-
// Then OpenRouter model definition doesn't give us any hints about computer use,
82-
// so we need to set that manually.
83-
// The ideal `maxTokens` values are model dependent, but we should probably DRY
84-
// this up and use the values defined for the Anthropic providers.
85-
switch (true) {
86-
case rawModel.id.startsWith("anthropic/claude-3.7-sonnet"):
87-
modelInfo.supportsComputerUse = true
88-
modelInfo.maxTokens = rawModel.id === "anthropic/claude-3.7-sonnet:thinking" ? 128_000 : 8192
89-
break
90-
case rawModel.id.startsWith("anthropic/claude-3.5-sonnet-20240620"):
91-
modelInfo.maxTokens = 8192
92-
break
93-
case rawModel.id.startsWith("anthropic/claude-3.5-sonnet"):
94-
modelInfo.supportsComputerUse = true
95-
modelInfo.maxTokens = 8192
96-
break
97-
case rawModel.id.startsWith("anthropic/claude-3-5-haiku"):
98-
case rawModel.id.startsWith("anthropic/claude-3-opus"):
99-
case rawModel.id.startsWith("anthropic/claude-3-haiku"):
100-
modelInfo.maxTokens = 8192
101-
break
102-
default:
103-
break
86+
// The OpenRouter model definition doesn't give us any hints about
87+
// computer use, so we need to set that manually.
88+
if (COMPUTER_USE_MODELS.has(rawModel.id)) {
89+
modelInfo.supportsComputerUse = true
90+
}
91+
92+
// We want to treat prompt caching as "experimental" for these models.
93+
if (OPTIONAL_PROMPT_CACHING_MODELS.has(rawModel.id)) {
94+
modelInfo.isPromptCacheOptional = true
95+
}
96+
97+
// Claude 3.7 Sonnet is a "hybrid" thinking model, and the `maxTokens`
98+
// values can be configured. For the non-thinking variant we want to
99+
// use 8k. The `thinking` variant can be run in 64k and 128k modes,
100+
// and we want to use 128k.
101+
if (rawModel.id.startsWith("anthropic/claude-3.7-sonnet")) {
102+
modelInfo.maxTokens = rawModel.id.includes("thinking")
103+
? anthropicModels["claude-3-7-sonnet-20250219:thinking"].maxTokens
104+
: anthropicModels["claude-3-7-sonnet-20250219"].maxTokens
104105
}
105106

106107
models[rawModel.id] = modelInfo

src/shared/api.ts

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1401,8 +1401,10 @@ export const vscodeLlmModels = {
14011401
* Constants
14021402
*/
14031403

1404+
// These models support reasoning efforts.
14041405
export const REASONING_MODELS = new Set(["x-ai/grok-3-mini-beta", "grok-3-mini-beta", "grok-3-mini-fast-beta"])
14051406

1407+
// These models support prompt caching.
14061408
export const PROMPT_CACHING_MODELS = new Set([
14071409
"anthropic/claude-3-haiku",
14081410
"anthropic/claude-3-haiku:beta",
@@ -1421,7 +1423,26 @@ export const PROMPT_CACHING_MODELS = new Set([
14211423
"anthropic/claude-3.7-sonnet",
14221424
"anthropic/claude-3.7-sonnet:beta",
14231425
"anthropic/claude-3.7-sonnet:thinking",
1424-
// "google/gemini-2.0-flash-001",
1425-
// "google/gemini-flash-1.5",
1426-
// "google/gemini-flash-1.5-8b",
1426+
// "google/gemini-2.5-pro-preview-03-25",
1427+
"google/gemini-2.0-flash-001",
1428+
"google/gemini-flash-1.5",
1429+
"google/gemini-flash-1.5-8b",
1430+
])
1431+
1432+
// These models don't have prompt caching enabled by default (you can turn it on
1433+
// in settings).
1434+
export const OPTIONAL_PROMPT_CACHING_MODELS = new Set([
1435+
// "google/gemini-2.5-pro-preview-03-25",
1436+
"google/gemini-2.0-flash-001",
1437+
"google/gemini-flash-1.5",
1438+
"google/gemini-flash-1.5-8b",
1439+
])
1440+
1441+
// https://www.anthropic.com/news/3-5-models-and-computer-use
1442+
export const COMPUTER_USE_MODELS = new Set([
1443+
"anthropic/claude-3.5-sonnet",
1444+
"anthropic/claude-3.5-sonnet:beta",
1445+
"anthropic/claude-3.7-sonnet",
1446+
"anthropic/claude-3.7-sonnet:beta",
1447+
"anthropic/claude-3.7-sonnet:thinking",
14271448
])

0 commit comments

Comments (0)