
Commit 8e4c0ae

Show cache read and write prices for OpenRouter inference providers (#7176)
1 parent (0fdd157) · commit 8e4c0ae

File tree — 3 files changed (+16 −32 lines):

- src/api/providers/fetchers/__tests__/openrouter.spec.ts
- src/api/providers/fetchers/openrouter.ts
- webview-ui/src/components/ui/hooks/useOpenRouterModelProviders.ts

src/api/providers/fetchers/__tests__/openrouter.spec.ts

Lines changed: 3 additions & 2 deletions
```diff
@@ -24,6 +24,7 @@ describe("OpenRouter API", () => {
 		const models = await getOpenRouterModels()

 		const openRouterSupportedCaching = Object.entries(models)
+			.filter(([id, _]) => id.startsWith("anthropic/claude") || id.startsWith("google/gemini")) // only these support cache_control breakpoints (https://openrouter.ai/docs/features/prompt-caching)
 			.filter(([_, model]) => model.supportsPromptCache)
 			.map(([id, _]) => id)

@@ -229,7 +230,7 @@ describe("OpenRouter API", () => {
 		const endpoints = await getOpenRouterModelEndpoints("google/gemini-2.5-pro-preview")

 		expect(endpoints).toEqual({
-			Google: {
+			"google-vertex": {
 				maxTokens: 65535,
 				contextWindow: 1048576,
 				supportsImages: true,
@@ -243,7 +244,7 @@ describe("OpenRouter API", () => {
 				supportsReasoningEffort: undefined,
 				supportedParameters: undefined,
 			},
-			"Google AI Studio": {
+			"google-ai-studio": {
 				maxTokens: 65536,
 				contextWindow: 1048576,
 				supportsImages: true,
```
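
To see what the updated test guards, here is a minimal standalone sketch of the two-stage filter; `MinimalModelInfo` and `cacheCapableModelIds` are invented names for illustration, not part of the codebase. A model ID must both belong to a family with cache_control breakpoint support and report `supportsPromptCache`.

```typescript
// Hypothetical illustration; the real test works on the full ModelInfo type.
interface MinimalModelInfo {
	supportsPromptCache?: boolean
}

const CACHE_CONTROL_PREFIXES = ["anthropic/claude", "google/gemini"]

function cacheCapableModelIds(models: Record<string, MinimalModelInfo>): string[] {
	return Object.entries(models)
		.filter(([id]) => CACHE_CONTROL_PREFIXES.some((prefix) => id.startsWith(prefix)))
		.filter(([, model]) => model.supportsPromptCache)
		.map(([id]) => id)
}

// A model like GPT-5 may report cache pricing but has no cache_control
// breakpoints, so the prefix filter drops it before supportsPromptCache is consulted.
console.log(
	cacheCapableModelIds({
		"anthropic/claude-sonnet-4": { supportsPromptCache: true },
		"google/gemini-2.5-pro-preview": { supportsPromptCache: false },
		"openai/gpt-5": { supportsPromptCache: true },
	}),
) // ["anthropic/claude-sonnet-4"]
```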

src/api/providers/fetchers/openrouter.ts

Lines changed: 3 additions & 2 deletions
```diff
@@ -58,6 +58,7 @@ export type OpenRouterModel = z.infer<typeof openRouterModelSchema>

 export const openRouterModelEndpointSchema = modelRouterBaseModelSchema.extend({
 	provider_name: z.string(),
+	tag: z.string().optional(),
 })

 export type OpenRouterModelEndpoint = z.infer<typeof openRouterModelEndpointSchema>
@@ -149,7 +150,7 @@ export async function getOpenRouterModelEndpoints(
 	const { id, architecture, endpoints } = data

 	for (const endpoint of endpoints) {
-		models[endpoint.provider_name] = parseOpenRouterModel({
+		models[endpoint.tag ?? endpoint.provider_name] = parseOpenRouterModel({
 			id,
 			model: endpoint,
 			modality: architecture?.modality,
@@ -188,7 +189,7 @@ export const parseOpenRouterModel = ({

 	const cacheReadsPrice = model.pricing?.input_cache_read ? parseApiPrice(model.pricing?.input_cache_read) : undefined

-	const supportsPromptCache = typeof cacheWritesPrice !== "undefined" && typeof cacheReadsPrice !== "undefined"
+	const supportsPromptCache = typeof cacheReadsPrice !== "undefined" // some models support caching but don't charge a cacheWritesPrice, e.g. GPT-5

 	const modelInfo: ModelInfo = {
 		maxTokens: maxTokens || Math.ceil(model.context_length * 0.2),
```
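
A self-contained sketch of the pricing logic this hunk relaxes. The real `parseApiPrice` lives elsewhere in the repo; the stand-in below assumes OpenRouter reports USD-per-token prices as strings and the app wants USD per million tokens — treat that conversion, and the example price, as assumptions.

```typescript
// Stand-in for the repo's parseApiPrice (assumed behavior: per-token string
// price in, per-million-token number out; undefined when the field is absent).
function parseApiPrice(price: string | number | undefined): number | undefined {
	return price ? parseFloat(String(price)) * 1_000_000 : undefined
}

interface EndpointPricing {
	input_cache_read?: string | number
	input_cache_write?: string | number
}

function cacheSupport(pricing?: EndpointPricing) {
	const cacheReadsPrice = pricing?.input_cache_read ? parseApiPrice(pricing.input_cache_read) : undefined
	const cacheWritesPrice = pricing?.input_cache_write ? parseApiPrice(pricing.input_cache_write) : undefined
	// Per the change above: a read price alone is enough, since some models
	// (e.g. GPT-5) cache prompts without charging for cache writes.
	const supportsPromptCache = typeof cacheReadsPrice !== "undefined"
	return { supportsPromptCache, cacheReadsPrice, cacheWritesPrice }
}

// Hypothetical GPT-5-style pricing: reads are billed, writes are not.
console.log(cacheSupport({ input_cache_read: "0.000000125" }))
// => { supportsPromptCache: true, cacheReadsPrice: 0.125, cacheWritesPrice: undefined }
```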

webview-ui/src/components/ui/hooks/useOpenRouterModelProviders.ts

Lines changed: 10 additions & 28 deletions
```diff
@@ -22,12 +22,15 @@ const openRouterEndpointsSchema = z.object({
 		endpoints: z.array(
 			z.object({
 				name: z.string(),
+				tag: z.string().optional(),
 				context_length: z.number(),
 				max_completion_tokens: z.number().nullish(),
 				pricing: z
 					.object({
 						prompt: z.union([z.string(), z.number()]).optional(),
 						completion: z.union([z.string(), z.number()]).optional(),
+						input_cache_read: z.union([z.string(), z.number()]).optional(),
+						input_cache_write: z.union([z.string(), z.number()]).optional(),
 					})
 					.optional(),
 			}),
@@ -51,49 +54,28 @@ async function getOpenRouterProvidersForModel(modelId: string) {
 			return models
 		}

-		const { id, description, architecture, endpoints } = result.data.data
+		const { description, architecture, endpoints } = result.data.data

 		for (const endpoint of endpoints) {
-			const providerName = endpoint.name.split("|")[0].trim()
+			const providerName = endpoint.tag ?? endpoint.name
 			const inputPrice = parseApiPrice(endpoint.pricing?.prompt)
 			const outputPrice = parseApiPrice(endpoint.pricing?.completion)
+			const cacheReadsPrice = parseApiPrice(endpoint.pricing?.input_cache_read)
+			const cacheWritesPrice = parseApiPrice(endpoint.pricing?.input_cache_write)

 			const modelInfo: OpenRouterModelProvider = {
 				maxTokens: endpoint.max_completion_tokens || endpoint.context_length,
 				contextWindow: endpoint.context_length,
 				supportsImages: architecture?.modality?.includes("image"),
-				supportsPromptCache: false,
+				supportsPromptCache: typeof cacheReadsPrice !== "undefined",
+				cacheReadsPrice,
+				cacheWritesPrice,
 				inputPrice,
 				outputPrice,
 				description,
 				label: providerName,
 			}

-			// TODO: This is wrong. We need to fetch the model info from
-			// OpenRouter instead of hardcoding it here. The endpoints payload
-			// doesn't include this unfortunately, so we need to get it from the
-			// main models endpoint.
-			switch (true) {
-				case modelId.startsWith("anthropic/claude-3.7-sonnet"):
-					modelInfo.supportsComputerUse = true
-					modelInfo.supportsPromptCache = true
-					modelInfo.cacheWritesPrice = 3.75
-					modelInfo.cacheReadsPrice = 0.3
-					modelInfo.maxTokens = id === "anthropic/claude-3.7-sonnet:thinking" ? 64_000 : 8192
-					break
-				case modelId.startsWith("anthropic/claude-3.5-sonnet-20240620"):
-					modelInfo.supportsPromptCache = true
-					modelInfo.cacheWritesPrice = 3.75
-					modelInfo.cacheReadsPrice = 0.3
-					modelInfo.maxTokens = 8192
-					break
-				default:
-					modelInfo.supportsPromptCache = true
-					modelInfo.cacheWritesPrice = 0.3
-					modelInfo.cacheReadsPrice = 0.03
-					break
-			}
-
 			models[providerName] = modelInfo
 		}
 	} catch (error) {
```
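
Finally, a sketch of the new provider-keying rule in the webview hook, with an illustrative payload (not OpenRouter's actual response shape): key by the machine-readable `tag` when present, and fall back to the display name otherwise. This replaces the deleted `split("|")` hack, which relied on parsing human-readable names.

```typescript
import { z } from "zod"

// Reduced schema for illustration; the real one also validates pricing,
// context_length, and max_completion_tokens.
const endpointSchema = z.object({
	name: z.string(),
	tag: z.string().optional(),
})

// Hypothetical payload entries.
const payload = [
	{ name: "Google Vertex", tag: "google-vertex" },
	{ name: "Some Legacy Provider" }, // no tag: fall back to the display name
]

for (const raw of payload) {
	const endpoint = endpointSchema.parse(raw)
	const providerName = endpoint.tag ?? endpoint.name
	console.log(providerName) // "google-vertex", then "Some Legacy Provider"
}
```

Keying by `tag` is also why the test above now expects "google-vertex" and "google-ai-studio" instead of the display names "Google" and "Google AI Studio".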
