11import axios from "axios"
22import { z } from "zod"
33
4- import { ApiHandlerOptions , ModelInfo } from "../../../shared/api"
4+ import {
5+ ApiHandlerOptions ,
6+ ModelInfo ,
7+ anthropicModels ,
8+ COMPUTER_USE_MODELS ,
9+ OPTIONAL_PROMPT_CACHING_MODELS ,
10+ } from "../../../shared/api"
511import { parseApiPrice } from "../../../utils/cost"
612
713// https://openrouter.ai/api/v1/models
@@ -62,8 +68,7 @@ export async function getOpenRouterModels(options?: ApiHandlerOptions) {
6268 ? parseApiPrice ( rawModel . pricing ?. input_cache_read )
6369 : undefined
6470
65- // Disable prompt caching for Gemini models for now.
66- const supportsPromptCache = ! ! cacheWritesPrice && ! ! cacheReadsPrice && ! rawModel . id . startsWith ( "google" )
71+ const supportsPromptCache = ! ! cacheWritesPrice && ! ! cacheReadsPrice
6772
6873 const modelInfo : ModelInfo = {
6974 maxTokens : rawModel . top_provider ?. max_completion_tokens ,
@@ -78,29 +83,25 @@ export async function getOpenRouterModels(options?: ApiHandlerOptions) {
7883 thinking : rawModel . id === "anthropic/claude-3.7-sonnet:thinking" ,
7984 }
8085
81- // Then OpenRouter model definition doesn't give us any hints about computer use,
82- // so we need to set that manually.
83- // The ideal `maxTokens` values are model dependent, but we should probably DRY
84- // this up and use the values defined for the Anthropic providers.
85- switch ( true ) {
86- case rawModel . id . startsWith ( "anthropic/claude-3.7-sonnet" ) :
87- modelInfo . supportsComputerUse = true
88- modelInfo . maxTokens = rawModel . id === "anthropic/claude-3.7-sonnet:thinking" ? 128_000 : 8192
89- break
90- case rawModel . id . startsWith ( "anthropic/claude-3.5-sonnet-20240620" ) :
91- modelInfo . maxTokens = 8192
92- break
93- case rawModel . id . startsWith ( "anthropic/claude-3.5-sonnet" ) :
94- modelInfo . supportsComputerUse = true
95- modelInfo . maxTokens = 8192
96- break
97- case rawModel . id . startsWith ( "anthropic/claude-3-5-haiku" ) :
98- case rawModel . id . startsWith ( "anthropic/claude-3-opus" ) :
99- case rawModel . id . startsWith ( "anthropic/claude-3-haiku" ) :
100- modelInfo . maxTokens = 8192
101- break
102- default :
103- break
86+ // The OpenRouter model definition doesn't give us any hints about
87+ // computer use, so we need to set that manually.
88+ if ( COMPUTER_USE_MODELS . has ( rawModel . id ) ) {
89+ modelInfo . supportsComputerUse = true
90+ }
91+
92+ // We want to treat prompt caching as "experimental" for these models.
93+ if ( OPTIONAL_PROMPT_CACHING_MODELS . has ( rawModel . id ) ) {
94+ modelInfo . isPromptCacheOptional = true
95+ }
96+
97+ // Claude 3.7 Sonnet is a "hybrid" thinking model, and the `maxTokens`
98+ // values can be configured. For the non-thinking variant we want to
99+ // use 8k. The `thinking` variant can be run in 64k and 128k modes,
100+ // and we want to use 128k.
101+ if ( rawModel . id . startsWith ( "anthropic/claude-3.7-sonnet" ) ) {
102+ modelInfo . maxTokens = rawModel . id . includes ( "thinking" )
103+ ? anthropicModels [ "claude-3-7-sonnet-20250219:thinking" ] . maxTokens
104+ : anthropicModels [ "claude-3-7-sonnet-20250219" ] . maxTokens
104105 }
105106
106107 models [ rawModel . id ] = modelInfo
0 commit comments