 * Both lookup tables use substring matching, so key order matters:
 * "gpt-4o-mini" must come before "gpt-4o" or the shorter key would
 * match first. Keep entries most-specific-first within each provider.
 *
 * Prices sourced from OpenRouter and litellm model_prices_and_context_window.json.
 * Keys are matched as substrings of the incoming model string, so they work
 * for both direct-API model IDs and OpenRouter-prefixed IDs.
 */
812
913// ----------------------------------------------------------------------------
1721 */
1822export const CONTEXT_LIMITS : Record < string , number > = {
1923 // Anthropic
24+ "claude-opus-4.6" : 1000000 ,
25+ "claude-opus-4.5" : 200000 ,
26+ "claude-opus-4.1" : 200000 ,
2027 "claude-opus-4" : 200000 ,
21- "claude-sonnet-4" : 200000 ,
28+ "claude-sonnet-4.6" : 1000000 ,
29+ "claude-sonnet-4.5" : 1000000 ,
30+ "claude-sonnet-4" : 1000000 ,
31+ "claude-haiku-4.5" : 200000 ,
2232 "claude-haiku-4" : 200000 ,
33+ "claude-3-7-sonnet" : 200000 ,
2334 "claude-3-5-sonnet" : 200000 ,
2435 "claude-3-5-haiku" : 200000 ,
25- "claude-3-opus" : 200000 ,
26- "claude-3-sonnet" : 200000 ,
2736 "claude-3-haiku" : 200000 ,
28- // OpenAI (specific before generic)
37+ "claude-3-opus" : 200000 ,
38+ // OpenAI — specific variants before generic slugs
39+ "gpt-5.2-pro" : 272000 ,
40+ "gpt-5.2-codex" : 272000 ,
41+ "gpt-5.2-chat" : 128000 ,
42+ "gpt-5.2" : 272000 ,
43+ "gpt-5.1-codex-max" : 272000 ,
44+ "gpt-5.1-codex-mini" : 272000 ,
45+ "gpt-5.1-codex" : 272000 ,
46+ "gpt-5.1-chat" : 128000 ,
47+ "gpt-5.1" : 272000 ,
48+ "gpt-5.3-codex" : 272000 ,
49+ "gpt-5-pro" : 128000 ,
50+ "gpt-5-codex" : 272000 ,
51+ "gpt-5-chat" : 128000 ,
52+ "gpt-5-mini" : 272000 ,
53+ "gpt-5-nano" : 272000 ,
54+ "gpt-5" : 272000 ,
55+ "gpt-4.1-mini" : 1047576 ,
56+ "gpt-4.1-nano" : 1047576 ,
57+ "gpt-4.1" : 1047576 ,
2958 "gpt-4o-mini" : 128000 ,
3059 "gpt-4o" : 128000 ,
3160 "gpt-4-turbo" : 128000 ,
3261 "gpt-4" : 8192 ,
3362 "gpt-3.5-turbo" : 16385 ,
63+ "o4-mini-deep-research" : 200000 ,
64+ "o4-mini-high" : 200000 ,
3465 "o4-mini" : 200000 ,
66+ "o3-deep-research" : 200000 ,
67+ "o3-pro" : 200000 ,
68+ "o3-mini-high" : 200000 ,
3569 "o3-mini" : 200000 ,
3670 o3 : 200000 ,
71+ "o1-pro" : 200000 ,
3772 "o1-mini" : 128000 ,
3873 o1 : 200000 ,
39- "o1-preview" : 128000 ,
40- // Gemini
74+ // Google Gemini — specific before generic
75+ "gemini-3.1-pro-preview" : 1048576 ,
76+ "gemini-3-pro-preview" : 1048576 ,
77+ "gemini-3-flash-preview" : 1048576 ,
78+ "gemini-2.5-pro-preview" : 1048576 ,
4179 "gemini-2.5-pro" : 1048576 ,
80+ "gemini-2.5-flash-lite" : 1048576 ,
4281 "gemini-2.5-flash" : 1048576 ,
82+ "gemini-2.0-flash-lite" : 1048576 ,
4383 "gemini-2.0-flash" : 1048576 ,
4484 "gemini-1.5-pro" : 2097152 ,
4585 "gemini-1.5-flash" : 1048576 ,
86+ // MiniMax
87+ "minimax-m2.5" : 1000000 ,
88+ "minimax-m2.5-fast" : 1000000 ,
4689} ;
4790
4891/**
@@ -70,53 +113,109 @@ export function getContextLimit(model: string): number {
 * Model pricing: `[inputPerMTok, outputPerMTok]` in USD.
 *
 * Keys ordered most-specific-first to avoid substring false matches
 * (e.g. `gpt-4o-mini` before `gpt-4o`, `o3-mini` before `o3`).
 */
75118export const MODEL_PRICING : Record < string , [ number , number ] > = {
76- // Anthropic
119+ // Anthropic — specific point-releases before generic slugs
120+ "claude-opus-4.6" : [ 5 , 25 ] ,
121+ "claude-opus-4.5" : [ 5 , 25 ] ,
122+ "claude-opus-4.1" : [ 15 , 75 ] ,
77123 "claude-opus-4" : [ 15 , 75 ] ,
124+ "claude-sonnet-4.6" : [ 3 , 15 ] ,
125+ "claude-sonnet-4.5" : [ 3 , 15 ] ,
78126 "claude-sonnet-4" : [ 3 , 15 ] ,
127+ "claude-haiku-4.5" : [ 1 , 5 ] ,
79128 "claude-haiku-4" : [ 0.8 , 4 ] ,
129+ "claude-3-7-sonnet" : [ 3 , 15 ] ,
80130 "claude-3-5-sonnet" : [ 3 , 15 ] ,
81131 "claude-3-5-haiku" : [ 0.8 , 4 ] ,
82- "claude-3-opus" : [ 15 , 75 ] ,
83- "claude-3-sonnet" : [ 3 , 15 ] ,
84132 "claude-3-haiku" : [ 0.25 , 1.25 ] ,
85- // OpenAI
133+ "claude-3-opus" : [ 15 , 75 ] ,
134+ // OpenAI — specific variants before generic slugs
135+ "gpt-5.2-pro" : [ 21 , 168 ] ,
136+ "gpt-5.2-codex" : [ 1.75 , 14 ] ,
137+ "gpt-5.2-chat" : [ 1.75 , 14 ] ,
138+ "gpt-5.2" : [ 1.75 , 14 ] ,
139+ "gpt-5.1-codex-max" : [ 1.25 , 10 ] ,
140+ "gpt-5.1-codex-mini" : [ 0.25 , 2 ] ,
141+ "gpt-5.1-codex" : [ 1.25 , 10 ] ,
142+ "gpt-5.1-chat" : [ 1.25 , 10 ] ,
143+ "gpt-5.1" : [ 1.25 , 10 ] ,
144+ "gpt-5.3-codex" : [ 1.75 , 14 ] ,
145+ "gpt-5-pro" : [ 15 , 120 ] ,
146+ "gpt-5-codex" : [ 1.25 , 10 ] ,
147+ "gpt-5-chat" : [ 1.25 , 10 ] ,
148+ "gpt-5-mini" : [ 0.25 , 2 ] ,
149+ "gpt-5-nano" : [ 0.05 , 0.4 ] ,
150+ "gpt-5" : [ 1.25 , 10 ] ,
151+ "gpt-4.1-mini" : [ 0.4 , 1.6 ] ,
152+ "gpt-4.1-nano" : [ 0.1 , 0.4 ] ,
153+ "gpt-4.1" : [ 2.0 , 8.0 ] ,
86154 "gpt-4o-mini" : [ 0.15 , 0.6 ] ,
87155 "gpt-4o" : [ 2.5 , 10 ] ,
88156 "gpt-4-turbo" : [ 10 , 30 ] ,
89157 "gpt-4" : [ 30 , 60 ] ,
158+ "gpt-3.5-turbo" : [ 0.5 , 1.5 ] ,
159+ "o4-mini-deep-research" : [ 2 , 8 ] ,
160+ "o4-mini-high" : [ 1.1 , 4.4 ] ,
90161 "o4-mini" : [ 1.1 , 4.4 ] ,
162+ "o3-deep-research" : [ 10 , 40 ] ,
163+ "o3-pro" : [ 20 , 80 ] ,
164+ "o3-mini-high" : [ 1.1 , 4.4 ] ,
91165 "o3-mini" : [ 1.1 , 4.4 ] ,
92- o3 : [ 10 , 40 ] ,
93- "o1-mini" : [ 3 , 12 ] ,
166+ o3 : [ 2 , 8 ] ,
167+ "o1-pro" : [ 150 , 600 ] ,
168+ "o1-mini" : [ 1.1 , 4.4 ] ,
94169 o1 : [ 15 , 60 ] ,
95- "o1-preview" : [ 15 , 60 ] ,
96- "gpt-3.5-turbo" : [ 0.5 , 1.5 ] ,
97- // Gemini
170+ // Google Gemini — specific before generic
171+ "gemini-3.1-pro-preview" : [ 2 , 12 ] ,
172+ "gemini-3-pro-preview" : [ 2 , 12 ] ,
173+ "gemini-3-flash-preview" : [ 0.5 , 3 ] ,
174+ "gemini-2.5-pro-preview" : [ 1.25 , 10 ] ,
98175 "gemini-2.5-pro" : [ 1.25 , 10 ] ,
99- "gemini-2.5-flash" : [ 0.15 , 0.6 ] ,
176+ "gemini-2.5-flash-lite" : [ 0.1 , 0.4 ] ,
177+ "gemini-2.5-flash" : [ 0.3 , 2.5 ] ,
178+ "gemini-2.0-flash-lite" : [ 0.075 , 0.3 ] ,
100179 "gemini-2.0-flash" : [ 0.1 , 0.4 ] ,
101180 "gemini-1.5-pro" : [ 1.25 , 5 ] ,
102181 "gemini-1.5-flash" : [ 0.075 , 0.3 ] ,
103- // MiniMax (not in context-lens)
182+ // MiniMax
104183 "minimax-m2.5" : [ 0.8 , 8 ] ,
105184 "minimax-m2.5-fast" : [ 0.4 , 4 ] ,
106185} ;
107186
187+ /**
188+ * Cache pricing multipliers by provider prefix.
189+ *
190+ * Each entry maps a model key prefix to `[readMultiplier, writeMultiplier]`
191+ * relative to the base input price.
192+ * - Anthropic: reads at 10% of base input, writes at 25%
193+ * - Gemini: cached content at 25% of base input, no write billing
194+ */
195+ const CACHE_PRICING : Record < string , [ number , number ] > = {
196+ "claude-" : [ 0.1 , 0.25 ] ,
197+ "gemini-" : [ 0.25 , 0 ] ,
198+ } ;
199+
200+ function getCacheMultipliers ( modelKey : string ) : [ number , number ] {
201+ for ( const [ prefix , multipliers ] of Object . entries ( CACHE_PRICING ) ) {
202+ if ( modelKey . startsWith ( prefix ) ) return multipliers ;
203+ }
204+ return [ 0 , 0 ] ;
205+ }
206+
/**
 * Estimate cost in USD for a request/response token pair using `MODEL_PRICING`.
 *
 * Cache pricing varies by provider:
 * - Anthropic: cache reads at 10% of base input, writes at 25%
 * - Gemini: cached content at 25% of base input, no write cost
 *
 * @param model - Model identifier (substring matched against known keys).
 * @param inputTokens - Input/prompt tokens (non-cached).
 * @param outputTokens - Output/completion tokens.
 * @param cacheReadTokens - Cache read tokens.
 * @param cacheWriteTokens - Cache write tokens.
 * @returns Cost in USD, rounded to 6 decimals; `null` if the model is unknown.
 */
122221export function estimateCost (
@@ -128,10 +227,9 @@ export function estimateCost(
128227) : number | null {
129228 for ( const [ key , [ inp , out ] ] of Object . entries ( MODEL_PRICING ) ) {
130229 if ( model . includes ( key ) ) {
131- // Anthropic models have cache pricing (10% for reads, 25% for writes)
132- const isClaude = key . startsWith ( "claude-" ) ;
133- const cacheReadCost = isClaude ? cacheReadTokens * inp * 0.1 : 0 ;
134- const cacheWriteCost = isClaude ? cacheWriteTokens * inp * 0.25 : 0 ;
230+ const [ readMul , writeMul ] = getCacheMultipliers ( key ) ;
231+ const cacheReadCost = cacheReadTokens * inp * readMul ;
232+ const cacheWriteCost = cacheWriteTokens * inp * writeMul ;
135233
136234 return (
137235 Math . round (
0 commit comments