Skip to content

Commit b461805

Browse files
committed
feat(core): update models — newer Claude/GPT-5/Gemini, fix o1-mini pricing, Gemini cache pricing
1 parent c968e2b commit b461805

File tree

3 files changed

+129
-29
lines changed

3 files changed

+129
-29
lines changed

packages/core/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@contextio/core",
3-
"version": "0.2.1",
3+
"version": "0.2.2",
44
"description": "Shared types, routing, and header utilities for the @context ecosystem. Zero dependencies.",
55
"license": "MIT",
66
"type": "module",

packages/core/src/models.ts

Lines changed: 125 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@
44
* Both lookup tables use substring matching, so key order matters:
55
* "gpt-4o-mini" must come before "gpt-4o" or the shorter key would
66
* match first. Keep entries most-specific-first within each provider.
7+
*
8+
* Prices sourced from OpenRouter and litellm model_prices_and_context_window.json.
9+
* Keys are matched as substrings of the incoming model string, so they work
10+
* for both direct-API model IDs and OpenRouter-prefixed IDs.
711
*/
812

913
// ----------------------------------------------------------------------------
@@ -17,32 +21,71 @@
1721
*/
1822
export const CONTEXT_LIMITS: Record<string, number> = {
1923
// Anthropic
24+
"claude-opus-4.6": 1000000,
25+
"claude-opus-4.5": 200000,
26+
"claude-opus-4.1": 200000,
2027
"claude-opus-4": 200000,
21-
"claude-sonnet-4": 200000,
28+
"claude-sonnet-4.6": 1000000,
29+
"claude-sonnet-4.5": 1000000,
30+
"claude-sonnet-4": 1000000,
31+
"claude-haiku-4.5": 200000,
2232
"claude-haiku-4": 200000,
33+
"claude-3-7-sonnet": 200000,
2334
"claude-3-5-sonnet": 200000,
2435
"claude-3-5-haiku": 200000,
25-
"claude-3-opus": 200000,
26-
"claude-3-sonnet": 200000,
2736
"claude-3-haiku": 200000,
28-
// OpenAI (specific before generic)
37+
"claude-3-opus": 200000,
38+
// OpenAI — specific variants before generic slugs
39+
"gpt-5.2-pro": 272000,
40+
"gpt-5.2-codex": 272000,
41+
"gpt-5.2-chat": 128000,
42+
"gpt-5.2": 272000,
43+
"gpt-5.1-codex-max": 272000,
44+
"gpt-5.1-codex-mini": 272000,
45+
"gpt-5.1-codex": 272000,
46+
"gpt-5.1-chat": 128000,
47+
"gpt-5.1": 272000,
48+
"gpt-5.3-codex": 272000,
49+
"gpt-5-pro": 128000,
50+
"gpt-5-codex": 272000,
51+
"gpt-5-chat": 128000,
52+
"gpt-5-mini": 272000,
53+
"gpt-5-nano": 272000,
54+
"gpt-5": 272000,
55+
"gpt-4.1-mini": 1047576,
56+
"gpt-4.1-nano": 1047576,
57+
"gpt-4.1": 1047576,
2958
"gpt-4o-mini": 128000,
3059
"gpt-4o": 128000,
3160
"gpt-4-turbo": 128000,
3261
"gpt-4": 8192,
3362
"gpt-3.5-turbo": 16385,
63+
"o4-mini-deep-research": 200000,
64+
"o4-mini-high": 200000,
3465
"o4-mini": 200000,
66+
"o3-deep-research": 200000,
67+
"o3-pro": 200000,
68+
"o3-mini-high": 200000,
3569
"o3-mini": 200000,
3670
o3: 200000,
71+
"o1-pro": 200000,
3772
"o1-mini": 128000,
3873
o1: 200000,
39-
"o1-preview": 128000,
40-
// Gemini
74+
// Google Gemini — specific before generic
75+
"gemini-3.1-pro-preview": 1048576,
76+
"gemini-3-pro-preview": 1048576,
77+
"gemini-3-flash-preview": 1048576,
78+
"gemini-2.5-pro-preview": 1048576,
4179
"gemini-2.5-pro": 1048576,
80+
"gemini-2.5-flash-lite": 1048576,
4281
"gemini-2.5-flash": 1048576,
82+
"gemini-2.0-flash-lite": 1048576,
4383
"gemini-2.0-flash": 1048576,
4484
"gemini-1.5-pro": 2097152,
4585
"gemini-1.5-flash": 1048576,
86+
// MiniMax — "minimax-m2.5" is a substring of "minimax-m2.5-fast", so the -fast key must come first
87+
"minimax-m2.5-fast": 1000000,
88+
"minimax-m2.5": 1000000,
4689
};
4790

4891
/**
@@ -70,53 +113,109 @@ export function getContextLimit(model: string): number {
70113
* Model pricing: `[inputPerMTok, outputPerMTok]` in USD.
71114
*
72115
* Keys ordered most-specific-first to avoid substring false matches
73-
* (e.g. `gpt-4o-mini` before `gpt-4o`).
116+
* (e.g. `gpt-4o-mini` before `gpt-4o`, `o3-mini` before `o3`).
74117
*/
75118
export const MODEL_PRICING: Record<string, [number, number]> = {
76-
// Anthropic
119+
// Anthropic — specific point-releases before generic slugs
120+
"claude-opus-4.6": [5, 25],
121+
"claude-opus-4.5": [5, 25],
122+
"claude-opus-4.1": [15, 75],
77123
"claude-opus-4": [15, 75],
124+
"claude-sonnet-4.6": [3, 15],
125+
"claude-sonnet-4.5": [3, 15],
78126
"claude-sonnet-4": [3, 15],
127+
"claude-haiku-4.5": [1, 5],
79128
"claude-haiku-4": [0.8, 4],
129+
"claude-3-7-sonnet": [3, 15],
80130
"claude-3-5-sonnet": [3, 15],
81131
"claude-3-5-haiku": [0.8, 4],
82-
"claude-3-opus": [15, 75],
83-
"claude-3-sonnet": [3, 15],
84132
"claude-3-haiku": [0.25, 1.25],
85-
// OpenAI
133+
"claude-3-opus": [15, 75],
134+
// OpenAI — specific variants before generic slugs
135+
"gpt-5.2-pro": [21, 168],
136+
"gpt-5.2-codex": [1.75, 14],
137+
"gpt-5.2-chat": [1.75, 14],
138+
"gpt-5.2": [1.75, 14],
139+
"gpt-5.1-codex-max": [1.25, 10],
140+
"gpt-5.1-codex-mini": [0.25, 2],
141+
"gpt-5.1-codex": [1.25, 10],
142+
"gpt-5.1-chat": [1.25, 10],
143+
"gpt-5.1": [1.25, 10],
144+
"gpt-5.3-codex": [1.75, 14],
145+
"gpt-5-pro": [15, 120],
146+
"gpt-5-codex": [1.25, 10],
147+
"gpt-5-chat": [1.25, 10],
148+
"gpt-5-mini": [0.25, 2],
149+
"gpt-5-nano": [0.05, 0.4],
150+
"gpt-5": [1.25, 10],
151+
"gpt-4.1-mini": [0.4, 1.6],
152+
"gpt-4.1-nano": [0.1, 0.4],
153+
"gpt-4.1": [2.0, 8.0],
86154
"gpt-4o-mini": [0.15, 0.6],
87155
"gpt-4o": [2.5, 10],
88156
"gpt-4-turbo": [10, 30],
89157
"gpt-4": [30, 60],
158+
"gpt-3.5-turbo": [0.5, 1.5],
159+
"o4-mini-deep-research": [2, 8],
160+
"o4-mini-high": [1.1, 4.4],
90161
"o4-mini": [1.1, 4.4],
162+
"o3-deep-research": [10, 40],
163+
"o3-pro": [20, 80],
164+
"o3-mini-high": [1.1, 4.4],
91165
"o3-mini": [1.1, 4.4],
92-
o3: [10, 40],
93-
"o1-mini": [3, 12],
166+
o3: [2, 8],
167+
"o1-pro": [150, 600],
168+
"o1-mini": [1.1, 4.4],
94169
o1: [15, 60],
95-
"o1-preview": [15, 60],
96-
"gpt-3.5-turbo": [0.5, 1.5],
97-
// Gemini
170+
// Google Gemini — specific before generic
171+
"gemini-3.1-pro-preview": [2, 12],
172+
"gemini-3-pro-preview": [2, 12],
173+
"gemini-3-flash-preview": [0.5, 3],
174+
"gemini-2.5-pro-preview": [1.25, 10],
98175
"gemini-2.5-pro": [1.25, 10],
99-
"gemini-2.5-flash": [0.15, 0.6],
176+
"gemini-2.5-flash-lite": [0.1, 0.4],
177+
"gemini-2.5-flash": [0.3, 2.5],
178+
"gemini-2.0-flash-lite": [0.075, 0.3],
100179
"gemini-2.0-flash": [0.1, 0.4],
101180
"gemini-1.5-pro": [1.25, 5],
102181
"gemini-1.5-flash": [0.075, 0.3],
103-
// MiniMax (not in context-lens)
182+
// MiniMax
104183
"minimax-m2.5-fast": [0.4, 4],
105184
"minimax-m2.5": [0.8, 8],
106185
};
107186

187+
/**
188+
* Cache pricing multipliers by provider prefix.
189+
*
190+
* Each entry maps a model key prefix to `[readMultiplier, writeMultiplier]`
191+
* relative to the base input price.
192+
* - Anthropic: reads at 10% of base input, writes at 25%
193+
* - Gemini: cached content at 25% of base input, no write billing
194+
*/
195+
const CACHE_PRICING: Record<string, [number, number]> = {
196+
"claude-": [0.1, 0.25],
197+
"gemini-": [0.25, 0],
198+
};
199+
200+
function getCacheMultipliers(modelKey: string): [number, number] {
201+
for (const [prefix, multipliers] of Object.entries(CACHE_PRICING)) {
202+
if (modelKey.startsWith(prefix)) return multipliers;
203+
}
204+
return [0, 0];
205+
}
206+
108207
/**
109208
* Estimate cost in USD for a request/response token pair using `MODEL_PRICING`.
110209
*
111-
* Cache pricing (Anthropic):
112-
* - Cache reads: 10% of base input price (0.1x)
113-
* - Cache writes: 25% of base input price (0.25x)
210+
* Cache pricing varies by provider:
211+
* - Anthropic: cache reads at 10% of base input, writes at 25%
212+
* - Gemini: cached content at 25% of base input, no write cost
114213
*
115214
* @param model - Model identifier (substring matched against known keys).
116215
* @param inputTokens - Input/prompt tokens (non-cached).
117216
* @param outputTokens - Output/completion tokens.
118-
* @param cacheReadTokens - Cache read tokens (charged at 10% for Anthropic).
119-
* @param cacheWriteTokens - Cache write tokens (charged at 25% for Anthropic).
217+
* @param cacheReadTokens - Cache read tokens.
218+
* @param cacheWriteTokens - Cache write tokens.
120219
* @returns Cost in USD, rounded to 6 decimals; `null` if the model is unknown.
121220
*/
122221
export function estimateCost(
@@ -128,10 +227,9 @@ export function estimateCost(
128227
): number | null {
129228
for (const [key, [inp, out]] of Object.entries(MODEL_PRICING)) {
130229
if (model.includes(key)) {
131-
// Anthropic models have cache pricing (10% for reads, 25% for writes)
132-
const isClaude = key.startsWith("claude-");
133-
const cacheReadCost = isClaude ? cacheReadTokens * inp * 0.1 : 0;
134-
const cacheWriteCost = isClaude ? cacheWriteTokens * inp * 0.25 : 0;
230+
const [readMul, writeMul] = getCacheMultipliers(key);
231+
const cacheReadCost = cacheReadTokens * inp * readMul;
232+
const cacheWriteCost = cacheWriteTokens * inp * writeMul;
135233

136234
return (
137235
Math.round(

packages/core/test/models.test.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ describe("models.ts", () => {
1313
describe("getContextLimit", () => {
1414
it("returns exact match for claude models", () => {
1515
assert.equal(getContextLimit("claude-opus-4-20250514"), 200000);
16-
assert.equal(getContextLimit("claude-sonnet-4-20250514"), 200000);
16+
assert.equal(getContextLimit("claude-sonnet-4-20250514"), 1000000);
1717
assert.equal(getContextLimit("claude-haiku-4-20250320"), 200000);
1818
});
1919

@@ -152,9 +152,11 @@ describe("models.ts", () => {
152152
describe("CONTEXT_LIMITS", () => {
153153
it("has expected entries", () => {
154154
assert.equal(CONTEXT_LIMITS["claude-opus-4"], 200000);
155+
assert.equal(CONTEXT_LIMITS["claude-sonnet-4"], 1000000);
155156
assert.equal(CONTEXT_LIMITS["gpt-4o"], 128000);
156157
assert.equal(CONTEXT_LIMITS["o1"], 200000);
157158
assert.equal(CONTEXT_LIMITS["gemini-1.5-pro"], 2097152);
159+
assert.equal(CONTEXT_LIMITS["gemini-2.5-flash"], 1048576);
158160
});
159161
});
160162
});

0 commit comments

Comments
 (0)