 * Both lookup tables use substring matching, so key order matters:
 * "gpt-4o-mini" must come before "gpt-4o" or the shorter key would
 * match first. Keep entries most-specific-first within each provider.
 *
 * Prices sourced from OpenRouter and litellm model_prices_and_context_window.json.
 * Keys are matched as substrings of the incoming model string, so they work
 * for both direct-API model IDs and OpenRouter-prefixed IDs.
 */
812
913// ----------------------------------------------------------------------------
1721 */
1822export const CONTEXT_LIMITS : Record < string , number > = {
1923 // Anthropic
24+ "claude-opus-4.6" : 1000000 ,
25+ "claude-opus-4.5" : 200000 ,
26+ "claude-opus-4.1" : 200000 ,
2027 "claude-opus-4" : 200000 ,
21- "claude-sonnet-4" : 200000 ,
28+ "claude-sonnet-4.6" : 1000000 ,
29+ "claude-sonnet-4.5" : 1000000 ,
30+ "claude-sonnet-4" : 1000000 ,
31+ "claude-haiku-4.5" : 200000 ,
2232 "claude-haiku-4" : 200000 ,
33+ "claude-3-7-sonnet" : 200000 ,
2334 "claude-3-5-sonnet" : 200000 ,
2435 "claude-3-5-haiku" : 200000 ,
25- "claude-3-opus" : 200000 ,
26- "claude-3-sonnet" : 200000 ,
2736 "claude-3-haiku" : 200000 ,
28- // OpenAI (specific before generic)
37+ "claude-3-opus" : 200000 ,
38+ // OpenAI — specific variants before generic slugs
39+ "gpt-5.2-pro" : 272000 ,
40+ "gpt-5.2-codex" : 272000 ,
41+ "gpt-5.2-chat" : 128000 ,
42+ "gpt-5.2" : 272000 ,
43+ "gpt-5.1-codex-max" : 272000 ,
44+ "gpt-5.1-codex-mini" : 272000 ,
45+ "gpt-5.1-codex" : 272000 ,
46+ "gpt-5.1-chat" : 128000 ,
47+ "gpt-5.1" : 272000 ,
48+ "gpt-5.3-codex" : 272000 ,
49+ "gpt-5-pro" : 128000 ,
50+ "gpt-5-codex" : 272000 ,
51+ "gpt-5-chat" : 128000 ,
52+ "gpt-5-mini" : 272000 ,
53+ "gpt-5-nano" : 272000 ,
54+ "gpt-5" : 272000 ,
55+ "gpt-4.1-mini" : 1047576 ,
56+ "gpt-4.1-nano" : 1047576 ,
57+ "gpt-4.1" : 1047576 ,
2958 "gpt-4o-mini" : 128000 ,
3059 "gpt-4o" : 128000 ,
3160 "gpt-4-turbo" : 128000 ,
3261 "gpt-4" : 8192 ,
3362 "gpt-3.5-turbo" : 16385 ,
63+ "o4-mini-deep-research" : 200000 ,
64+ "o4-mini-high" : 200000 ,
3465 "o4-mini" : 200000 ,
66+ "o3-deep-research" : 200000 ,
67+ "o3-pro" : 200000 ,
68+ "o3-mini-high" : 200000 ,
3569 "o3-mini" : 200000 ,
3670 o3 : 200000 ,
71+ "o1-pro" : 200000 ,
3772 "o1-mini" : 128000 ,
3873 o1 : 200000 ,
39- "o1-preview" : 128000 ,
40- // Gemini
74+ // Google Gemini — specific before generic
75+ "gemini-3.1-pro-preview" : 1048576 ,
76+ "gemini-3-pro-preview" : 1048576 ,
77+ "gemini-3-flash-preview" : 1048576 ,
78+ "gemini-2.5-pro-preview" : 1048576 ,
4179 "gemini-2.5-pro" : 1048576 ,
80+ "gemini-2.5-flash-lite" : 1048576 ,
4281 "gemini-2.5-flash" : 1048576 ,
82+ "gemini-2.0-flash-lite" : 1048576 ,
4383 "gemini-2.0-flash" : 1048576 ,
4484 "gemini-1.5-pro" : 2097152 ,
4585 "gemini-1.5-flash" : 1048576 ,
86+ // MiniMax
87+ "minimax-m2.5" : 1000000 ,
88+ "minimax-m2.5-fast" : 1000000 ,
4689} ;
4790
4891/**
@@ -70,53 +113,109 @@ export function getContextLimit(model: string): number {
 * Model pricing: `[inputPerMTok, outputPerMTok]` in USD.
 *
 * Keys ordered most-specific-first to avoid substring false matches
 * (e.g. `gpt-4o-mini` before `gpt-4o`, `o3-mini` before `o3`).
 */
75118export const MODEL_PRICING : Record < string , [ number , number ] > = {
76- // Anthropic
119+ // Anthropic — specific point-releases before generic slugs
120+ "claude-opus-4.6" : [ 5 , 25 ] ,
121+ "claude-opus-4.5" : [ 5 , 25 ] ,
122+ "claude-opus-4.1" : [ 15 , 75 ] ,
77123 "claude-opus-4" : [ 15 , 75 ] ,
124+ "claude-sonnet-4.6" : [ 3 , 15 ] ,
125+ "claude-sonnet-4.5" : [ 3 , 15 ] ,
78126 "claude-sonnet-4" : [ 3 , 15 ] ,
127+ "claude-haiku-4.5" : [ 1 , 5 ] ,
79128 "claude-haiku-4" : [ 0.8 , 4 ] ,
129+ "claude-3-7-sonnet" : [ 3 , 15 ] ,
80130 "claude-3-5-sonnet" : [ 3 , 15 ] ,
81131 "claude-3-5-haiku" : [ 0.8 , 4 ] ,
82- "claude-3-opus" : [ 15 , 75 ] ,
83- "claude-3-sonnet" : [ 3 , 15 ] ,
84132 "claude-3-haiku" : [ 0.25 , 1.25 ] ,
85- // OpenAI
133+ "claude-3-opus" : [ 15 , 75 ] ,
134+ // OpenAI — specific variants before generic slugs
135+ "gpt-5.2-pro" : [ 21 , 168 ] ,
136+ "gpt-5.2-codex" : [ 1.75 , 14 ] ,
137+ "gpt-5.2-chat" : [ 1.75 , 14 ] ,
138+ "gpt-5.2" : [ 1.75 , 14 ] ,
139+ "gpt-5.1-codex-max" : [ 1.25 , 10 ] ,
140+ "gpt-5.1-codex-mini" : [ 0.25 , 2 ] ,
141+ "gpt-5.1-codex" : [ 1.25 , 10 ] ,
142+ "gpt-5.1-chat" : [ 1.25 , 10 ] ,
143+ "gpt-5.1" : [ 1.25 , 10 ] ,
144+ "gpt-5.3-codex" : [ 1.75 , 14 ] ,
145+ "gpt-5-pro" : [ 15 , 120 ] ,
146+ "gpt-5-codex" : [ 1.25 , 10 ] ,
147+ "gpt-5-chat" : [ 1.25 , 10 ] ,
148+ "gpt-5-mini" : [ 0.25 , 2 ] ,
149+ "gpt-5-nano" : [ 0.05 , 0.4 ] ,
150+ "gpt-5" : [ 1.25 , 10 ] ,
151+ "gpt-4.1-mini" : [ 0.4 , 1.6 ] ,
152+ "gpt-4.1-nano" : [ 0.1 , 0.4 ] ,
153+ "gpt-4.1" : [ 2.0 , 8.0 ] ,
86154 "gpt-4o-mini" : [ 0.15 , 0.6 ] ,
87155 "gpt-4o" : [ 2.5 , 10 ] ,
88156 "gpt-4-turbo" : [ 10 , 30 ] ,
89157 "gpt-4" : [ 30 , 60 ] ,
158+ "gpt-3.5-turbo" : [ 0.5 , 1.5 ] ,
159+ "o4-mini-deep-research" : [ 2 , 8 ] ,
160+ "o4-mini-high" : [ 1.1 , 4.4 ] ,
90161 "o4-mini" : [ 1.1 , 4.4 ] ,
162+ "o3-deep-research" : [ 10 , 40 ] ,
163+ "o3-pro" : [ 20 , 80 ] ,
164+ "o3-mini-high" : [ 1.1 , 4.4 ] ,
91165 "o3-mini" : [ 1.1 , 4.4 ] ,
92- o3 : [ 10 , 40 ] ,
93- "o1-mini" : [ 3 , 12 ] ,
166+ o3 : [ 2 , 8 ] ,
167+ "o1-pro" : [ 150 , 600 ] ,
168+ "o1-mini" : [ 1.1 , 4.4 ] ,
94169 o1 : [ 15 , 60 ] ,
95- "o1-preview" : [ 15 , 60 ] ,
96- "gpt-3.5-turbo" : [ 0.5 , 1.5 ] ,
97- // Gemini
170+ // Google Gemini — specific before generic
171+ "gemini-3.1-pro-preview" : [ 2 , 12 ] ,
172+ "gemini-3-pro-preview" : [ 2 , 12 ] ,
173+ "gemini-3-flash-preview" : [ 0.5 , 3 ] ,
174+ "gemini-2.5-pro-preview" : [ 1.25 , 10 ] ,
98175 "gemini-2.5-pro" : [ 1.25 , 10 ] ,
99- "gemini-2.5-flash" : [ 0.15 , 0.6 ] ,
176+ "gemini-2.5-flash-lite" : [ 0.1 , 0.4 ] ,
177+ "gemini-2.5-flash" : [ 0.3 , 2.5 ] ,
178+ "gemini-2.0-flash-lite" : [ 0.075 , 0.3 ] ,
100179 "gemini-2.0-flash" : [ 0.1 , 0.4 ] ,
101180 "gemini-1.5-pro" : [ 1.25 , 5 ] ,
102181 "gemini-1.5-flash" : [ 0.075 , 0.3 ] ,
103- // MiniMax (not in context-lens)
182+ // MiniMax
104183 "minimax-m2.5" : [ 0.8 , 8 ] ,
105184 "minimax-m2.5-fast" : [ 0.4 , 4 ] ,
106185} ;
107186
187+ /**
188+ * Cache pricing multipliers by provider prefix.
189+ *
190+ * Each entry maps a model key prefix to `[readMultiplier, writeMultiplier]`
191+ * relative to the base input price.
192+ * - Anthropic: reads at 10% of base input, writes at 25%
193+ * - Gemini: cached content at 25% of base input, no write billing
194+ */
195+ const CACHE_PRICING : Record < string , [ number , number ] > = {
196+ "claude-" : [ 0.1 , 0.25 ] ,
197+ "gemini-" : [ 0.25 , 0 ] ,
198+ } ;
199+
200+ function getCacheMultipliers ( modelKey : string ) : [ number , number ] {
201+ for ( const [ prefix , multipliers ] of Object . entries ( CACHE_PRICING ) ) {
202+ if ( modelKey . startsWith ( prefix ) ) return multipliers ;
203+ }
204+ return [ 0 , 0 ] ;
205+ }
206+
/**
 * Estimate cost in USD for a request/response token pair using `MODEL_PRICING`.
 *
 * Cache pricing varies by provider:
 * - Anthropic: cache reads at 10% of base input, writes at 25%
 * - Gemini: cached content at 25% of base input, no write cost
 *
 * @param model - Model identifier (substring matched against known keys).
 * @param inputTokens - Input/prompt tokens (non-cached).
 * @param outputTokens - Output/completion tokens.
 * @param cacheReadTokens - Cache read tokens.
 * @param cacheWriteTokens - Cache write tokens.
 * @returns Cost in USD, rounded to 6 decimals; `null` if the model is unknown.
 */
122221export function estimateCost (
@@ -128,10 +227,9 @@ export function estimateCost(
128227) : number | null {
129228 for ( const [ key , [ inp , out ] ] of Object . entries ( MODEL_PRICING ) ) {
130229 if ( model . includes ( key ) ) {
131- // Anthropic models have cache pricing (10% for reads, 25% for writes)
132- const isClaude = key . startsWith ( "claude-" ) ;
133- const cacheReadCost = isClaude ? cacheReadTokens * inp * 0.1 : 0 ;
134- const cacheWriteCost = isClaude ? cacheWriteTokens * inp * 0.25 : 0 ;
230+ const [ readMul , writeMul ] = getCacheMultipliers ( key ) ;
231+ const cacheReadCost = cacheReadTokens * inp * readMul ;
232+ const cacheWriteCost = cacheWriteTokens * inp * writeMul ;
135233
136234 return (
137235 Math . round (
0 commit comments