Skip to content

Commit 921cc45

Browse files
committed
fix: add tiered pricing support for models with different token tier rates
- Updated calculateApiCostAnthropic and calculateApiCostOpenAI functions to check for tier pricing based on total input tokens
- Added getTieredPricing helper function that finds the appropriate tier based on token count
- Added comprehensive tests for tiered pricing scenarios
- This fixes pricing calculations for Claude Sonnet 4/4.5 and Gemini models when input exceeds 200K tokens

Fixes #8982
1 parent d0e519d commit 921cc45

File tree

2 files changed

+241
-4
lines changed

2 files changed

+241
-4
lines changed

src/shared/cost.ts

Lines changed: 56 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,55 @@ export interface ApiCostResult {
66
totalCost: number
77
}
88

9+
/**
10+
* Finds the appropriate pricing tier based on the total input tokens.
11+
* Returns the prices from the matching tier, or the base prices if no tiers are defined.
12+
*/
13+
function getTieredPricing(
14+
modelInfo: ModelInfo,
15+
totalInputTokens: number,
16+
): {
17+
inputPrice: number | undefined
18+
outputPrice: number | undefined
19+
cacheWritesPrice: number | undefined
20+
cacheReadsPrice: number | undefined
21+
} {
22+
// If there are no tiers defined, use the base prices
23+
if (!modelInfo.tiers || modelInfo.tiers.length === 0) {
24+
return {
25+
inputPrice: modelInfo.inputPrice,
26+
outputPrice: modelInfo.outputPrice,
27+
cacheWritesPrice: modelInfo.cacheWritesPrice,
28+
cacheReadsPrice: modelInfo.cacheReadsPrice,
29+
}
30+
}
31+
32+
// Find the appropriate tier based on the total input tokens
33+
// Tiers are checked in order, and we use the first tier where the token count
34+
// is less than or equal to the tier's context window
35+
const tier = modelInfo.tiers.find((tier) => totalInputTokens <= tier.contextWindow)
36+
37+
if (tier) {
38+
// Use tier prices, falling back to base prices if not defined in the tier
39+
return {
40+
inputPrice: tier.inputPrice ?? modelInfo.inputPrice,
41+
outputPrice: tier.outputPrice ?? modelInfo.outputPrice,
42+
cacheWritesPrice: tier.cacheWritesPrice ?? modelInfo.cacheWritesPrice,
43+
cacheReadsPrice: tier.cacheReadsPrice ?? modelInfo.cacheReadsPrice,
44+
}
45+
}
46+
47+
// If no tier matches (all tiers have smaller context windows than the token count),
48+
// use the last (highest) tier's prices
49+
const lastTier = modelInfo.tiers[modelInfo.tiers.length - 1]
50+
return {
51+
inputPrice: lastTier.inputPrice ?? modelInfo.inputPrice,
52+
outputPrice: lastTier.outputPrice ?? modelInfo.outputPrice,
53+
cacheWritesPrice: lastTier.cacheWritesPrice ?? modelInfo.cacheWritesPrice,
54+
cacheReadsPrice: lastTier.cacheReadsPrice ?? modelInfo.cacheReadsPrice,
55+
}
56+
}
57+
958
function calculateApiCostInternal(
1059
modelInfo: ModelInfo,
1160
inputTokens: number,
@@ -15,10 +64,13 @@ function calculateApiCostInternal(
1564
totalInputTokens: number,
1665
totalOutputTokens: number,
1766
): ApiCostResult {
18-
const cacheWritesCost = ((modelInfo.cacheWritesPrice || 0) / 1_000_000) * cacheCreationInputTokens
19-
const cacheReadsCost = ((modelInfo.cacheReadsPrice || 0) / 1_000_000) * cacheReadInputTokens
20-
const baseInputCost = ((modelInfo.inputPrice || 0) / 1_000_000) * inputTokens
21-
const outputCost = ((modelInfo.outputPrice || 0) / 1_000_000) * outputTokens
67+
// Get the appropriate prices based on the total input tokens (for tiered pricing)
68+
const { inputPrice, outputPrice, cacheWritesPrice, cacheReadsPrice } = getTieredPricing(modelInfo, totalInputTokens)
69+
70+
const cacheWritesCost = ((cacheWritesPrice || 0) / 1_000_000) * cacheCreationInputTokens
71+
const cacheReadsCost = ((cacheReadsPrice || 0) / 1_000_000) * cacheReadInputTokens
72+
const baseInputCost = ((inputPrice || 0) / 1_000_000) * inputTokens
73+
const outputCost = ((outputPrice || 0) / 1_000_000) * outputTokens
2274
const totalCost = cacheWritesCost + cacheReadsCost + baseInputCost + outputCost
2375

2476
return {

src/utils/__tests__/cost.spec.ts

Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,191 @@ describe("Cost Utility", () => {
220220
expect(result.totalCost).toBe(0.0105)
221221
expect(result.totalInputTokens).toBe(6000) // Total already includes cache
222222
expect(result.totalOutputTokens).toBe(500)
223+
224+
describe("tiered pricing", () => {
225+
const modelWithTiers: ModelInfo = {
226+
contextWindow: 200_000,
227+
supportsImages: true,
228+
supportsPromptCache: true,
229+
inputPrice: 3.0, // $3 per million tokens (<= 200K)
230+
outputPrice: 15.0, // $15 per million tokens (<= 200K)
231+
cacheWritesPrice: 3.75, // $3.75 per million tokens (<= 200K)
232+
cacheReadsPrice: 0.3, // $0.30 per million tokens (<= 200K)
233+
tiers: [
234+
{
235+
contextWindow: 1_000_000, // 1M tokens
236+
inputPrice: 6.0, // $6 per million tokens (> 200K)
237+
outputPrice: 22.5, // $22.50 per million tokens (> 200K)
238+
cacheWritesPrice: 7.5, // $7.50 per million tokens (> 200K)
239+
cacheReadsPrice: 0.6, // $0.60 per million tokens (> 200K)
240+
},
241+
],
242+
}
243+
244+
it("should use base prices when total input tokens are below 200K", () => {
245+
const result = calculateApiCostAnthropic(modelWithTiers, 50_000, 10_000, 50_000, 50_000)
246+
247+
// Total input: 50K + 50K + 50K = 150K (below 200K threshold)
248+
// Should use base prices: $3/$15
249+
// Input cost: (3.0 / 1_000_000) * 50_000 = 0.15
250+
// Output cost: (15.0 / 1_000_000) * 10_000 = 0.15
251+
// Cache writes: (3.75 / 1_000_000) * 50_000 = 0.1875
252+
// Cache reads: (0.3 / 1_000_000) * 50_000 = 0.015
253+
// Total: 0.15 + 0.15 + 0.1875 + 0.015 = 0.5025
254+
expect(result.totalInputTokens).toBe(150_000)
255+
expect(result.totalOutputTokens).toBe(10_000)
256+
expect(result.totalCost).toBeCloseTo(0.5025, 6)
257+
})
258+
259+
it("should use tier prices when total input tokens exceed 200K", () => {
260+
const result = calculateApiCostAnthropic(modelWithTiers, 100_000, 20_000, 100_000, 100_000)
261+
262+
// Total input: 100K + 100K + 100K = 300K (above 200K, below 1M)
263+
// Should use tier prices: $6/$22.50
264+
// Input cost: (6.0 / 1_000_000) * 100_000 = 0.6
265+
// Output cost: (22.5 / 1_000_000) * 20_000 = 0.45
266+
// Cache writes: (7.5 / 1_000_000) * 100_000 = 0.75
267+
// Cache reads: (0.6 / 1_000_000) * 100_000 = 0.06
268+
// Total: 0.6 + 0.45 + 0.75 + 0.06 = 1.86
269+
expect(result.totalInputTokens).toBe(300_000)
270+
expect(result.totalOutputTokens).toBe(20_000)
271+
expect(result.totalCost).toBeCloseTo(1.86, 6)
272+
})
273+
274+
it("should use the highest tier prices when exceeding all tier thresholds", () => {
275+
const result = calculateApiCostAnthropic(modelWithTiers, 500_000, 50_000, 300_000, 300_000)
276+
277+
// Total input: 500K + 300K + 300K = 1.1M (above 1M threshold)
278+
// Should use highest tier prices: $6/$22.50 (last tier)
279+
// Input cost: (6.0 / 1_000_000) * 500_000 = 3.0
280+
// Output cost: (22.5 / 1_000_000) * 50_000 = 1.125
281+
// Cache writes: (7.5 / 1_000_000) * 300_000 = 2.25
282+
// Cache reads: (0.6 / 1_000_000) * 300_000 = 0.18
283+
// Total: 3.0 + 1.125 + 2.25 + 0.18 = 6.555
284+
expect(result.totalInputTokens).toBe(1_100_000)
285+
expect(result.totalOutputTokens).toBe(50_000)
286+
expect(result.totalCost).toBeCloseTo(6.555, 6)
287+
})
288+
289+
it("should handle partial tier definitions", () => {
290+
// Model where tier only overrides some prices
291+
const modelPartialTiers: ModelInfo = {
292+
contextWindow: 200_000,
293+
supportsImages: true,
294+
supportsPromptCache: true,
295+
inputPrice: 3.0,
296+
outputPrice: 15.0,
297+
cacheWritesPrice: 3.75,
298+
cacheReadsPrice: 0.3,
299+
tiers: [
300+
{
301+
contextWindow: 1_000_000,
302+
inputPrice: 6.0, // Only input price changes
303+
// output, cacheWrites, cacheReads prices should fall back to base
304+
},
305+
],
306+
}
307+
308+
const result = calculateApiCostAnthropic(modelPartialTiers, 100_000, 20_000, 100_000, 100_000)
309+
310+
// Total input: 300K (uses tier)
311+
// Input cost: (6.0 / 1_000_000) * 100_000 = 0.6 (tier price)
312+
// Output cost: (15.0 / 1_000_000) * 20_000 = 0.3 (base price)
313+
// Cache writes: (3.75 / 1_000_000) * 100_000 = 0.375 (base price)
314+
// Cache reads: (0.3 / 1_000_000) * 100_000 = 0.03 (base price)
315+
// Total: 0.6 + 0.3 + 0.375 + 0.03 = 1.305
316+
expect(result.totalInputTokens).toBe(300_000)
317+
expect(result.totalOutputTokens).toBe(20_000)
318+
expect(result.totalCost).toBeCloseTo(1.305, 6)
319+
})
320+
321+
it("should handle multiple tiers correctly", () => {
322+
const modelMultipleTiers: ModelInfo = {
323+
contextWindow: 128_000,
324+
supportsImages: true,
325+
supportsPromptCache: true,
326+
inputPrice: 0.075, // <= 128K
327+
outputPrice: 0.3,
328+
tiers: [
329+
{
330+
contextWindow: 200_000, // First tier
331+
inputPrice: 0.15,
332+
outputPrice: 0.6,
333+
},
334+
{
335+
contextWindow: 1_000_000, // Second tier
336+
inputPrice: 0.3,
337+
outputPrice: 1.2,
338+
},
339+
],
340+
}
341+
342+
// Test below first threshold (128K)
343+
let result = calculateApiCostAnthropic(modelMultipleTiers, 50_000, 10_000)
344+
expect(result.totalCost).toBeCloseTo((0.075 * 50 + 0.3 * 10) / 1000, 6)
345+
346+
// Test between first and second threshold (150K)
347+
result = calculateApiCostAnthropic(modelMultipleTiers, 150_000, 10_000)
348+
expect(result.totalCost).toBeCloseTo((0.15 * 150 + 0.6 * 10) / 1000, 6)
349+
350+
// Test above second threshold (500K)
351+
result = calculateApiCostAnthropic(modelMultipleTiers, 500_000, 10_000)
352+
expect(result.totalCost).toBeCloseTo((0.3 * 500 + 1.2 * 10) / 1000, 6)
353+
})
354+
})
355+
356+
describe("tiered pricing for OpenAI", () => {
357+
const modelWithTiers: ModelInfo = {
358+
contextWindow: 200_000,
359+
supportsImages: true,
360+
supportsPromptCache: true,
361+
inputPrice: 3.0, // $3 per million tokens (<= 200K)
362+
outputPrice: 15.0, // $15 per million tokens (<= 200K)
363+
cacheWritesPrice: 3.75, // $3.75 per million tokens (<= 200K)
364+
cacheReadsPrice: 0.3, // $0.30 per million tokens (<= 200K)
365+
tiers: [
366+
{
367+
contextWindow: 1_000_000, // 1M tokens
368+
inputPrice: 6.0, // $6 per million tokens (> 200K)
369+
outputPrice: 22.5, // $22.50 per million tokens (> 200K)
370+
cacheWritesPrice: 7.5, // $7.50 per million tokens (> 200K)
371+
cacheReadsPrice: 0.6, // $0.60 per million tokens (> 200K)
372+
},
373+
],
374+
}
375+
376+
it("should use tier prices for OpenAI when total input tokens exceed threshold", () => {
377+
// Total input: 300K (includes all tokens)
378+
const result = calculateApiCostOpenAI(modelWithTiers, 300_000, 20_000, 100_000, 100_000)
379+
380+
// Total input is 300K (above 200K, below 1M) - uses tier pricing
381+
// Non-cached input: 300K - 100K - 100K = 100K
382+
// Input cost: (6.0 / 1_000_000) * 100_000 = 0.6
383+
// Output cost: (22.5 / 1_000_000) * 20_000 = 0.45
384+
// Cache writes: (7.5 / 1_000_000) * 100_000 = 0.75
385+
// Cache reads: (0.6 / 1_000_000) * 100_000 = 0.06
386+
// Total: 0.6 + 0.45 + 0.75 + 0.06 = 1.86
387+
expect(result.totalInputTokens).toBe(300_000)
388+
expect(result.totalOutputTokens).toBe(20_000)
389+
expect(result.totalCost).toBeCloseTo(1.86, 6)
390+
})
391+
392+
it("should use base prices for OpenAI when total input tokens are below threshold", () => {
393+
// Total input: 150K (includes all tokens)
394+
const result = calculateApiCostOpenAI(modelWithTiers, 150_000, 10_000, 50_000, 50_000)
395+
396+
// Total input is 150K (below 200K) - uses base pricing
397+
// Non-cached input: 150K - 50K - 50K = 50K
398+
// Input cost: (3.0 / 1_000_000) * 50_000 = 0.15
399+
// Output cost: (15.0 / 1_000_000) * 10_000 = 0.15
400+
// Cache writes: (3.75 / 1_000_000) * 50_000 = 0.1875
401+
// Cache reads: (0.3 / 1_000_000) * 50_000 = 0.015
402+
// Total: 0.15 + 0.15 + 0.1875 + 0.015 = 0.5025
403+
expect(result.totalInputTokens).toBe(150_000)
404+
expect(result.totalOutputTokens).toBe(10_000)
405+
expect(result.totalCost).toBeCloseTo(0.5025, 6)
406+
})
407+
})
223408
})
224409
})
225410
})

0 commit comments

Comments
 (0)