Skip to content

Commit 921cc45

Browse files
committed
fix: add tiered pricing support for models with different token tier rates
- Updated calculateApiCostAnthropic and calculateApiCostOpenAI functions to check for tier pricing based on total input tokens
- Added getTieredPricing helper function that finds the appropriate tier based on token count
- Added comprehensive tests for tiered pricing scenarios
- This fixes pricing calculations for Claude Sonnet 4/4.5 and Gemini models when input exceeds 200K tokens

Fixes #8982
1 parent d0e519d commit 921cc45

File tree

2 files changed

+241
-4
lines changed

2 files changed

+241
-4
lines changed

src/shared/cost.ts

Lines changed: 56 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,55 @@ export interface ApiCostResult {
66
totalCost: number
77
}
88

9+
/**
10+
* Finds the appropriate pricing tier based on the total input tokens.
11+
* Returns the prices from the matching tier, or the base prices if no tiers are defined.
12+
*/
13+
function getTieredPricing(
14+
modelInfo: ModelInfo,
15+
totalInputTokens: number,
16+
): {
17+
inputPrice: number | undefined
18+
outputPrice: number | undefined
19+
cacheWritesPrice: number | undefined
20+
cacheReadsPrice: number | undefined
21+
} {
22+
// If there are no tiers defined, use the base prices
23+
if (!modelInfo.tiers || modelInfo.tiers.length === 0) {
24+
return {
25+
inputPrice: modelInfo.inputPrice,
26+
outputPrice: modelInfo.outputPrice,
27+
cacheWritesPrice: modelInfo.cacheWritesPrice,
28+
cacheReadsPrice: modelInfo.cacheReadsPrice,
29+
}
30+
}
31+
32+
// Find the appropriate tier based on the total input tokens
33+
// Tiers are checked in order, and we use the first tier where the token count
34+
// is less than or equal to the tier's context window
35+
const tier = modelInfo.tiers.find((tier) => totalInputTokens <= tier.contextWindow)
36+
37+
if (tier) {
38+
// Use tier prices, falling back to base prices if not defined in the tier
39+
return {
40+
inputPrice: tier.inputPrice ?? modelInfo.inputPrice,
41+
outputPrice: tier.outputPrice ?? modelInfo.outputPrice,
42+
cacheWritesPrice: tier.cacheWritesPrice ?? modelInfo.cacheWritesPrice,
43+
cacheReadsPrice: tier.cacheReadsPrice ?? modelInfo.cacheReadsPrice,
44+
}
45+
}
46+
47+
// If no tier matches (all tiers have smaller context windows than the token count),
48+
// use the last (highest) tier's prices
49+
const lastTier = modelInfo.tiers[modelInfo.tiers.length - 1]
50+
return {
51+
inputPrice: lastTier.inputPrice ?? modelInfo.inputPrice,
52+
outputPrice: lastTier.outputPrice ?? modelInfo.outputPrice,
53+
cacheWritesPrice: lastTier.cacheWritesPrice ?? modelInfo.cacheWritesPrice,
54+
cacheReadsPrice: lastTier.cacheReadsPrice ?? modelInfo.cacheReadsPrice,
55+
}
56+
}
57+
958
function calculateApiCostInternal(
1059
modelInfo: ModelInfo,
1160
inputTokens: number,
@@ -15,10 +64,13 @@ function calculateApiCostInternal(
1564
totalInputTokens: number,
1665
totalOutputTokens: number,
1766
): ApiCostResult {
18-
const cacheWritesCost = ((modelInfo.cacheWritesPrice || 0) / 1_000_000) * cacheCreationInputTokens
19-
const cacheReadsCost = ((modelInfo.cacheReadsPrice || 0) / 1_000_000) * cacheReadInputTokens
20-
const baseInputCost = ((modelInfo.inputPrice || 0) / 1_000_000) * inputTokens
21-
const outputCost = ((modelInfo.outputPrice || 0) / 1_000_000) * outputTokens
67+
// Get the appropriate prices based on the total input tokens (for tiered pricing)
68+
const { inputPrice, outputPrice, cacheWritesPrice, cacheReadsPrice } = getTieredPricing(modelInfo, totalInputTokens)
69+
70+
const cacheWritesCost = ((cacheWritesPrice || 0) / 1_000_000) * cacheCreationInputTokens
71+
const cacheReadsCost = ((cacheReadsPrice || 0) / 1_000_000) * cacheReadInputTokens
72+
const baseInputCost = ((inputPrice || 0) / 1_000_000) * inputTokens
73+
const outputCost = ((outputPrice || 0) / 1_000_000) * outputTokens
2274
const totalCost = cacheWritesCost + cacheReadsCost + baseInputCost + outputCost
2375

2476
return {

src/utils/__tests__/cost.spec.ts

Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,191 @@ describe("Cost Utility", () => {
220220
expect(result.totalCost).toBe(0.0105)
221221
expect(result.totalInputTokens).toBe(6000) // Total already includes cache
222222
expect(result.totalOutputTokens).toBe(500)
223+
224+
describe("tiered pricing", () => {
225+
const modelWithTiers: ModelInfo = {
226+
contextWindow: 200_000,
227+
supportsImages: true,
228+
supportsPromptCache: true,
229+
inputPrice: 3.0, // $3 per million tokens (<= 200K)
230+
outputPrice: 15.0, // $15 per million tokens (<= 200K)
231+
cacheWritesPrice: 3.75, // $3.75 per million tokens (<= 200K)
232+
cacheReadsPrice: 0.3, // $0.30 per million tokens (<= 200K)
233+
tiers: [
234+
{
235+
contextWindow: 1_000_000, // 1M tokens
236+
inputPrice: 6.0, // $6 per million tokens (> 200K)
237+
outputPrice: 22.5, // $22.50 per million tokens (> 200K)
238+
cacheWritesPrice: 7.5, // $7.50 per million tokens (> 200K)
239+
cacheReadsPrice: 0.6, // $0.60 per million tokens (> 200K)
240+
},
241+
],
242+
}
243+
244+
it("should use base prices when total input tokens are below 200K", () => {
245+
const result = calculateApiCostAnthropic(modelWithTiers, 50_000, 10_000, 50_000, 50_000)
246+
247+
// Total input: 50K + 50K + 50K = 150K (below 200K threshold)
248+
// Should use base prices: $3/$15
249+
// Input cost: (3.0 / 1_000_000) * 50_000 = 0.15
250+
// Output cost: (15.0 / 1_000_000) * 10_000 = 0.15
251+
// Cache writes: (3.75 / 1_000_000) * 50_000 = 0.1875
252+
// Cache reads: (0.3 / 1_000_000) * 50_000 = 0.015
253+
// Total: 0.15 + 0.15 + 0.1875 + 0.015 = 0.5025
254+
expect(result.totalInputTokens).toBe(150_000)
255+
expect(result.totalOutputTokens).toBe(10_000)
256+
expect(result.totalCost).toBeCloseTo(0.5025, 6)
257+
})
258+
259+
it("should use tier prices when total input tokens exceed 200K", () => {
260+
const result = calculateApiCostAnthropic(modelWithTiers, 100_000, 20_000, 100_000, 100_000)
261+
262+
// Total input: 100K + 100K + 100K = 300K (above 200K, below 1M)
263+
// Should use tier prices: $6/$22.50
264+
// Input cost: (6.0 / 1_000_000) * 100_000 = 0.6
265+
// Output cost: (22.5 / 1_000_000) * 20_000 = 0.45
266+
// Cache writes: (7.5 / 1_000_000) * 100_000 = 0.75
267+
// Cache reads: (0.6 / 1_000_000) * 100_000 = 0.06
268+
// Total: 0.6 + 0.45 + 0.75 + 0.06 = 1.86
269+
expect(result.totalInputTokens).toBe(300_000)
270+
expect(result.totalOutputTokens).toBe(20_000)
271+
expect(result.totalCost).toBeCloseTo(1.86, 6)
272+
})
273+
274+
it("should use the highest tier prices when exceeding all tier thresholds", () => {
275+
const result = calculateApiCostAnthropic(modelWithTiers, 500_000, 50_000, 300_000, 300_000)
276+
277+
// Total input: 500K + 300K + 300K = 1.1M (above 1M threshold)
278+
// Should use highest tier prices: $6/$22.50 (last tier)
279+
// Input cost: (6.0 / 1_000_000) * 500_000 = 3.0
280+
// Output cost: (22.5 / 1_000_000) * 50_000 = 1.125
281+
// Cache writes: (7.5 / 1_000_000) * 300_000 = 2.25
282+
// Cache reads: (0.6 / 1_000_000) * 300_000 = 0.18
283+
// Total: 3.0 + 1.125 + 2.25 + 0.18 = 6.555
284+
expect(result.totalInputTokens).toBe(1_100_000)
285+
expect(result.totalOutputTokens).toBe(50_000)
286+
expect(result.totalCost).toBeCloseTo(6.555, 6)
287+
})
288+
289+
it("should handle partial tier definitions", () => {
290+
// Model where tier only overrides some prices
291+
const modelPartialTiers: ModelInfo = {
292+
contextWindow: 200_000,
293+
supportsImages: true,
294+
supportsPromptCache: true,
295+
inputPrice: 3.0,
296+
outputPrice: 15.0,
297+
cacheWritesPrice: 3.75,
298+
cacheReadsPrice: 0.3,
299+
tiers: [
300+
{
301+
contextWindow: 1_000_000,
302+
inputPrice: 6.0, // Only input price changes
303+
// output, cacheWrites, cacheReads prices should fall back to base
304+
},
305+
],
306+
}
307+
308+
const result = calculateApiCostAnthropic(modelPartialTiers, 100_000, 20_000, 100_000, 100_000)
309+
310+
// Total input: 300K (uses tier)
311+
// Input cost: (6.0 / 1_000_000) * 100_000 = 0.6 (tier price)
312+
// Output cost: (15.0 / 1_000_000) * 20_000 = 0.3 (base price)
313+
// Cache writes: (3.75 / 1_000_000) * 100_000 = 0.375 (base price)
314+
// Cache reads: (0.3 / 1_000_000) * 100_000 = 0.03 (base price)
315+
// Total: 0.6 + 0.3 + 0.375 + 0.03 = 1.305
316+
expect(result.totalInputTokens).toBe(300_000)
317+
expect(result.totalOutputTokens).toBe(20_000)
318+
expect(result.totalCost).toBeCloseTo(1.305, 6)
319+
})
320+
321+
it("should handle multiple tiers correctly", () => {
322+
const modelMultipleTiers: ModelInfo = {
323+
contextWindow: 128_000,
324+
supportsImages: true,
325+
supportsPromptCache: true,
326+
inputPrice: 0.075, // <= 128K
327+
outputPrice: 0.3,
328+
tiers: [
329+
{
330+
contextWindow: 200_000, // First tier
331+
inputPrice: 0.15,
332+
outputPrice: 0.6,
333+
},
334+
{
335+
contextWindow: 1_000_000, // Second tier
336+
inputPrice: 0.3,
337+
outputPrice: 1.2,
338+
},
339+
],
340+
}
341+
342+
// Test below first threshold (128K)
343+
let result = calculateApiCostAnthropic(modelMultipleTiers, 50_000, 10_000)
344+
expect(result.totalCost).toBeCloseTo((0.075 * 50 + 0.3 * 10) / 1000, 6)
345+
346+
// Test between first and second threshold (150K)
347+
result = calculateApiCostAnthropic(modelMultipleTiers, 150_000, 10_000)
348+
expect(result.totalCost).toBeCloseTo((0.15 * 150 + 0.6 * 10) / 1000, 6)
349+
350+
// Test above second threshold (500K)
351+
result = calculateApiCostAnthropic(modelMultipleTiers, 500_000, 10_000)
352+
expect(result.totalCost).toBeCloseTo((0.3 * 500 + 1.2 * 10) / 1000, 6)
353+
})
354+
})
355+
356+
describe("tiered pricing for OpenAI", () => {
357+
const modelWithTiers: ModelInfo = {
358+
contextWindow: 200_000,
359+
supportsImages: true,
360+
supportsPromptCache: true,
361+
inputPrice: 3.0, // $3 per million tokens (<= 200K)
362+
outputPrice: 15.0, // $15 per million tokens (<= 200K)
363+
cacheWritesPrice: 3.75, // $3.75 per million tokens (<= 200K)
364+
cacheReadsPrice: 0.3, // $0.30 per million tokens (<= 200K)
365+
tiers: [
366+
{
367+
contextWindow: 1_000_000, // 1M tokens
368+
inputPrice: 6.0, // $6 per million tokens (> 200K)
369+
outputPrice: 22.5, // $22.50 per million tokens (> 200K)
370+
cacheWritesPrice: 7.5, // $7.50 per million tokens (> 200K)
371+
cacheReadsPrice: 0.6, // $0.60 per million tokens (> 200K)
372+
},
373+
],
374+
}
375+
376+
it("should use tier prices for OpenAI when total input tokens exceed threshold", () => {
377+
// Total input: 300K (includes all tokens)
378+
const result = calculateApiCostOpenAI(modelWithTiers, 300_000, 20_000, 100_000, 100_000)
379+
380+
// Total input is 300K (above 200K, below 1M) - uses tier pricing
381+
// Non-cached input: 300K - 100K - 100K = 100K
382+
// Input cost: (6.0 / 1_000_000) * 100_000 = 0.6
383+
// Output cost: (22.5 / 1_000_000) * 20_000 = 0.45
384+
// Cache writes: (7.5 / 1_000_000) * 100_000 = 0.75
385+
// Cache reads: (0.6 / 1_000_000) * 100_000 = 0.06
386+
// Total: 0.6 + 0.45 + 0.75 + 0.06 = 1.86
387+
expect(result.totalInputTokens).toBe(300_000)
388+
expect(result.totalOutputTokens).toBe(20_000)
389+
expect(result.totalCost).toBeCloseTo(1.86, 6)
390+
})
391+
392+
it("should use base prices for OpenAI when total input tokens are below threshold", () => {
393+
// Total input: 150K (includes all tokens)
394+
const result = calculateApiCostOpenAI(modelWithTiers, 150_000, 10_000, 50_000, 50_000)
395+
396+
// Total input is 150K (below 200K) - uses base pricing
397+
// Non-cached input: 150K - 50K - 50K = 50K
398+
// Input cost: (3.0 / 1_000_000) * 50_000 = 0.15
399+
// Output cost: (15.0 / 1_000_000) * 10_000 = 0.15
400+
// Cache writes: (3.75 / 1_000_000) * 50_000 = 0.1875
401+
// Cache reads: (0.3 / 1_000_000) * 50_000 = 0.015
402+
// Total: 0.15 + 0.15 + 0.1875 + 0.015 = 0.5025
403+
expect(result.totalInputTokens).toBe(150_000)
404+
expect(result.totalOutputTokens).toBe(10_000)
405+
expect(result.totalCost).toBeCloseTo(0.5025, 6)
406+
})
407+
})
223408
})
224409
})
225410
})

0 commit comments

Comments
 (0)