-
Notifications
You must be signed in to change notification settings - Fork 2.4k
fix: add tiered pricing support for models with different token tier rates #8984
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
921cc45
5e34db1
7cfe770
389524d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -221,5 +221,190 @@ describe("Cost Utility", () => { | |
| expect(result.totalInputTokens).toBe(6000) // Total already includes cache | ||
| expect(result.totalOutputTokens).toBe(500) | ||
| }) | ||
|
|
||
| describe("tiered pricing", () => { | ||
| const modelWithTiers: ModelInfo = { | ||
| contextWindow: 200_000, | ||
| supportsImages: true, | ||
| supportsPromptCache: true, | ||
| inputPrice: 3.0, // $3 per million tokens (<= 200K) | ||
| outputPrice: 15.0, // $15 per million tokens (<= 200K) | ||
| cacheWritesPrice: 3.75, // $3.75 per million tokens (<= 200K) | ||
| cacheReadsPrice: 0.3, // $0.30 per million tokens (<= 200K) | ||
| tiers: [ | ||
| { | ||
| contextWindow: 1_000_000, // 1M tokens | ||
| inputPrice: 6.0, // $6 per million tokens (> 200K) | ||
| outputPrice: 22.5, // $22.50 per million tokens (> 200K) | ||
| cacheWritesPrice: 7.5, // $7.50 per million tokens (> 200K) | ||
| cacheReadsPrice: 0.6, // $0.60 per million tokens (> 200K) | ||
| }, | ||
| ], | ||
| } | ||
|
|
||
| it("should use base prices when total input tokens are below 200K", () => { | ||
| const result = calculateApiCostAnthropic(modelWithTiers, 50_000, 10_000, 50_000, 50_000) | ||
|
|
||
| // Total input: 50K + 50K + 50K = 150K (below 200K threshold) | ||
| // Should use base prices: $3/$15 | ||
| // Input cost: (3.0 / 1_000_000) * 50_000 = 0.15 | ||
| // Output cost: (15.0 / 1_000_000) * 10_000 = 0.15 | ||
| // Cache writes: (3.75 / 1_000_000) * 50_000 = 0.1875 | ||
| // Cache reads: (0.3 / 1_000_000) * 50_000 = 0.015 | ||
| // Total: 0.15 + 0.15 + 0.1875 + 0.015 = 0.5025 | ||
| expect(result.totalInputTokens).toBe(150_000) | ||
| expect(result.totalOutputTokens).toBe(10_000) | ||
| expect(result.totalCost).toBeCloseTo(0.5025, 6) | ||
| }) | ||
|
|
||
| it("should use tier prices when total input tokens exceed 200K", () => { | ||
| const result = calculateApiCostAnthropic(modelWithTiers, 100_000, 20_000, 100_000, 100_000) | ||
|
|
||
| // Total input: 100K + 100K + 100K = 300K (above 200K, below 1M) | ||
| // Should use tier prices: $6/$22.50 | ||
| // Input cost: (6.0 / 1_000_000) * 100_000 = 0.6 | ||
| // Output cost: (22.5 / 1_000_000) * 20_000 = 0.45 | ||
| // Cache writes: (7.5 / 1_000_000) * 100_000 = 0.75 | ||
| // Cache reads: (0.6 / 1_000_000) * 100_000 = 0.06 | ||
| // Total: 0.6 + 0.45 + 0.75 + 0.06 = 1.86 | ||
| expect(result.totalInputTokens).toBe(300_000) | ||
| expect(result.totalOutputTokens).toBe(20_000) | ||
| expect(result.totalCost).toBeCloseTo(1.86, 6) | ||
| }) | ||
|
|
||
| it("should use the highest tier prices when exceeding all tier thresholds", () => { | ||
| const result = calculateApiCostAnthropic(modelWithTiers, 500_000, 50_000, 300_000, 300_000) | ||
|
|
||
| // Total input: 500K + 300K + 300K = 1.1M (above 1M threshold) | ||
| // Should use highest tier prices: $6/$22.50 (last tier) | ||
| // Input cost: (6.0 / 1_000_000) * 500_000 = 3.0 | ||
| // Output cost: (22.5 / 1_000_000) * 50_000 = 1.125 | ||
| // Cache writes: (7.5 / 1_000_000) * 300_000 = 2.25 | ||
| // Cache reads: (0.6 / 1_000_000) * 300_000 = 0.18 | ||
| // Total: 3.0 + 1.125 + 2.25 + 0.18 = 6.555 | ||
| expect(result.totalInputTokens).toBe(1_100_000) | ||
| expect(result.totalOutputTokens).toBe(50_000) | ||
| expect(result.totalCost).toBeCloseTo(6.555, 6) | ||
| }) | ||
|
|
||
| it("should handle partial tier definitions", () => { | ||
| // Model where tier only overrides some prices | ||
| const modelPartialTiers: ModelInfo = { | ||
| contextWindow: 200_000, | ||
| supportsImages: true, | ||
| supportsPromptCache: true, | ||
| inputPrice: 3.0, | ||
| outputPrice: 15.0, | ||
| cacheWritesPrice: 3.75, | ||
| cacheReadsPrice: 0.3, | ||
| tiers: [ | ||
| { | ||
| contextWindow: 1_000_000, | ||
| inputPrice: 6.0, // Only input price changes | ||
| // output, cacheWrites, cacheReads prices should fall back to base | ||
| }, | ||
| ], | ||
| } | ||
|
|
||
| const result = calculateApiCostAnthropic(modelPartialTiers, 100_000, 20_000, 100_000, 100_000) | ||
|
|
||
| // Total input: 300K (uses tier) | ||
| // Input cost: (6.0 / 1_000_000) * 100_000 = 0.6 (tier price) | ||
| // Output cost: (15.0 / 1_000_000) * 20_000 = 0.3 (base price) | ||
| // Cache writes: (3.75 / 1_000_000) * 100_000 = 0.375 (base price) | ||
| // Cache reads: (0.3 / 1_000_000) * 100_000 = 0.03 (base price) | ||
| // Total: 0.6 + 0.3 + 0.375 + 0.03 = 1.305 | ||
| expect(result.totalInputTokens).toBe(300_000) | ||
| expect(result.totalOutputTokens).toBe(20_000) | ||
| expect(result.totalCost).toBeCloseTo(1.305, 6) | ||
| }) | ||
|
|
||
| it("should handle multiple tiers correctly", () => { | ||
| const modelMultipleTiers: ModelInfo = { | ||
| contextWindow: 128_000, | ||
| supportsImages: true, | ||
| supportsPromptCache: true, | ||
| inputPrice: 0.075, // <= 128K | ||
| outputPrice: 0.3, | ||
| tiers: [ | ||
| { | ||
| contextWindow: 200_000, // First tier | ||
| inputPrice: 0.15, | ||
| outputPrice: 0.6, | ||
| }, | ||
| { | ||
| contextWindow: 1_000_000, // Second tier | ||
| inputPrice: 0.3, | ||
| outputPrice: 1.2, | ||
| }, | ||
| ], | ||
| } | ||
|
|
||
| // Test below first threshold (128K) | ||
| let result = calculateApiCostAnthropic(modelMultipleTiers, 50_000, 10_000) | ||
| expect(result.totalCost).toBeCloseTo((0.075 * 50 + 0.3 * 10) / 1000, 6) | ||
|
|
||
| // Test between first and second threshold (150K) | ||
| result = calculateApiCostAnthropic(modelMultipleTiers, 150_000, 10_000) | ||
| expect(result.totalCost).toBeCloseTo((0.15 * 150 + 0.6 * 10) / 1000, 6) | ||
|
|
||
| // Test above second threshold (500K) | ||
| result = calculateApiCostAnthropic(modelMultipleTiers, 500_000, 10_000) | ||
| expect(result.totalCost).toBeCloseTo((0.3 * 500 + 1.2 * 10) / 1000, 6) | ||
| }) | ||
| }) | ||
|
||
|
|
||
| describe("tiered pricing for OpenAI", () => { | ||
| const modelWithTiers: ModelInfo = { | ||
| contextWindow: 200_000, | ||
| supportsImages: true, | ||
| supportsPromptCache: true, | ||
| inputPrice: 3.0, // $3 per million tokens (<= 200K) | ||
| outputPrice: 15.0, // $15 per million tokens (<= 200K) | ||
| cacheWritesPrice: 3.75, // $3.75 per million tokens (<= 200K) | ||
| cacheReadsPrice: 0.3, // $0.30 per million tokens (<= 200K) | ||
| tiers: [ | ||
| { | ||
| contextWindow: 1_000_000, // 1M tokens | ||
| inputPrice: 6.0, // $6 per million tokens (> 200K) | ||
| outputPrice: 22.5, // $22.50 per million tokens (> 200K) | ||
| cacheWritesPrice: 7.5, // $7.50 per million tokens (> 200K) | ||
| cacheReadsPrice: 0.6, // $0.60 per million tokens (> 200K) | ||
| }, | ||
| ], | ||
| } | ||
|
|
||
| it("should use tier prices for OpenAI when total input tokens exceed threshold", () => { | ||
| // Total input: 300K (includes all tokens) | ||
| const result = calculateApiCostOpenAI(modelWithTiers, 300_000, 20_000, 100_000, 100_000) | ||
|
|
||
| // Total input is 300K (above 200K, below 1M) - uses tier pricing | ||
| // Non-cached input: 300K - 100K - 100K = 100K | ||
| // Input cost: (6.0 / 1_000_000) * 100_000 = 0.6 | ||
| // Output cost: (22.5 / 1_000_000) * 20_000 = 0.45 | ||
| // Cache writes: (7.5 / 1_000_000) * 100_000 = 0.75 | ||
| // Cache reads: (0.6 / 1_000_000) * 100_000 = 0.06 | ||
| // Total: 0.6 + 0.45 + 0.75 + 0.06 = 1.86 | ||
| expect(result.totalInputTokens).toBe(300_000) | ||
| expect(result.totalOutputTokens).toBe(20_000) | ||
| expect(result.totalCost).toBeCloseTo(1.86, 6) | ||
| }) | ||
|
|
||
| it("should use base prices for OpenAI when total input tokens are below threshold", () => { | ||
| // Total input: 150K (includes all tokens) | ||
| const result = calculateApiCostOpenAI(modelWithTiers, 150_000, 10_000, 50_000, 50_000) | ||
|
|
||
| // Total input is 150K (below 200K) - uses base pricing | ||
| // Non-cached input: 150K - 50K - 50K = 50K | ||
| // Input cost: (3.0 / 1_000_000) * 50_000 = 0.15 | ||
| // Output cost: (15.0 / 1_000_000) * 10_000 = 0.15 | ||
| // Cache writes: (3.75 / 1_000_000) * 50_000 = 0.1875 | ||
| // Cache reads: (0.3 / 1_000_000) * 50_000 = 0.015 | ||
| // Total: 0.15 + 0.15 + 0.1875 + 0.015 = 0.5025 | ||
| expect(result.totalInputTokens).toBe(150_000) | ||
| expect(result.totalOutputTokens).toBe(10_000) | ||
| expect(result.totalCost).toBeCloseTo(0.5025, 6) | ||
| }) | ||
| }) | ||
| }) | ||
| }) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The tiered pricing logic is broken: the guard compares the total input tokens against the model's full contextWindow, which will almost always be true for valid token counts (e.g., Gemini 2.5 Pro has contextWindow=1,048,576), so the function returns base prices and never evaluates the tiers. For example, with 300K tokens and contextWindow=1M, the check passes and base prices are returned instead of the 200K tier threshold being consulted. The comparison should be against a tier threshold (e.g., 200K for Gemini), not the full context window. As written, the tiered pricing feature is non-functional — models always use base prices regardless of token count.