-
Notifications
You must be signed in to change notification settings - Fork 2.4k
fix: add tiered pricing support for models with different token tier rates #8984
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
921cc45
5e34db1
7cfe770
389524d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -221,5 +221,190 @@ describe("Cost Utility", () => { | |
| expect(result.totalInputTokens).toBe(6000) // Total already includes cache | ||
| expect(result.totalOutputTokens).toBe(500) | ||
| }) | ||
|
|
||
| describe("tiered pricing", () => { | ||
| const modelWithTiers: ModelInfo = { | ||
| contextWindow: 200_000, | ||
| supportsImages: true, | ||
| supportsPromptCache: true, | ||
| inputPrice: 3.0, // $3 per million tokens (<= 200K) | ||
| outputPrice: 15.0, // $15 per million tokens (<= 200K) | ||
| cacheWritesPrice: 3.75, // $3.75 per million tokens (<= 200K) | ||
| cacheReadsPrice: 0.3, // $0.30 per million tokens (<= 200K) | ||
| tiers: [ | ||
| { | ||
| contextWindow: 1_000_000, // 1M tokens | ||
| inputPrice: 6.0, // $6 per million tokens (> 200K) | ||
| outputPrice: 22.5, // $22.50 per million tokens (> 200K) | ||
| cacheWritesPrice: 7.5, // $7.50 per million tokens (> 200K) | ||
| cacheReadsPrice: 0.6, // $0.60 per million tokens (> 200K) | ||
| }, | ||
| ], | ||
| } | ||
|
|
||
| it("should use base prices when total input tokens are below 200K", () => { | ||
| const result = calculateApiCostAnthropic(modelWithTiers, 50_000, 10_000, 50_000, 50_000) | ||
|
|
||
| // Total input: 50K + 50K + 50K = 150K (below 200K threshold) | ||
| // Should use base prices: $3/$15 | ||
| // Input cost: (3.0 / 1_000_000) * 50_000 = 0.15 | ||
| // Output cost: (15.0 / 1_000_000) * 10_000 = 0.15 | ||
| // Cache writes: (3.75 / 1_000_000) * 50_000 = 0.1875 | ||
| // Cache reads: (0.3 / 1_000_000) * 50_000 = 0.015 | ||
| // Total: 0.15 + 0.15 + 0.1875 + 0.015 = 0.5025 | ||
| expect(result.totalInputTokens).toBe(150_000) | ||
| expect(result.totalOutputTokens).toBe(10_000) | ||
| expect(result.totalCost).toBeCloseTo(0.5025, 6) | ||
| }) | ||
|
|
||
| it("should use tier prices when total input tokens exceed 200K", () => { | ||
| const result = calculateApiCostAnthropic(modelWithTiers, 100_000, 20_000, 100_000, 100_000) | ||
|
|
||
| // Total input: 100K + 100K + 100K = 300K (above 200K, below 1M) | ||
| // Should use tier prices: $6/$22.50 | ||
| // Input cost: (6.0 / 1_000_000) * 100_000 = 0.6 | ||
| // Output cost: (22.5 / 1_000_000) * 20_000 = 0.45 | ||
| // Cache writes: (7.5 / 1_000_000) * 100_000 = 0.75 | ||
| // Cache reads: (0.6 / 1_000_000) * 100_000 = 0.06 | ||
| // Total: 0.6 + 0.45 + 0.75 + 0.06 = 1.86 | ||
| expect(result.totalInputTokens).toBe(300_000) | ||
| expect(result.totalOutputTokens).toBe(20_000) | ||
| expect(result.totalCost).toBeCloseTo(1.86, 6) | ||
| }) | ||
|
|
||
| it("should use the highest tier prices when exceeding all tier thresholds", () => { | ||
| const result = calculateApiCostAnthropic(modelWithTiers, 500_000, 50_000, 300_000, 300_000) | ||
|
|
||
| // Total input: 500K + 300K + 300K = 1.1M (above 1M threshold) | ||
| // Should use highest tier prices: $6/$22.50 (last tier) | ||
| // Input cost: (6.0 / 1_000_000) * 500_000 = 3.0 | ||
| // Output cost: (22.5 / 1_000_000) * 50_000 = 1.125 | ||
| // Cache writes: (7.5 / 1_000_000) * 300_000 = 2.25 | ||
| // Cache reads: (0.6 / 1_000_000) * 300_000 = 0.18 | ||
| // Total: 3.0 + 1.125 + 2.25 + 0.18 = 6.555 | ||
| expect(result.totalInputTokens).toBe(1_100_000) | ||
| expect(result.totalOutputTokens).toBe(50_000) | ||
| expect(result.totalCost).toBeCloseTo(6.555, 6) | ||
| }) | ||
|
|
||
| it("should handle partial tier definitions", () => { | ||
| // Model where tier only overrides some prices | ||
| const modelPartialTiers: ModelInfo = { | ||
| contextWindow: 200_000, | ||
| supportsImages: true, | ||
| supportsPromptCache: true, | ||
| inputPrice: 3.0, | ||
| outputPrice: 15.0, | ||
| cacheWritesPrice: 3.75, | ||
| cacheReadsPrice: 0.3, | ||
| tiers: [ | ||
| { | ||
| contextWindow: 1_000_000, | ||
| inputPrice: 6.0, // Only input price changes | ||
| // output, cacheWrites, cacheReads prices should fall back to base | ||
| }, | ||
| ], | ||
| } | ||
|
|
||
| const result = calculateApiCostAnthropic(modelPartialTiers, 100_000, 20_000, 100_000, 100_000) | ||
|
|
||
| // Total input: 300K (uses tier) | ||
| // Input cost: (6.0 / 1_000_000) * 100_000 = 0.6 (tier price) | ||
| // Output cost: (15.0 / 1_000_000) * 20_000 = 0.3 (base price) | ||
| // Cache writes: (3.75 / 1_000_000) * 100_000 = 0.375 (base price) | ||
| // Cache reads: (0.3 / 1_000_000) * 100_000 = 0.03 (base price) | ||
| // Total: 0.6 + 0.3 + 0.375 + 0.03 = 1.305 | ||
| expect(result.totalInputTokens).toBe(300_000) | ||
| expect(result.totalOutputTokens).toBe(20_000) | ||
| expect(result.totalCost).toBeCloseTo(1.305, 6) | ||
| }) | ||
|
|
||
| it("should handle multiple tiers correctly", () => { | ||
| const modelMultipleTiers: ModelInfo = { | ||
| contextWindow: 128_000, | ||
| supportsImages: true, | ||
| supportsPromptCache: true, | ||
| inputPrice: 0.075, // <= 128K | ||
| outputPrice: 0.3, | ||
| tiers: [ | ||
| { | ||
| contextWindow: 200_000, // First tier | ||
| inputPrice: 0.15, | ||
| outputPrice: 0.6, | ||
| }, | ||
| { | ||
| contextWindow: 1_000_000, // Second tier | ||
| inputPrice: 0.3, | ||
| outputPrice: 1.2, | ||
| }, | ||
| ], | ||
| } | ||
|
|
||
| // Test below first threshold (128K) | ||
| let result = calculateApiCostAnthropic(modelMultipleTiers, 50_000, 10_000) | ||
| expect(result.totalCost).toBeCloseTo((0.075 * 50 + 0.3 * 10) / 1000, 6) | ||
|
|
||
| // Test between first and second threshold (150K) | ||
| result = calculateApiCostAnthropic(modelMultipleTiers, 150_000, 10_000) | ||
| expect(result.totalCost).toBeCloseTo((0.15 * 150 + 0.6 * 10) / 1000, 6) | ||
|
|
||
| // Test above second threshold (500K) | ||
| result = calculateApiCostAnthropic(modelMultipleTiers, 500_000, 10_000) | ||
| expect(result.totalCost).toBeCloseTo((0.3 * 500 + 1.2 * 10) / 1000, 6) | ||
| }) | ||
| }) | ||
|
||
|
|
||
| describe("tiered pricing for OpenAI", () => { | ||
| const modelWithTiers: ModelInfo = { | ||
| contextWindow: 200_000, | ||
| supportsImages: true, | ||
| supportsPromptCache: true, | ||
| inputPrice: 3.0, // $3 per million tokens (<= 200K) | ||
| outputPrice: 15.0, // $15 per million tokens (<= 200K) | ||
| cacheWritesPrice: 3.75, // $3.75 per million tokens (<= 200K) | ||
| cacheReadsPrice: 0.3, // $0.30 per million tokens (<= 200K) | ||
| tiers: [ | ||
| { | ||
| contextWindow: 1_000_000, // 1M tokens | ||
| inputPrice: 6.0, // $6 per million tokens (> 200K) | ||
| outputPrice: 22.5, // $22.50 per million tokens (> 200K) | ||
| cacheWritesPrice: 7.5, // $7.50 per million tokens (> 200K) | ||
| cacheReadsPrice: 0.6, // $0.60 per million tokens (> 200K) | ||
| }, | ||
| ], | ||
| } | ||
|
|
||
| it("should use tier prices for OpenAI when total input tokens exceed threshold", () => { | ||
| // Total input: 300K (includes all tokens) | ||
| const result = calculateApiCostOpenAI(modelWithTiers, 300_000, 20_000, 100_000, 100_000) | ||
|
|
||
| // Total input is 300K (above 200K, below 1M) - uses tier pricing | ||
| // Non-cached input: 300K - 100K - 100K = 100K | ||
| // Input cost: (6.0 / 1_000_000) * 100_000 = 0.6 | ||
| // Output cost: (22.5 / 1_000_000) * 20_000 = 0.45 | ||
| // Cache writes: (7.5 / 1_000_000) * 100_000 = 0.75 | ||
| // Cache reads: (0.6 / 1_000_000) * 100_000 = 0.06 | ||
| // Total: 0.6 + 0.45 + 0.75 + 0.06 = 1.86 | ||
| expect(result.totalInputTokens).toBe(300_000) | ||
| expect(result.totalOutputTokens).toBe(20_000) | ||
| expect(result.totalCost).toBeCloseTo(1.86, 6) | ||
| }) | ||
|
|
||
| it("should use base prices for OpenAI when total input tokens are below threshold", () => { | ||
| // Total input: 150K (includes all tokens) | ||
| const result = calculateApiCostOpenAI(modelWithTiers, 150_000, 10_000, 50_000, 50_000) | ||
|
|
||
| // Total input is 150K (below 200K) - uses base pricing | ||
| // Non-cached input: 150K - 50K - 50K = 50K | ||
| // Input cost: (3.0 / 1_000_000) * 50_000 = 0.15 | ||
| // Output cost: (15.0 / 1_000_000) * 10_000 = 0.15 | ||
| // Cache writes: (3.75 / 1_000_000) * 50_000 = 0.1875 | ||
| // Cache reads: (0.3 / 1_000_000) * 50_000 = 0.015 | ||
| // Total: 0.15 + 0.15 + 0.1875 + 0.015 = 0.5025 | ||
| expect(result.totalInputTokens).toBe(150_000) | ||
| expect(result.totalOutputTokens).toBe(10_000) | ||
| expect(result.totalCost).toBeCloseTo(0.5025, 6) | ||
| }) | ||
| }) | ||
| }) | ||
| }) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The tiered pricing logic is broken: the guard compares the total input tokens against the model's full contextWindow, which will almost always be true for valid token counts (e.g., Gemini 2.5 Pro has contextWindow=1,048,576), so the function returns base prices and never evaluates the tiers. For example, with 300K tokens and contextWindow=1M, the check passes and base prices are returned instead of the 200K tier threshold being consulted. The comparison should be against a tier threshold (e.g., 200K for Gemini), not the full context window. As written, the tiered pricing feature is non-functional — models always use base prices regardless of token count.