Skip to content
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 66 additions & 4 deletions src/shared/cost.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,65 @@ export interface ApiCostResult {
totalCost: number
}

/**
* Finds the appropriate pricing tier based on the total input tokens.
* Returns the prices from the matching tier, or the base prices if no tiers are defined.
*/
function getTieredPricing(
modelInfo: ModelInfo,
totalInputTokens: number,
): {
inputPrice: number | undefined
outputPrice: number | undefined
cacheWritesPrice: number | undefined
cacheReadsPrice: number | undefined
} {
// If there are no tiers defined, use the base prices
if (!modelInfo.tiers || modelInfo.tiers.length === 0) {
return {
inputPrice: modelInfo.inputPrice,
outputPrice: modelInfo.outputPrice,
cacheWritesPrice: modelInfo.cacheWritesPrice,
cacheReadsPrice: modelInfo.cacheReadsPrice,
}
}

// If within base context window, use base prices
if (totalInputTokens <= modelInfo.contextWindow) {
return {
inputPrice: modelInfo.inputPrice,
outputPrice: modelInfo.outputPrice,
cacheWritesPrice: modelInfo.cacheWritesPrice,
cacheReadsPrice: modelInfo.cacheReadsPrice,
}
}
Comment on lines +32 to +40
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The tiered pricing logic is broken. This check will almost always be true for valid token counts (e.g., Gemini 2.5 Pro has contextWindow=1,048,576), causing the function to return base prices and never evaluate tiers. For example, with 300K tokens and contextWindow=1M, this check passes and returns base prices instead of checking the 200K tier threshold. The check should compare against a tier threshold (e.g., 200K for Gemini), not the full context window. This makes the entire tiered pricing feature non-functional—models will always use base prices regardless of token count.


// Find the appropriate tier based on the total input tokens
// Tiers are checked in order, and we use the first tier where the token count
// is less than or equal to the tier's context window
const tier = modelInfo.tiers.find((tier) => totalInputTokens <= tier.contextWindow)

if (tier) {
// Use tier prices, falling back to base prices if not defined in the tier
return {
inputPrice: tier.inputPrice ?? modelInfo.inputPrice,
outputPrice: tier.outputPrice ?? modelInfo.outputPrice,
cacheWritesPrice: tier.cacheWritesPrice ?? modelInfo.cacheWritesPrice,
cacheReadsPrice: tier.cacheReadsPrice ?? modelInfo.cacheReadsPrice,
}
}

// If no tier matches (all tiers have smaller context windows than the token count),
// use the last (highest) tier's prices
const lastTier = modelInfo.tiers[modelInfo.tiers.length - 1]
return {
inputPrice: lastTier.inputPrice ?? modelInfo.inputPrice,
outputPrice: lastTier.outputPrice ?? modelInfo.outputPrice,
cacheWritesPrice: lastTier.cacheWritesPrice ?? modelInfo.cacheWritesPrice,
cacheReadsPrice: lastTier.cacheReadsPrice ?? modelInfo.cacheReadsPrice,
}
}

function calculateApiCostInternal(
modelInfo: ModelInfo,
inputTokens: number,
Expand All @@ -15,10 +74,13 @@ function calculateApiCostInternal(
totalInputTokens: number,
totalOutputTokens: number,
): ApiCostResult {
const cacheWritesCost = ((modelInfo.cacheWritesPrice || 0) / 1_000_000) * cacheCreationInputTokens
const cacheReadsCost = ((modelInfo.cacheReadsPrice || 0) / 1_000_000) * cacheReadInputTokens
const baseInputCost = ((modelInfo.inputPrice || 0) / 1_000_000) * inputTokens
const outputCost = ((modelInfo.outputPrice || 0) / 1_000_000) * outputTokens
// Get the appropriate prices based on the total input tokens (for tiered pricing)
const { inputPrice, outputPrice, cacheWritesPrice, cacheReadsPrice } = getTieredPricing(modelInfo, totalInputTokens)

const cacheWritesCost = ((cacheWritesPrice || 0) / 1_000_000) * cacheCreationInputTokens
const cacheReadsCost = ((cacheReadsPrice || 0) / 1_000_000) * cacheReadInputTokens
const baseInputCost = ((inputPrice || 0) / 1_000_000) * inputTokens
const outputCost = ((outputPrice || 0) / 1_000_000) * outputTokens
const totalCost = cacheWritesCost + cacheReadsCost + baseInputCost + outputCost

return {
Expand Down
185 changes: 185 additions & 0 deletions src/utils/__tests__/cost.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -221,5 +221,190 @@ describe("Cost Utility", () => {
expect(result.totalInputTokens).toBe(6000) // Total already includes cache
expect(result.totalOutputTokens).toBe(500)
})

// NOTE(review): these tests exercise calculateApiCostAnthropic but appear to be
// nested inside the calculateApiCostOpenAI describe block — consider moving them
// under describe("calculateApiCostAnthropic") so the suite structure matches the
// function under test. TODO confirm against the full file.
describe("tiered pricing", () => {
	// Model whose base prices apply up to contextWindow (200K total input
	// tokens) and whose single tier applies from there up to 1M tokens.
	const modelWithTiers: ModelInfo = {
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 3.0, // $3 per million tokens (<= 200K)
		outputPrice: 15.0, // $15 per million tokens (<= 200K)
		cacheWritesPrice: 3.75, // $3.75 per million tokens (<= 200K)
		cacheReadsPrice: 0.3, // $0.30 per million tokens (<= 200K)
		tiers: [
			{
				contextWindow: 1_000_000, // 1M tokens
				inputPrice: 6.0, // $6 per million tokens (> 200K)
				outputPrice: 22.5, // $22.50 per million tokens (> 200K)
				cacheWritesPrice: 7.5, // $7.50 per million tokens (> 200K)
				cacheReadsPrice: 0.6, // $0.60 per million tokens (> 200K)
			},
		],
	}

	it("should use base prices when total input tokens are below 200K", () => {
		const result = calculateApiCostAnthropic(modelWithTiers, 50_000, 10_000, 50_000, 50_000)

		// Total input: 50K + 50K + 50K = 150K (below 200K threshold)
		// Should use base prices: $3/$15
		// Input cost: (3.0 / 1_000_000) * 50_000 = 0.15
		// Output cost: (15.0 / 1_000_000) * 10_000 = 0.15
		// Cache writes: (3.75 / 1_000_000) * 50_000 = 0.1875
		// Cache reads: (0.3 / 1_000_000) * 50_000 = 0.015
		// Total: 0.15 + 0.15 + 0.1875 + 0.015 = 0.5025
		expect(result.totalInputTokens).toBe(150_000)
		expect(result.totalOutputTokens).toBe(10_000)
		expect(result.totalCost).toBeCloseTo(0.5025, 6)
	})

	it("should use tier prices when total input tokens exceed 200K", () => {
		const result = calculateApiCostAnthropic(modelWithTiers, 100_000, 20_000, 100_000, 100_000)

		// Total input: 100K + 100K + 100K = 300K (above 200K, below 1M)
		// Should use tier prices: $6/$22.50
		// Input cost: (6.0 / 1_000_000) * 100_000 = 0.6
		// Output cost: (22.5 / 1_000_000) * 20_000 = 0.45
		// Cache writes: (7.5 / 1_000_000) * 100_000 = 0.75
		// Cache reads: (0.6 / 1_000_000) * 100_000 = 0.06
		// Total: 0.6 + 0.45 + 0.75 + 0.06 = 1.86
		expect(result.totalInputTokens).toBe(300_000)
		expect(result.totalOutputTokens).toBe(20_000)
		expect(result.totalCost).toBeCloseTo(1.86, 6)
	})

	it("should use the highest tier prices when exceeding all tier thresholds", () => {
		const result = calculateApiCostAnthropic(modelWithTiers, 500_000, 50_000, 300_000, 300_000)

		// Total input: 500K + 300K + 300K = 1.1M (above 1M threshold)
		// Should use highest tier prices: $6/$22.50 (last tier)
		// Input cost: (6.0 / 1_000_000) * 500_000 = 3.0
		// Output cost: (22.5 / 1_000_000) * 50_000 = 1.125
		// Cache writes: (7.5 / 1_000_000) * 300_000 = 2.25
		// Cache reads: (0.6 / 1_000_000) * 300_000 = 0.18
		// Total: 3.0 + 1.125 + 2.25 + 0.18 = 6.555
		expect(result.totalInputTokens).toBe(1_100_000)
		expect(result.totalOutputTokens).toBe(50_000)
		expect(result.totalCost).toBeCloseTo(6.555, 6)
	})

	it("should handle partial tier definitions", () => {
		// Model where tier only overrides some prices
		const modelPartialTiers: ModelInfo = {
			contextWindow: 200_000,
			supportsImages: true,
			supportsPromptCache: true,
			inputPrice: 3.0,
			outputPrice: 15.0,
			cacheWritesPrice: 3.75,
			cacheReadsPrice: 0.3,
			tiers: [
				{
					contextWindow: 1_000_000,
					inputPrice: 6.0, // Only input price changes
					// output, cacheWrites, cacheReads prices should fall back to base
				},
			],
		}

		const result = calculateApiCostAnthropic(modelPartialTiers, 100_000, 20_000, 100_000, 100_000)

		// Total input: 300K (uses tier)
		// Input cost: (6.0 / 1_000_000) * 100_000 = 0.6 (tier price)
		// Output cost: (15.0 / 1_000_000) * 20_000 = 0.3 (base price)
		// Cache writes: (3.75 / 1_000_000) * 100_000 = 0.375 (base price)
		// Cache reads: (0.3 / 1_000_000) * 100_000 = 0.03 (base price)
		// Total: 0.6 + 0.3 + 0.375 + 0.03 = 1.305
		expect(result.totalInputTokens).toBe(300_000)
		expect(result.totalOutputTokens).toBe(20_000)
		expect(result.totalCost).toBeCloseTo(1.305, 6)
	})

	it("should handle multiple tiers correctly", () => {
		// Two tiers: base up to 128K, first tier up to 200K, second up to 1M.
		const modelMultipleTiers: ModelInfo = {
			contextWindow: 128_000,
			supportsImages: true,
			supportsPromptCache: true,
			inputPrice: 0.075, // <= 128K
			outputPrice: 0.3,
			tiers: [
				{
					contextWindow: 200_000, // First tier
					inputPrice: 0.15,
					outputPrice: 0.6,
				},
				{
					contextWindow: 1_000_000, // Second tier
					inputPrice: 0.3,
					outputPrice: 1.2,
				},
			],
		}

		// Test below first threshold (128K)
		let result = calculateApiCostAnthropic(modelMultipleTiers, 50_000, 10_000)
		expect(result.totalCost).toBeCloseTo((0.075 * 50 + 0.3 * 10) / 1000, 6)

		// Test between first and second threshold (150K)
		result = calculateApiCostAnthropic(modelMultipleTiers, 150_000, 10_000)
		expect(result.totalCost).toBeCloseTo((0.15 * 150 + 0.6 * 10) / 1000, 6)

		// Test above second threshold (500K)
		result = calculateApiCostAnthropic(modelMultipleTiers, 500_000, 10_000)
		expect(result.totalCost).toBeCloseTo((0.3 * 500 + 1.2 * 10) / 1000, 6)
	})
})
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The describe("tiered pricing") block and all its tests call calculateApiCostAnthropic(), but this describe block is nested inside the describe("calculateApiCostOpenAI") block that starts at line 117. This creates test organization confusion. These Anthropic-specific tiered pricing tests should be moved inside the describe("calculateApiCostAnthropic") block (which currently ends at line 115) to match their actual function under test.


// Tier selection for the OpenAI path: here the second argument is the TOTAL
// input token count (cache tokens included), so the non-cached portion is
// derived by subtracting the cache-write and cache-read tokens.
describe("tiered pricing for OpenAI", () => {
	const modelWithTiers: ModelInfo = {
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 3.0, // $3 per million tokens (<= 200K)
		outputPrice: 15.0, // $15 per million tokens (<= 200K)
		cacheWritesPrice: 3.75, // $3.75 per million tokens (<= 200K)
		cacheReadsPrice: 0.3, // $0.30 per million tokens (<= 200K)
		tiers: [
			{
				contextWindow: 1_000_000, // 1M tokens
				inputPrice: 6.0, // $6 per million tokens (> 200K)
				outputPrice: 22.5, // $22.50 per million tokens (> 200K)
				cacheWritesPrice: 7.5, // $7.50 per million tokens (> 200K)
				cacheReadsPrice: 0.6, // $0.60 per million tokens (> 200K)
			},
		],
	}

	it("should use tier prices for OpenAI when total input tokens exceed threshold", () => {
		// Total input: 300K (includes all tokens)
		const result = calculateApiCostOpenAI(modelWithTiers, 300_000, 20_000, 100_000, 100_000)

		// Total input is 300K (above 200K, below 1M) - uses tier pricing
		// Non-cached input: 300K - 100K - 100K = 100K
		// Input cost: (6.0 / 1_000_000) * 100_000 = 0.6
		// Output cost: (22.5 / 1_000_000) * 20_000 = 0.45
		// Cache writes: (7.5 / 1_000_000) * 100_000 = 0.75
		// Cache reads: (0.6 / 1_000_000) * 100_000 = 0.06
		// Total: 0.6 + 0.45 + 0.75 + 0.06 = 1.86
		expect(result.totalInputTokens).toBe(300_000)
		expect(result.totalOutputTokens).toBe(20_000)
		expect(result.totalCost).toBeCloseTo(1.86, 6)
	})

	it("should use base prices for OpenAI when total input tokens are below threshold", () => {
		// Total input: 150K (includes all tokens)
		const result = calculateApiCostOpenAI(modelWithTiers, 150_000, 10_000, 50_000, 50_000)

		// Total input is 150K (below 200K) - uses base pricing
		// Non-cached input: 150K - 50K - 50K = 50K
		// Input cost: (3.0 / 1_000_000) * 50_000 = 0.15
		// Output cost: (15.0 / 1_000_000) * 10_000 = 0.15
		// Cache writes: (3.75 / 1_000_000) * 50_000 = 0.1875
		// Cache reads: (0.3 / 1_000_000) * 50_000 = 0.015
		// Total: 0.15 + 0.15 + 0.1875 + 0.015 = 0.5025
		expect(result.totalInputTokens).toBe(150_000)
		expect(result.totalOutputTokens).toBe(10_000)
		expect(result.totalCost).toBeCloseTo(0.5025, 6)
	})
})
})
})