Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 56 additions & 4 deletions src/shared/cost.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,55 @@ export interface ApiCostResult {
totalCost: number
}

/**
* Finds the appropriate pricing tier based on the total input tokens.
* Returns the prices from the matching tier, or the base prices if no tiers are defined.
*/
function getTieredPricing(
modelInfo: ModelInfo,
totalInputTokens: number,
): {
inputPrice: number | undefined
outputPrice: number | undefined
cacheWritesPrice: number | undefined
cacheReadsPrice: number | undefined
} {
// If there are no tiers defined, use the base prices
if (!modelInfo.tiers || modelInfo.tiers.length === 0) {
return {
inputPrice: modelInfo.inputPrice,
outputPrice: modelInfo.outputPrice,
cacheWritesPrice: modelInfo.cacheWritesPrice,
cacheReadsPrice: modelInfo.cacheReadsPrice,
}
}

// Find the appropriate tier based on the total input tokens
// Tiers are checked in order, and we use the first tier where the token count
// is less than or equal to the tier's context window
const tier = modelInfo.tiers.find((tier) => totalInputTokens <= tier.contextWindow)

if (tier) {
// Use tier prices, falling back to base prices if not defined in the tier
return {
inputPrice: tier.inputPrice ?? modelInfo.inputPrice,
outputPrice: tier.outputPrice ?? modelInfo.outputPrice,
cacheWritesPrice: tier.cacheWritesPrice ?? modelInfo.cacheWritesPrice,
cacheReadsPrice: tier.cacheReadsPrice ?? modelInfo.cacheReadsPrice,
}
}

// If no tier matches (all tiers have smaller context windows than the token count),
// use the last (highest) tier's prices
const lastTier = modelInfo.tiers[modelInfo.tiers.length - 1]
return {
inputPrice: lastTier.inputPrice ?? modelInfo.inputPrice,
outputPrice: lastTier.outputPrice ?? modelInfo.outputPrice,
cacheWritesPrice: lastTier.cacheWritesPrice ?? modelInfo.cacheWritesPrice,
cacheReadsPrice: lastTier.cacheReadsPrice ?? modelInfo.cacheReadsPrice,
}
}

function calculateApiCostInternal(
modelInfo: ModelInfo,
inputTokens: number,
Expand All @@ -15,10 +64,13 @@ function calculateApiCostInternal(
totalInputTokens: number,
totalOutputTokens: number,
): ApiCostResult {
const cacheWritesCost = ((modelInfo.cacheWritesPrice || 0) / 1_000_000) * cacheCreationInputTokens
const cacheReadsCost = ((modelInfo.cacheReadsPrice || 0) / 1_000_000) * cacheReadInputTokens
const baseInputCost = ((modelInfo.inputPrice || 0) / 1_000_000) * inputTokens
const outputCost = ((modelInfo.outputPrice || 0) / 1_000_000) * outputTokens
// Get the appropriate prices based on the total input tokens (for tiered pricing)
const { inputPrice, outputPrice, cacheWritesPrice, cacheReadsPrice } = getTieredPricing(modelInfo, totalInputTokens)

const cacheWritesCost = ((cacheWritesPrice || 0) / 1_000_000) * cacheCreationInputTokens
const cacheReadsCost = ((cacheReadsPrice || 0) / 1_000_000) * cacheReadInputTokens
const baseInputCost = ((inputPrice || 0) / 1_000_000) * inputTokens
const outputCost = ((outputPrice || 0) / 1_000_000) * outputTokens
const totalCost = cacheWritesCost + cacheReadsCost + baseInputCost + outputCost

return {
Expand Down
185 changes: 185 additions & 0 deletions src/utils/__tests__/cost.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,191 @@ describe("Cost Utility", () => {
expect(result.totalCost).toBe(0.0105)
expect(result.totalInputTokens).toBe(6000) // Total already includes cache
expect(result.totalOutputTokens).toBe(500)

describe("tiered pricing", () => {
const modelWithTiers: ModelInfo = {
contextWindow: 200_000,
supportsImages: true,
supportsPromptCache: true,
inputPrice: 3.0, // $3 per million tokens (<= 200K)
outputPrice: 15.0, // $15 per million tokens (<= 200K)
cacheWritesPrice: 3.75, // $3.75 per million tokens (<= 200K)
cacheReadsPrice: 0.3, // $0.30 per million tokens (<= 200K)
tiers: [
{
contextWindow: 1_000_000, // 1M tokens
inputPrice: 6.0, // $6 per million tokens (> 200K)
outputPrice: 22.5, // $22.50 per million tokens (> 200K)
cacheWritesPrice: 7.5, // $7.50 per million tokens (> 200K)
cacheReadsPrice: 0.6, // $0.60 per million tokens (> 200K)
},
],
}

it("should use base prices when total input tokens are below 200K", () => {
const result = calculateApiCostAnthropic(modelWithTiers, 50_000, 10_000, 50_000, 50_000)

// Total input: 50K + 50K + 50K = 150K (below 200K threshold)
// Should use base prices: $3/$15
// Input cost: (3.0 / 1_000_000) * 50_000 = 0.15
// Output cost: (15.0 / 1_000_000) * 10_000 = 0.15
// Cache writes: (3.75 / 1_000_000) * 50_000 = 0.1875
// Cache reads: (0.3 / 1_000_000) * 50_000 = 0.015
// Total: 0.15 + 0.15 + 0.1875 + 0.015 = 0.5025
expect(result.totalInputTokens).toBe(150_000)
expect(result.totalOutputTokens).toBe(10_000)
expect(result.totalCost).toBeCloseTo(0.5025, 6)
})

it("should use tier prices when total input tokens exceed 200K", () => {
const result = calculateApiCostAnthropic(modelWithTiers, 100_000, 20_000, 100_000, 100_000)

// Total input: 100K + 100K + 100K = 300K (above 200K, below 1M)
// Should use tier prices: $6/$22.50
// Input cost: (6.0 / 1_000_000) * 100_000 = 0.6
// Output cost: (22.5 / 1_000_000) * 20_000 = 0.45
// Cache writes: (7.5 / 1_000_000) * 100_000 = 0.75
// Cache reads: (0.6 / 1_000_000) * 100_000 = 0.06
// Total: 0.6 + 0.45 + 0.75 + 0.06 = 1.86
expect(result.totalInputTokens).toBe(300_000)
expect(result.totalOutputTokens).toBe(20_000)
expect(result.totalCost).toBeCloseTo(1.86, 6)
})

it("should use the highest tier prices when exceeding all tier thresholds", () => {
const result = calculateApiCostAnthropic(modelWithTiers, 500_000, 50_000, 300_000, 300_000)

// Total input: 500K + 300K + 300K = 1.1M (above 1M threshold)
// Should use highest tier prices: $6/$22.50 (last tier)
// Input cost: (6.0 / 1_000_000) * 500_000 = 3.0
// Output cost: (22.5 / 1_000_000) * 50_000 = 1.125
// Cache writes: (7.5 / 1_000_000) * 300_000 = 2.25
// Cache reads: (0.6 / 1_000_000) * 300_000 = 0.18
// Total: 3.0 + 1.125 + 2.25 + 0.18 = 6.555
expect(result.totalInputTokens).toBe(1_100_000)
expect(result.totalOutputTokens).toBe(50_000)
expect(result.totalCost).toBeCloseTo(6.555, 6)
})

it("should handle partial tier definitions", () => {
// Model where tier only overrides some prices
const modelPartialTiers: ModelInfo = {
contextWindow: 200_000,
supportsImages: true,
supportsPromptCache: true,
inputPrice: 3.0,
outputPrice: 15.0,
cacheWritesPrice: 3.75,
cacheReadsPrice: 0.3,
tiers: [
{
contextWindow: 1_000_000,
inputPrice: 6.0, // Only input price changes
// output, cacheWrites, cacheReads prices should fall back to base
},
],
}

const result = calculateApiCostAnthropic(modelPartialTiers, 100_000, 20_000, 100_000, 100_000)

// Total input: 300K (uses tier)
// Input cost: (6.0 / 1_000_000) * 100_000 = 0.6 (tier price)
// Output cost: (15.0 / 1_000_000) * 20_000 = 0.3 (base price)
// Cache writes: (3.75 / 1_000_000) * 100_000 = 0.375 (base price)
// Cache reads: (0.3 / 1_000_000) * 100_000 = 0.03 (base price)
// Total: 0.6 + 0.3 + 0.375 + 0.03 = 1.305
expect(result.totalInputTokens).toBe(300_000)
expect(result.totalOutputTokens).toBe(20_000)
expect(result.totalCost).toBeCloseTo(1.305, 6)
})

it("should handle multiple tiers correctly", () => {
const modelMultipleTiers: ModelInfo = {
contextWindow: 128_000,
supportsImages: true,
supportsPromptCache: true,
inputPrice: 0.075, // <= 128K
outputPrice: 0.3,
tiers: [
{
contextWindow: 200_000, // First tier
inputPrice: 0.15,
outputPrice: 0.6,
},
{
contextWindow: 1_000_000, // Second tier
inputPrice: 0.3,
outputPrice: 1.2,
},
],
}

// Test below first threshold (128K)
let result = calculateApiCostAnthropic(modelMultipleTiers, 50_000, 10_000)
expect(result.totalCost).toBeCloseTo((0.075 * 50 + 0.3 * 10) / 1000, 6)

// Test between first and second threshold (150K)
result = calculateApiCostAnthropic(modelMultipleTiers, 150_000, 10_000)
expect(result.totalCost).toBeCloseTo((0.15 * 150 + 0.6 * 10) / 1000, 6)

// Test above second threshold (500K)
result = calculateApiCostAnthropic(modelMultipleTiers, 500_000, 10_000)
expect(result.totalCost).toBeCloseTo((0.3 * 500 + 1.2 * 10) / 1000, 6)
})
})

describe("tiered pricing for OpenAI", () => {
const modelWithTiers: ModelInfo = {
contextWindow: 200_000,
supportsImages: true,
supportsPromptCache: true,
inputPrice: 3.0, // $3 per million tokens (<= 200K)
outputPrice: 15.0, // $15 per million tokens (<= 200K)
cacheWritesPrice: 3.75, // $3.75 per million tokens (<= 200K)
cacheReadsPrice: 0.3, // $0.30 per million tokens (<= 200K)
tiers: [
{
contextWindow: 1_000_000, // 1M tokens
inputPrice: 6.0, // $6 per million tokens (> 200K)
outputPrice: 22.5, // $22.50 per million tokens (> 200K)
cacheWritesPrice: 7.5, // $7.50 per million tokens (> 200K)
cacheReadsPrice: 0.6, // $0.60 per million tokens (> 200K)
},
],
}

it("should use tier prices for OpenAI when total input tokens exceed threshold", () => {
// Total input: 300K (includes all tokens)
const result = calculateApiCostOpenAI(modelWithTiers, 300_000, 20_000, 100_000, 100_000)

// Total input is 300K (above 200K, below 1M) - uses tier pricing
// Non-cached input: 300K - 100K - 100K = 100K
// Input cost: (6.0 / 1_000_000) * 100_000 = 0.6
// Output cost: (22.5 / 1_000_000) * 20_000 = 0.45
// Cache writes: (7.5 / 1_000_000) * 100_000 = 0.75
// Cache reads: (0.6 / 1_000_000) * 100_000 = 0.06
// Total: 0.6 + 0.45 + 0.75 + 0.06 = 1.86
expect(result.totalInputTokens).toBe(300_000)
expect(result.totalOutputTokens).toBe(20_000)
expect(result.totalCost).toBeCloseTo(1.86, 6)
})

it("should use base prices for OpenAI when total input tokens are below threshold", () => {
// Total input: 150K (includes all tokens)
const result = calculateApiCostOpenAI(modelWithTiers, 150_000, 10_000, 50_000, 50_000)

// Total input is 150K (below 200K) - uses base pricing
// Non-cached input: 150K - 50K - 50K = 50K
// Input cost: (3.0 / 1_000_000) * 50_000 = 0.15
// Output cost: (15.0 / 1_000_000) * 10_000 = 0.15
// Cache writes: (3.75 / 1_000_000) * 50_000 = 0.1875
// Cache reads: (0.3 / 1_000_000) * 50_000 = 0.015
// Total: 0.15 + 0.15 + 0.1875 + 0.015 = 0.5025
expect(result.totalInputTokens).toBe(150_000)
expect(result.totalOutputTokens).toBe(10_000)
expect(result.totalCost).toBeCloseTo(0.5025, 6)
})
})
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The describe blocks for tiered pricing tests are incorrectly nested inside the previous it() block (line 207 "should handle missing cache prices"). Test frameworks like Vitest do not execute describe blocks nested within it() blocks, so these tests are never actually running. This explains why the PR reports all tests passing despite the new test suites being added. These test suites should be moved outside and placed as siblings to the other test blocks within the "calculateApiCostAnthropic" and "calculateApiCostOpenAI" describe blocks respectively.

})
})
})
Loading