diff --git a/packages/types/src/providers/bedrock.ts b/packages/types/src/providers/bedrock.ts index 9935d90b127a..10676dd19d98 100644 --- a/packages/types/src/providers/bedrock.ts +++ b/packages/types/src/providers/bedrock.ts @@ -26,6 +26,15 @@ export const bedrockModels = { minTokensPerCachePoint: 1024, maxCachePoints: 4, cachableFields: ["system", "messages", "tools"], + tiers: [ + { + contextWindow: 1_000_000, + inputPrice: 6.0, + outputPrice: 22.5, + cacheWritesPrice: 7.5, + cacheReadsPrice: 0.6, + }, + ], }, "amazon.nova-pro-v1:0": { maxTokens: 5000, @@ -90,6 +99,15 @@ export const bedrockModels = { minTokensPerCachePoint: 1024, maxCachePoints: 4, cachableFields: ["system", "messages", "tools"], + tiers: [ + { + contextWindow: 1_000_000, + inputPrice: 6.0, + outputPrice: 22.5, + cacheWritesPrice: 7.5, + cacheReadsPrice: 0.6, + }, + ], }, "anthropic.claude-opus-4-1-20250805-v1:0": { maxTokens: 8192, diff --git a/packages/types/src/providers/gemini.ts b/packages/types/src/providers/gemini.ts index aae428d90c02..c8bac1f5b30c 100644 --- a/packages/types/src/providers/gemini.ts +++ b/packages/types/src/providers/gemini.ts @@ -138,24 +138,19 @@ export const geminiModels = { contextWindow: 1_048_576, supportsImages: true, supportsPromptCache: true, - inputPrice: 2.5, // This is the pricing for prompts above 200k tokens. 
- outputPrice: 15, - cacheReadsPrice: 0.625, - cacheWritesPrice: 4.5, + inputPrice: 1.25, // Base price for ≤200k tokens + outputPrice: 10, + cacheReadsPrice: 0.125, + cacheWritesPrice: 1.625, maxThinkingTokens: 32_768, supportsReasoningBudget: true, tiers: [ { - contextWindow: 200_000, - inputPrice: 1.25, - outputPrice: 10, - cacheReadsPrice: 0.31, - }, - { - contextWindow: Infinity, - inputPrice: 2.5, + contextWindow: 1_000_000, + inputPrice: 2.5, // >200k tokens outputPrice: 15, - cacheReadsPrice: 0.625, + cacheReadsPrice: 0.25, + cacheWritesPrice: 2.875, }, ], }, @@ -164,22 +159,17 @@ export const geminiModels = { contextWindow: 1_048_576, supportsImages: true, supportsPromptCache: true, - inputPrice: 2.5, // This is the pricing for prompts above 200k tokens. - outputPrice: 15, - cacheReadsPrice: 0.625, - cacheWritesPrice: 4.5, + inputPrice: 1.25, // Base price for ≤200k tokens + outputPrice: 10, + cacheReadsPrice: 0.125, + cacheWritesPrice: 1.625, tiers: [ { - contextWindow: 200_000, - inputPrice: 1.25, - outputPrice: 10, - cacheReadsPrice: 0.31, - }, - { - contextWindow: Infinity, - inputPrice: 2.5, + contextWindow: 1_000_000, + inputPrice: 2.5, // >200k tokens outputPrice: 15, - cacheReadsPrice: 0.625, + cacheReadsPrice: 0.25, + cacheWritesPrice: 2.875, }, ], }, @@ -188,24 +178,19 @@ export const geminiModels = { contextWindow: 1_048_576, supportsImages: true, supportsPromptCache: true, - inputPrice: 2.5, // This is the pricing for prompts above 200k tokens. 
- outputPrice: 15, - cacheReadsPrice: 0.625, - cacheWritesPrice: 4.5, + inputPrice: 1.25, // Base price for ≤200k tokens + outputPrice: 10, + cacheReadsPrice: 0.125, + cacheWritesPrice: 1.625, maxThinkingTokens: 32_768, supportsReasoningBudget: true, tiers: [ { - contextWindow: 200_000, - inputPrice: 1.25, - outputPrice: 10, - cacheReadsPrice: 0.31, - }, - { - contextWindow: Infinity, - inputPrice: 2.5, + contextWindow: 1_000_000, + inputPrice: 2.5, // >200k tokens outputPrice: 15, - cacheReadsPrice: 0.625, + cacheReadsPrice: 0.25, + cacheWritesPrice: 2.875, }, ], }, @@ -222,25 +207,20 @@ export const geminiModels = { contextWindow: 1_048_576, supportsImages: true, supportsPromptCache: true, - inputPrice: 2.5, // This is the pricing for prompts above 200k tokens. - outputPrice: 15, - cacheReadsPrice: 0.625, - cacheWritesPrice: 4.5, + inputPrice: 1.25, // Base price for ≤200k tokens + outputPrice: 10, + cacheReadsPrice: 0.125, + cacheWritesPrice: 1.625, maxThinkingTokens: 32_768, supportsReasoningBudget: true, requiredReasoningBudget: true, tiers: [ { - contextWindow: 200_000, - inputPrice: 1.25, - outputPrice: 10, - cacheReadsPrice: 0.31, - }, - { - contextWindow: Infinity, - inputPrice: 2.5, + contextWindow: 1_000_000, + inputPrice: 2.5, // >200k tokens outputPrice: 15, - cacheReadsPrice: 0.625, + cacheReadsPrice: 0.25, + cacheWritesPrice: 2.875, }, ], }, diff --git a/packages/types/src/providers/vertex.ts b/packages/types/src/providers/vertex.ts index f277c58a3ef2..d15d583ef1cd 100644 --- a/packages/types/src/providers/vertex.ts +++ b/packages/types/src/providers/vertex.ts @@ -61,49 +61,79 @@ export const vertexModels = { contextWindow: 1_048_576, supportsImages: true, supportsPromptCache: true, - inputPrice: 2.5, - outputPrice: 15, + inputPrice: 1.25, + outputPrice: 10, + cacheReadsPrice: 0.125, + cacheWritesPrice: 1.625, + tiers: [ + { + contextWindow: 1_000_000, + inputPrice: 2.5, + outputPrice: 15, + cacheReadsPrice: 0.25, + cacheWritesPrice: 2.875, + }, + ], 
}, "gemini-2.5-pro-preview-05-06": { maxTokens: 65_535, contextWindow: 1_048_576, supportsImages: true, supportsPromptCache: true, - inputPrice: 2.5, - outputPrice: 15, + inputPrice: 1.25, + outputPrice: 10, + cacheReadsPrice: 0.125, + cacheWritesPrice: 1.625, + tiers: [ + { + contextWindow: 1_000_000, + inputPrice: 2.5, + outputPrice: 15, + cacheReadsPrice: 0.25, + cacheWritesPrice: 2.875, + }, + ], }, "gemini-2.5-pro-preview-06-05": { maxTokens: 65_535, contextWindow: 1_048_576, supportsImages: true, supportsPromptCache: true, - inputPrice: 2.5, - outputPrice: 15, + inputPrice: 1.25, + outputPrice: 10, + cacheReadsPrice: 0.125, + cacheWritesPrice: 1.625, maxThinkingTokens: 32_768, supportsReasoningBudget: true, + tiers: [ + { + contextWindow: 1_000_000, + inputPrice: 2.5, + outputPrice: 15, + cacheReadsPrice: 0.25, + cacheWritesPrice: 2.875, + }, + ], }, "gemini-2.5-pro": { maxTokens: 64_000, contextWindow: 1_048_576, supportsImages: true, supportsPromptCache: true, - inputPrice: 2.5, - outputPrice: 15, + inputPrice: 1.25, + outputPrice: 10, + cacheReadsPrice: 0.125, + cacheWritesPrice: 1.625, maxThinkingTokens: 32_768, supportsReasoningBudget: true, requiredReasoningBudget: true, tiers: [ { - contextWindow: 200_000, - inputPrice: 1.25, - outputPrice: 10, - cacheReadsPrice: 0.31, - }, - { - contextWindow: Infinity, + contextWindow: 1_000_000, inputPrice: 2.5, outputPrice: 15, - cacheReadsPrice: 0.625, + cacheReadsPrice: 0.25, + cacheWritesPrice: 2.875, }, ], }, @@ -173,6 +203,15 @@ export const vertexModels = { cacheWritesPrice: 3.75, cacheReadsPrice: 0.3, supportsReasoningBudget: true, + tiers: [ + { + contextWindow: 1_000_000, + inputPrice: 6.0, + outputPrice: 22.5, + cacheWritesPrice: 7.5, + cacheReadsPrice: 0.6, + }, + ], }, "claude-sonnet-4-5@20250929": { maxTokens: 8192, @@ -184,6 +223,15 @@ export const vertexModels = { cacheWritesPrice: 3.75, cacheReadsPrice: 0.3, supportsReasoningBudget: true, + tiers: [ + { + contextWindow: 1_000_000, + inputPrice: 6.0, 
+ outputPrice: 22.5, + cacheWritesPrice: 7.5, + cacheReadsPrice: 0.6, + }, + ], }, "claude-haiku-4-5@20251001": { maxTokens: 8192, @@ -216,6 +264,15 @@ export const vertexModels = { outputPrice: 75.0, cacheWritesPrice: 18.75, cacheReadsPrice: 1.5, + tiers: [ + { + contextWindow: 1_000_000, + inputPrice: 30.0, + outputPrice: 112.5, + cacheWritesPrice: 37.5, + cacheReadsPrice: 3.0, + }, + ], }, "claude-3-7-sonnet@20250219:thinking": { maxTokens: 64_000, diff --git a/src/api/providers/fetchers/__tests__/openrouter.spec.ts b/src/api/providers/fetchers/__tests__/openrouter.spec.ts index 37cdc5443980..3a1393a7ee45 100644 --- a/src/api/providers/fetchers/__tests__/openrouter.spec.ts +++ b/src/api/providers/fetchers/__tests__/openrouter.spec.ts @@ -93,10 +93,19 @@ describe("OpenRouter API", () => { inputPrice: 1.25, outputPrice: 10, cacheWritesPrice: 1.625, - cacheReadsPrice: 0.31, + cacheReadsPrice: 0.125, description: undefined, supportsReasoningEffort: undefined, supportedParameters: undefined, + tiers: [ + { + contextWindow: 1_000_000, + inputPrice: 2.5, + outputPrice: 15, + cacheReadsPrice: 0.25, + cacheWritesPrice: 2.875, + }, + ], }, "google-ai-studio": { maxTokens: 65536, @@ -107,10 +116,19 @@ describe("OpenRouter API", () => { inputPrice: 1.25, outputPrice: 10, cacheWritesPrice: 1.625, - cacheReadsPrice: 0.31, + cacheReadsPrice: 0.125, description: undefined, supportsReasoningEffort: undefined, supportedParameters: undefined, + tiers: [ + { + contextWindow: 1_000_000, + inputPrice: 2.5, + outputPrice: 15, + cacheReadsPrice: 0.25, + cacheWritesPrice: 2.875, + }, + ], }, }) diff --git a/src/api/providers/fetchers/openrouter.ts b/src/api/providers/fetchers/openrouter.ts index b546c40a3cfc..c058e747bb81 100644 --- a/src/api/providers/fetchers/openrouter.ts +++ b/src/api/providers/fetchers/openrouter.ts @@ -263,5 +263,56 @@ export const parseOpenRouterModel = ({ modelInfo.maxTokens = 32768 } + // Add tiered pricing for Gemini 2.5 Pro models on OpenRouter + if 
(id.includes("gemini-2.5-pro") || id.includes("gemini/2.5-pro")) { + modelInfo.inputPrice = 1.25 + modelInfo.outputPrice = 10 + modelInfo.cacheReadsPrice = 0.125 + modelInfo.cacheWritesPrice = 1.625 + modelInfo.tiers = [ + { + contextWindow: 1_000_000, + inputPrice: 2.5, + outputPrice: 15, + cacheReadsPrice: 0.25, + cacheWritesPrice: 2.875, + }, + ] + } + + // Add tiered pricing for Claude Sonnet 4 and 4.5 on OpenRouter + if (id === "anthropic/claude-sonnet-4" || id === "anthropic/claude-sonnet-4.5") { + modelInfo.inputPrice = 3.0 + modelInfo.outputPrice = 15.0 + modelInfo.cacheWritesPrice = 3.75 + modelInfo.cacheReadsPrice = 0.3 + modelInfo.tiers = [ + { + contextWindow: 1_000_000, + inputPrice: 6.0, + outputPrice: 22.5, + cacheWritesPrice: 7.5, + cacheReadsPrice: 0.6, + }, + ] + } + + // Add tiered pricing for Qwen 3 Max on OpenRouter + if (id.toLowerCase().includes("qwen") && id.toLowerCase().includes("max")) { + modelInfo.inputPrice = 1.2 + modelInfo.outputPrice = 6 + modelInfo.cacheReadsPrice = 0.24 + modelInfo.cacheWritesPrice = 0 // Free + modelInfo.tiers = [ + { + contextWindow: 1_000_000, + inputPrice: 3, + outputPrice: 15, + cacheReadsPrice: 0.6, + cacheWritesPrice: 0, // Free + }, + ] + } + return modelInfo } diff --git a/src/shared/cost.ts b/src/shared/cost.ts index fea686d8aed8..76aecf3754df 100644 --- a/src/shared/cost.ts +++ b/src/shared/cost.ts @@ -6,6 +6,65 @@ export interface ApiCostResult { totalCost: number } +/** + * Finds the appropriate pricing tier based on the total input tokens. + * Returns the base prices when no tiers are defined or when totalInputTokens <= modelInfo.contextWindow; otherwise the prices of the first tier whose contextWindow is >= totalInputTokens, else those of the last tier, with any price a tier omits falling back to the base price. NOTE(review): a tier is only reachable once totalInputTokens exceeds the base contextWindow, so a model whose base contextWindow is >= its tiers' contextWindow (e.g. a 1_048_576 base with a 1_000_000 tier) is billed at base prices for every request up to 1_048_576 tokens - confirm that this is the intended threshold.
+ */ +function getTieredPricing( + modelInfo: ModelInfo, + totalInputTokens: number, +): { + inputPrice: number | undefined + outputPrice: number | undefined + cacheWritesPrice: number | undefined + cacheReadsPrice: number | undefined +} { + // If there are no tiers defined, use the base prices + if (!modelInfo.tiers || modelInfo.tiers.length === 0) { + return { + inputPrice: modelInfo.inputPrice, + outputPrice: modelInfo.outputPrice, + cacheWritesPrice: modelInfo.cacheWritesPrice, + cacheReadsPrice: modelInfo.cacheReadsPrice, + } + } + + // If within base context window, use base prices + if (totalInputTokens <= modelInfo.contextWindow) { + return { + inputPrice: modelInfo.inputPrice, + outputPrice: modelInfo.outputPrice, + cacheWritesPrice: modelInfo.cacheWritesPrice, + cacheReadsPrice: modelInfo.cacheReadsPrice, + } + } + + // Find the appropriate tier based on the total input tokens + // Tiers are checked in order, and we use the first tier where the token count + // is less than or equal to the tier's context window + const tier = modelInfo.tiers.find((tier) => totalInputTokens <= tier.contextWindow) + + if (tier) { + // Use tier prices, falling back to base prices if not defined in the tier + return { + inputPrice: tier.inputPrice ?? modelInfo.inputPrice, + outputPrice: tier.outputPrice ?? modelInfo.outputPrice, + cacheWritesPrice: tier.cacheWritesPrice ?? modelInfo.cacheWritesPrice, + cacheReadsPrice: tier.cacheReadsPrice ?? modelInfo.cacheReadsPrice, + } + } + + // If no tier matches (all tiers have smaller context windows than the token count), + // use the last (highest) tier's prices + const lastTier = modelInfo.tiers[modelInfo.tiers.length - 1] + return { + inputPrice: lastTier.inputPrice ?? modelInfo.inputPrice, + outputPrice: lastTier.outputPrice ?? modelInfo.outputPrice, + cacheWritesPrice: lastTier.cacheWritesPrice ?? modelInfo.cacheWritesPrice, + cacheReadsPrice: lastTier.cacheReadsPrice ?? 
modelInfo.cacheReadsPrice, + } +} + function calculateApiCostInternal( modelInfo: ModelInfo, inputTokens: number, @@ -15,10 +74,13 @@ function calculateApiCostInternal( totalInputTokens: number, totalOutputTokens: number, ): ApiCostResult { - const cacheWritesCost = ((modelInfo.cacheWritesPrice || 0) / 1_000_000) * cacheCreationInputTokens - const cacheReadsCost = ((modelInfo.cacheReadsPrice || 0) / 1_000_000) * cacheReadInputTokens - const baseInputCost = ((modelInfo.inputPrice || 0) / 1_000_000) * inputTokens - const outputCost = ((modelInfo.outputPrice || 0) / 1_000_000) * outputTokens + // Get the appropriate prices based on the total input tokens (for tiered pricing) + const { inputPrice, outputPrice, cacheWritesPrice, cacheReadsPrice } = getTieredPricing(modelInfo, totalInputTokens) + + const cacheWritesCost = ((cacheWritesPrice || 0) / 1_000_000) * cacheCreationInputTokens + const cacheReadsCost = ((cacheReadsPrice || 0) / 1_000_000) * cacheReadInputTokens + const baseInputCost = ((inputPrice || 0) / 1_000_000) * inputTokens + const outputCost = ((outputPrice || 0) / 1_000_000) * outputTokens const totalCost = cacheWritesCost + cacheReadsCost + baseInputCost + outputCost return {