diff --git a/src/api/providers/anthropic.ts b/src/api/providers/anthropic.ts
index b2e158eca53e..0e767ce23797 100644
--- a/src/api/providers/anthropic.ts
+++ b/src/api/providers/anthropic.ts
@@ -230,17 +230,19 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
 			}

 			if (inputTokens > 0 || outputTokens > 0 || cacheWriteTokens > 0 || cacheReadTokens > 0) {
+				const { totalCost } = calculateApiCostAnthropic(
+					this.getModel().info,
+					inputTokens,
+					outputTokens,
+					cacheWriteTokens,
+					cacheReadTokens,
+				)
+
 				yield {
 					type: "usage",
 					inputTokens: 0,
 					outputTokens: 0,
-					totalCost: calculateApiCostAnthropic(
-						this.getModel().info,
-						inputTokens,
-						outputTokens,
-						cacheWriteTokens,
-						cacheReadTokens,
-					),
+					totalCost,
 				}
 			}
 		}
diff --git a/src/api/providers/cerebras.ts b/src/api/providers/cerebras.ts
index a0421844e815..16dfa282adb1 100644
--- a/src/api/providers/cerebras.ts
+++ b/src/api/providers/cerebras.ts
@@ -331,6 +331,7 @@ export class CerebrasHandler extends BaseProvider implements SingleCompletionHan
 		const { info } = this.getModel()
 		// Use actual token usage from the last request
 		const { inputTokens, outputTokens } = this.lastUsage
-		return calculateApiCostOpenAI(info, inputTokens, outputTokens)
+		const { totalCost } = calculateApiCostOpenAI(info, inputTokens, outputTokens)
+		return totalCost
 	}
 }
diff --git a/src/api/providers/deepinfra.ts b/src/api/providers/deepinfra.ts
index 7cf018b069f5..fb8c117ae013 100644
--- a/src/api/providers/deepinfra.ts
+++ b/src/api/providers/deepinfra.ts
@@ -131,9 +131,9 @@ export class DeepInfraHandler extends RouterProvider implements SingleCompletion
 		const cacheWriteTokens = usage?.prompt_tokens_details?.cache_write_tokens || 0
 		const cacheReadTokens = usage?.prompt_tokens_details?.cached_tokens || 0

-		const totalCost = modelInfo
+		const { totalCost } = modelInfo
 			? calculateApiCostOpenAI(modelInfo, inputTokens, outputTokens, cacheWriteTokens, cacheReadTokens)
-			: 0
+			: { totalCost: 0 }

 		return {
 			type: "usage",
diff --git a/src/api/providers/groq.ts b/src/api/providers/groq.ts
index b66e42d7f016..c2f2dd19db98 100644
--- a/src/api/providers/groq.ts
+++ b/src/api/providers/groq.ts
@@ -64,7 +64,7 @@ export class GroqHandler extends BaseOpenAiCompatibleProvider {
 		const cacheWriteTokens = 0

 		// Calculate cost using OpenAI-compatible cost calculation
-		const totalCost = calculateApiCostOpenAI(info, inputTokens, outputTokens, cacheWriteTokens, cacheReadTokens)
+		const { totalCost } = calculateApiCostOpenAI(info, inputTokens, outputTokens, cacheWriteTokens, cacheReadTokens)

 		yield {
 			type: "usage",
diff --git a/src/api/providers/lite-llm.ts b/src/api/providers/lite-llm.ts
index 9f58f092234d..43bf33c38be0 100644
--- a/src/api/providers/lite-llm.ts
+++ b/src/api/providers/lite-llm.ts
@@ -165,22 +165,23 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHa
 					(lastUsage as any).prompt_cache_hit_tokens ||
 					0

+				const { totalCost } = calculateApiCostOpenAI(
+					info,
+					lastUsage.prompt_tokens || 0,
+					lastUsage.completion_tokens || 0,
+					cacheWriteTokens,
+					cacheReadTokens,
+				)
+
 				const usageData: ApiStreamUsageChunk = {
 					type: "usage",
 					inputTokens: lastUsage.prompt_tokens || 0,
 					outputTokens: lastUsage.completion_tokens || 0,
 					cacheWriteTokens: cacheWriteTokens > 0 ? cacheWriteTokens : undefined,
 					cacheReadTokens: cacheReadTokens > 0 ? cacheReadTokens : undefined,
+					totalCost,
 				}

-				usageData.totalCost = calculateApiCostOpenAI(
-					info,
-					usageData.inputTokens,
-					usageData.outputTokens,
-					usageData.cacheWriteTokens || 0,
-					usageData.cacheReadTokens || 0,
-				)
-
 				yield usageData
 			}
 		} catch (error) {
diff --git a/src/api/providers/openai-native.ts b/src/api/providers/openai-native.ts
index 8a205a06b453..daf6278822b5 100644
--- a/src/api/providers/openai-native.ts
+++ b/src/api/providers/openai-native.ts
@@ -99,8 +99,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		const effectiveInfo = this.applyServiceTierPricing(model.info, effectiveTier)

 		// Pass total input tokens directly to calculateApiCostOpenAI
-		// The function handles subtracting both cache reads and writes internally (see shared/cost.ts:46)
-		const totalCost = calculateApiCostOpenAI(
+		// The function handles subtracting both cache reads and writes internally
+		const { totalCost } = calculateApiCostOpenAI(
 			effectiveInfo,
 			totalInputTokens,
 			totalOutputTokens,
diff --git a/src/api/providers/requesty.ts b/src/api/providers/requesty.ts
index 16aefae52861..1c0e9ed64075 100644
--- a/src/api/providers/requesty.ts
+++ b/src/api/providers/requesty.ts
@@ -85,9 +85,9 @@ export class RequestyHandler extends BaseProvider implements SingleCompletionHan
 		const outputTokens = requestyUsage?.completion_tokens || 0
 		const cacheWriteTokens = requestyUsage?.prompt_tokens_details?.caching_tokens || 0
 		const cacheReadTokens = requestyUsage?.prompt_tokens_details?.cached_tokens || 0
-		const totalCost = modelInfo
+		const { totalCost } = modelInfo
 			? calculateApiCostOpenAI(modelInfo, inputTokens, outputTokens, cacheWriteTokens, cacheReadTokens)
-			: 0
+			: { totalCost: 0 }

 		return {
 			type: "usage",
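The provider-side changes above are mechanical: `calculateApiCostAnthropic` and `calculateApiCostOpenAI` now return an `ApiCostResult` object (defined in `src/shared/cost.ts` below) rather than a bare number, so every call site destructures `totalCost`. Where `modelInfo` may be undefined (DeepInfra, Requesty), the ternary now yields an object on both branches so the destructuring stays valid. A minimal sketch of the new call shape; the pricing values are illustrative, mirroring the test fixtures rather than any real model, and the `ModelInfo` field set is abbreviated:

```ts
import type { ModelInfo } from "@roo-code/types"
import { calculateApiCostOpenAI } from "../../shared/cost"

// Illustrative pricing (USD per million tokens), abbreviated field set.
const info: ModelInfo = {
	maxTokens: 8192,
	contextWindow: 200_000,
	supportsPromptCache: true,
	inputPrice: 3.0,
	outputPrice: 15.0,
	cacheWritesPrice: 3.75,
	cacheReadsPrice: 0.3,
}

// modelInfo can be undefined for router providers, so both branches of the
// ternary must produce an object for the destructuring to succeed.
const modelInfo: ModelInfo | undefined = info
const { totalCost } = modelInfo
	? calculateApiCostOpenAI(modelInfo, 6000, 500, 2000, 3000)
	: { totalCost: 0 }
// (6000 - 2000 - 3000) * 3.0/1M + 2000 * 3.75/1M + 3000 * 0.3/1M + 500 * 15.0/1M = 0.0189
```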
diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts
index 8eb15441bd88..9c199a2a1bff 100644
--- a/src/core/task/Task.ts
+++ b/src/core/task/Task.ts
@@ -74,7 +74,7 @@ import { RooTerminalProcess } from "../../integrations/terminal/types"
 import { TerminalRegistry } from "../../integrations/terminal/TerminalRegistry"

 // utils
-import { calculateApiCostAnthropic } from "../../shared/cost"
+import { calculateApiCostAnthropic, calculateApiCostOpenAI } from "../../shared/cost"
 import { getWorkspacePath } from "../../utils/path"

 // prompts
@@ -1886,21 +1886,35 @@ export class Task extends EventEmitter implements TaskLike {
 			}

 			const existingData = JSON.parse(this.clineMessages[lastApiReqIndex].text || "{}")
+
+			// Calculate total tokens and cost using provider-aware function
+			const modelId = getModelId(this.apiConfiguration)
+			const apiProtocol = getApiProtocol(this.apiConfiguration.apiProvider, modelId)
+
+			const costResult =
+				apiProtocol === "anthropic"
+					? calculateApiCostAnthropic(
+							this.api.getModel().info,
+							inputTokens,
+							outputTokens,
+							cacheWriteTokens,
+							cacheReadTokens,
+						)
+					: calculateApiCostOpenAI(
+							this.api.getModel().info,
+							inputTokens,
+							outputTokens,
+							cacheWriteTokens,
+							cacheReadTokens,
+						)
+
 			this.clineMessages[lastApiReqIndex].text = JSON.stringify({
 				...existingData,
-				tokensIn: inputTokens,
-				tokensOut: outputTokens,
+				tokensIn: costResult.totalInputTokens,
+				tokensOut: costResult.totalOutputTokens,
 				cacheWrites: cacheWriteTokens,
 				cacheReads: cacheReadTokens,
-				cost:
-					totalCost ??
-					calculateApiCostAnthropic(
-						this.api.getModel().info,
-						inputTokens,
-						outputTokens,
-						cacheWriteTokens,
-						cacheReadTokens,
-					),
+				cost: totalCost ?? costResult.totalCost,
 				cancelReason,
 				streamingFailedMessage,
 			} satisfies ClineApiReqInfo)
@@ -2104,21 +2118,34 @@ export class Task extends EventEmitter implements TaskLike {
 				await this.updateClineMessage(apiReqMessage)
 			}

-			// Capture telemetry
+			// Capture telemetry with provider-aware cost calculation
+			const modelId = getModelId(this.apiConfiguration)
+			const apiProtocol = getApiProtocol(this.apiConfiguration.apiProvider, modelId)
+
+			// Use the appropriate cost function based on the API protocol
+			const costResult =
+				apiProtocol === "anthropic"
+					? calculateApiCostAnthropic(
+							this.api.getModel().info,
+							tokens.input,
+							tokens.output,
+							tokens.cacheWrite,
+							tokens.cacheRead,
+						)
+					: calculateApiCostOpenAI(
+							this.api.getModel().info,
+							tokens.input,
+							tokens.output,
+							tokens.cacheWrite,
+							tokens.cacheRead,
+						)
+
 			TelemetryService.instance.captureLlmCompletion(this.taskId, {
-				inputTokens: tokens.input,
-				outputTokens: tokens.output,
+				inputTokens: costResult.totalInputTokens,
+				outputTokens: costResult.totalOutputTokens,
 				cacheWriteTokens: tokens.cacheWrite,
 				cacheReadTokens: tokens.cacheRead,
-				cost:
-					tokens.total ??
-					calculateApiCostAnthropic(
-						this.api.getModel().info,
-						tokens.input,
-						tokens.output,
-						tokens.cacheWrite,
-						tokens.cacheRead,
-					),
+				cost: tokens.total ?? costResult.totalCost,
 			})
 		}
 	}
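To make the persistence change concrete: for a hypothetical Anthropic-protocol request reporting 1000 non-cached input tokens, 2000 cache writes, 3000 cache reads, and 500 output tokens, the `api_req_started` record changes roughly as sketched below. The field values are illustrative, not taken from a real task.

```ts
// Before: raw provider-reported numbers were persisted, so tokensIn meant
// different things under different protocols.
const before = { tokensIn: 1000, tokensOut: 500, cacheWrites: 2000, cacheReads: 3000 }

// After: tokensIn holds costResult.totalInputTokens (1000 + 2000 + 3000),
// so Anthropic- and OpenAI-protocol records store comparable totals.
const after = { tokensIn: 6000, tokensOut: 500, cacheWrites: 2000, cacheReads: 3000 }
```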
diff --git a/src/shared/cost.ts b/src/shared/cost.ts
index a628756b0dbd..fea686d8aed8 100644
--- a/src/shared/cost.ts
+++ b/src/shared/cost.ts
@@ -1,18 +1,31 @@
 import type { ModelInfo } from "@roo-code/types"

+export interface ApiCostResult {
+	totalInputTokens: number
+	totalOutputTokens: number
+	totalCost: number
+}
+
 function calculateApiCostInternal(
 	modelInfo: ModelInfo,
 	inputTokens: number,
 	outputTokens: number,
 	cacheCreationInputTokens: number,
 	cacheReadInputTokens: number,
-): number {
+	totalInputTokens: number,
+	totalOutputTokens: number,
+): ApiCostResult {
 	const cacheWritesCost = ((modelInfo.cacheWritesPrice || 0) / 1_000_000) * cacheCreationInputTokens
 	const cacheReadsCost = ((modelInfo.cacheReadsPrice || 0) / 1_000_000) * cacheReadInputTokens
 	const baseInputCost = ((modelInfo.inputPrice || 0) / 1_000_000) * inputTokens
 	const outputCost = ((modelInfo.outputPrice || 0) / 1_000_000) * outputTokens
 	const totalCost = cacheWritesCost + cacheReadsCost + baseInputCost + outputCost
-	return totalCost
+
+	return {
+		totalInputTokens,
+		totalOutputTokens,
+		totalCost,
+	}
 }

 // For Anthropic compliant usage, the input tokens count does NOT include the
@@ -23,13 +36,22 @@ export function calculateApiCostAnthropic(
 	outputTokens: number,
 	cacheCreationInputTokens?: number,
 	cacheReadInputTokens?: number,
-): number {
+): ApiCostResult {
+	const cacheCreation = cacheCreationInputTokens || 0
+	const cacheRead = cacheReadInputTokens || 0
+
+	// For Anthropic: inputTokens does NOT include cached tokens
+	// Total input = base input + cache creation + cache reads
+	const totalInputTokens = inputTokens + cacheCreation + cacheRead
+
 	return calculateApiCostInternal(
 		modelInfo,
 		inputTokens,
 		outputTokens,
-		cacheCreationInputTokens || 0,
-		cacheReadInputTokens || 0,
+		cacheCreation,
+		cacheRead,
+		totalInputTokens,
+		outputTokens,
 	)
 }

@@ -40,17 +62,21 @@ export function calculateApiCostOpenAI(
 	outputTokens: number,
 	cacheCreationInputTokens?: number,
 	cacheReadInputTokens?: number,
-): number {
+): ApiCostResult {
 	const cacheCreationInputTokensNum = cacheCreationInputTokens || 0
 	const cacheReadInputTokensNum = cacheReadInputTokens || 0
 	const nonCachedInputTokens = Math.max(0, inputTokens - cacheCreationInputTokensNum - cacheReadInputTokensNum)

+	// For OpenAI: inputTokens ALREADY includes all tokens (cached + non-cached)
+	// So we pass the original inputTokens as the total
 	return calculateApiCostInternal(
 		modelInfo,
 		nonCachedInputTokens,
 		outputTokens,
 		cacheCreationInputTokensNum,
 		cacheReadInputTokensNum,
+		inputTokens,
+		outputTokens,
 	)
 }
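The intended invariant is easiest to see with both entry points side by side: the same logical request, reported Anthropic-style (cache tokens excluded from `inputTokens`) and OpenAI-style (cache tokens included), now normalizes to the same `ApiCostResult`. A sketch using the same illustrative pricing as the test fixtures below:

```ts
import type { ModelInfo } from "@roo-code/types"
import { calculateApiCostAnthropic, calculateApiCostOpenAI } from "./cost"

// Illustrative pricing (USD per million tokens), abbreviated field set.
const info: ModelInfo = {
	maxTokens: 8192,
	contextWindow: 200_000,
	supportsPromptCache: true,
	inputPrice: 3.0,
	outputPrice: 15.0,
	cacheWritesPrice: 3.75,
	cacheReadsPrice: 0.3,
}

// One logical request: 1000 non-cached input tokens, 2000 cache writes,
// 3000 cache reads, 500 output tokens.
const a = calculateApiCostAnthropic(info, 1000, 500, 2000, 3000) // inputTokens excludes cache
const o = calculateApiCostOpenAI(info, 6000, 500, 2000, 3000) // inputTokens includes cache

// Both normalize to the same totals:
// a.totalInputTokens === o.totalInputTokens === 6000
// a.totalCost === o.totalCost === 0.003 + 0.0075 + 0.0009 + 0.0075 = 0.0189
```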
diff --git a/src/shared/getApiMetrics.ts b/src/shared/getApiMetrics.ts
index 64fecbd9f469..2ae475676483 100644
--- a/src/shared/getApiMetrics.ts
+++ b/src/shared/getApiMetrics.ts
@@ -80,15 +80,12 @@ export function getApiMetrics(messages: ClineMessage[]) {
 		if (message.type === "say" && message.say === "api_req_started" && message.text) {
 			try {
 				const parsedText: ParsedApiReqStartedTextType = JSON.parse(message.text)
-				const { tokensIn, tokensOut, cacheWrites, cacheReads, apiProtocol } = parsedText
-
-				// Calculate context tokens based on API protocol.
-				if (apiProtocol === "anthropic") {
-					result.contextTokens = (tokensIn || 0) + (tokensOut || 0) + (cacheWrites || 0) + (cacheReads || 0)
-				} else {
-					// For OpenAI (or when protocol is not specified).
-					result.contextTokens = (tokensIn || 0) + (tokensOut || 0)
-				}
+				const { tokensIn, tokensOut } = parsedText
+
+				// Since tokensIn now stores TOTAL input tokens (including cache tokens),
+				// we no longer need to add cacheWrites and cacheReads separately.
+				// This applies to both Anthropic and OpenAI protocols.
+				result.contextTokens = (tokensIn || 0) + (tokensOut || 0)
 			} catch (error) {
 				console.error("Error parsing JSON:", error)
 				continue
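Continuing the same hypothetical numbers: previously an Anthropic-protocol record stored tokensIn = 1000 and this code added cacheWrites and cacheReads to reach 1000 + 500 + 2000 + 3000 = 6500; now the record already stores tokensIn = 6000, so the uniform expression produces the same 6500 without branching on apiProtocol:

```ts
// Hypothetical parsed api_req_started payload written by the updated Task.ts.
const parsedText = { tokensIn: 6000, tokensOut: 500, cacheWrites: 2000, cacheReads: 3000 }

// Uniform for both protocols: cache tokens are already folded into tokensIn.
const contextTokens = (parsedText.tokensIn || 0) + (parsedText.tokensOut || 0) // 6500
```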
diff --git a/src/utils/__tests__/cost.spec.ts b/src/utils/__tests__/cost.spec.ts
index 10ae279e48d2..83d268713697 100644
--- a/src/utils/__tests__/cost.spec.ts
+++ b/src/utils/__tests__/cost.spec.ts
@@ -17,43 +17,51 @@ describe("Cost Utility", () => {
 		}

 		it("should calculate basic input/output costs correctly", () => {
-			const cost = calculateApiCostAnthropic(mockModelInfo, 1000, 500)
+			const result = calculateApiCostAnthropic(mockModelInfo, 1000, 500)

 			// Input cost: (3.0 / 1_000_000) * 1000 = 0.003
 			// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
 			// Total: 0.003 + 0.0075 = 0.0105
-			expect(cost).toBe(0.0105)
+			expect(result.totalCost).toBe(0.0105)
+			expect(result.totalInputTokens).toBe(1000)
+			expect(result.totalOutputTokens).toBe(500)
 		})

 		it("should handle cache writes cost", () => {
-			const cost = calculateApiCostAnthropic(mockModelInfo, 1000, 500, 2000)
+			const result = calculateApiCostAnthropic(mockModelInfo, 1000, 500, 2000)

 			// Input cost: (3.0 / 1_000_000) * 1000 = 0.003
 			// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
 			// Cache writes: (3.75 / 1_000_000) * 2000 = 0.0075
 			// Total: 0.003 + 0.0075 + 0.0075 = 0.018
-			expect(cost).toBeCloseTo(0.018, 6)
+			expect(result.totalCost).toBeCloseTo(0.018, 6)
+			expect(result.totalInputTokens).toBe(3000) // 1000 + 2000
+			expect(result.totalOutputTokens).toBe(500)
 		})

 		it("should handle cache reads cost", () => {
-			const cost = calculateApiCostAnthropic(mockModelInfo, 1000, 500, undefined, 3000)
+			const result = calculateApiCostAnthropic(mockModelInfo, 1000, 500, undefined, 3000)

 			// Input cost: (3.0 / 1_000_000) * 1000 = 0.003
 			// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
 			// Cache reads: (0.3 / 1_000_000) * 3000 = 0.0009
 			// Total: 0.003 + 0.0075 + 0.0009 = 0.0114
-			expect(cost).toBe(0.0114)
+			expect(result.totalCost).toBe(0.0114)
+			expect(result.totalInputTokens).toBe(4000) // 1000 + 3000
+			expect(result.totalOutputTokens).toBe(500)
 		})

 		it("should handle all cost components together", () => {
-			const cost = calculateApiCostAnthropic(mockModelInfo, 1000, 500, 2000, 3000)
+			const result = calculateApiCostAnthropic(mockModelInfo, 1000, 500, 2000, 3000)

 			// Input cost: (3.0 / 1_000_000) * 1000 = 0.003
 			// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
 			// Cache writes: (3.75 / 1_000_000) * 2000 = 0.0075
 			// Cache reads: (0.3 / 1_000_000) * 3000 = 0.0009
 			// Total: 0.003 + 0.0075 + 0.0075 + 0.0009 = 0.0189
-			expect(cost).toBe(0.0189)
+			expect(result.totalCost).toBe(0.0189)
+			expect(result.totalInputTokens).toBe(6000) // 1000 + 2000 + 3000
+			expect(result.totalOutputTokens).toBe(500)
 		})

 		it("should handle missing prices gracefully", () => {
@@ -63,22 +71,28 @@ describe("Cost Utility", () => {
 				supportsPromptCache: true,
 			}

-			const cost = calculateApiCostAnthropic(modelWithoutPrices, 1000, 500, 2000, 3000)
-			expect(cost).toBe(0)
+			const result = calculateApiCostAnthropic(modelWithoutPrices, 1000, 500, 2000, 3000)
+			expect(result.totalCost).toBe(0)
+			expect(result.totalInputTokens).toBe(6000) // 1000 + 2000 + 3000
+			expect(result.totalOutputTokens).toBe(500)
 		})

 		it("should handle zero tokens", () => {
-			const cost = calculateApiCostAnthropic(mockModelInfo, 0, 0, 0, 0)
-			expect(cost).toBe(0)
+			const result = calculateApiCostAnthropic(mockModelInfo, 0, 0, 0, 0)
+			expect(result.totalCost).toBe(0)
+			expect(result.totalInputTokens).toBe(0)
+			expect(result.totalOutputTokens).toBe(0)
 		})

 		it("should handle undefined cache values", () => {
-			const cost = calculateApiCostAnthropic(mockModelInfo, 1000, 500)
+			const result = calculateApiCostAnthropic(mockModelInfo, 1000, 500)

 			// Input cost: (3.0 / 1_000_000) * 1000 = 0.003
 			// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
 			// Total: 0.003 + 0.0075 = 0.0105
-			expect(cost).toBe(0.0105)
+			expect(result.totalCost).toBe(0.0105)
+			expect(result.totalInputTokens).toBe(1000)
+			expect(result.totalOutputTokens).toBe(500)
 		})

 		it("should handle missing cache prices", () => {
@@ -88,13 +102,15 @@ describe("Cost Utility", () => {
 				cacheReadsPrice: undefined,
 			}

-			const cost = calculateApiCostAnthropic(modelWithoutCachePrices, 1000, 500, 2000, 3000)
+			const result = calculateApiCostAnthropic(modelWithoutCachePrices, 1000, 500, 2000, 3000)

 			// Should only include input and output costs
 			// Input cost: (3.0 / 1_000_000) * 1000 = 0.003
 			// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
 			// Total: 0.003 + 0.0075 = 0.0105
-			expect(cost).toBe(0.0105)
+			expect(result.totalCost).toBe(0.0105)
+			expect(result.totalInputTokens).toBe(6000) // 1000 + 2000 + 3000
+			expect(result.totalOutputTokens).toBe(500)
 		})
 	})

@@ -110,43 +126,51 @@ describe("Cost Utility", () => {
 		}

 		it("should calculate basic input/output costs correctly", () => {
-			const cost = calculateApiCostOpenAI(mockModelInfo, 1000, 500)
+			const result = calculateApiCostOpenAI(mockModelInfo, 1000, 500)

 			// Input cost: (3.0 / 1_000_000) * 1000 = 0.003
 			// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
 			// Total: 0.003 + 0.0075 = 0.0105
-			expect(cost).toBe(0.0105)
+			expect(result.totalCost).toBe(0.0105)
+			expect(result.totalInputTokens).toBe(1000)
+			expect(result.totalOutputTokens).toBe(500)
 		})

 		it("should handle cache writes cost", () => {
-			const cost = calculateApiCostOpenAI(mockModelInfo, 3000, 500, 2000)
+			const result = calculateApiCostOpenAI(mockModelInfo, 3000, 500, 2000)

 			// Input cost: (3.0 / 1_000_000) * (3000 - 2000) = 0.003
 			// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
 			// Cache writes: (3.75 / 1_000_000) * 2000 = 0.0075
 			// Total: 0.003 + 0.0075 + 0.0075 = 0.018
-			expect(cost).toBeCloseTo(0.018, 6)
+			expect(result.totalCost).toBeCloseTo(0.018, 6)
+			expect(result.totalInputTokens).toBe(3000) // Total already includes cache
+			expect(result.totalOutputTokens).toBe(500)
 		})

 		it("should handle cache reads cost", () => {
-			const cost = calculateApiCostOpenAI(mockModelInfo, 4000, 500, undefined, 3000)
+			const result = calculateApiCostOpenAI(mockModelInfo, 4000, 500, undefined, 3000)

 			// Input cost: (3.0 / 1_000_000) * (4000 - 3000) = 0.003
 			// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
 			// Cache reads: (0.3 / 1_000_000) * 3000 = 0.0009
 			// Total: 0.003 + 0.0075 + 0.0009 = 0.0114
-			expect(cost).toBe(0.0114)
+			expect(result.totalCost).toBe(0.0114)
+			expect(result.totalInputTokens).toBe(4000) // Total already includes cache
+			expect(result.totalOutputTokens).toBe(500)
 		})

 		it("should handle all cost components together", () => {
-			const cost = calculateApiCostOpenAI(mockModelInfo, 6000, 500, 2000, 3000)
+			const result = calculateApiCostOpenAI(mockModelInfo, 6000, 500, 2000, 3000)

 			// Input cost: (3.0 / 1_000_000) * (6000 - 2000 - 3000) = 0.003
 			// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
 			// Cache writes: (3.75 / 1_000_000) * 2000 = 0.0075
 			// Cache reads: (0.3 / 1_000_000) * 3000 = 0.0009
 			// Total: 0.003 + 0.0075 + 0.0075 + 0.0009 = 0.0189
-			expect(cost).toBe(0.0189)
+			expect(result.totalCost).toBe(0.0189)
+			expect(result.totalInputTokens).toBe(6000) // Total already includes cache
+			expect(result.totalOutputTokens).toBe(500)
 		})

 		it("should handle missing prices gracefully", () => {
@@ -156,22 +180,28 @@ describe("Cost Utility", () => {
 				supportsPromptCache: true,
 			}

-			const cost = calculateApiCostOpenAI(modelWithoutPrices, 1000, 500, 2000, 3000)
-			expect(cost).toBe(0)
+			const result = calculateApiCostOpenAI(modelWithoutPrices, 1000, 500, 2000, 3000)
+			expect(result.totalCost).toBe(0)
+			expect(result.totalInputTokens).toBe(1000) // Total already includes cache
+			expect(result.totalOutputTokens).toBe(500)
 		})

 		it("should handle zero tokens", () => {
-			const cost = calculateApiCostOpenAI(mockModelInfo, 0, 0, 0, 0)
-			expect(cost).toBe(0)
+			const result = calculateApiCostOpenAI(mockModelInfo, 0, 0, 0, 0)
+			expect(result.totalCost).toBe(0)
+			expect(result.totalInputTokens).toBe(0)
+			expect(result.totalOutputTokens).toBe(0)
 		})

 		it("should handle undefined cache values", () => {
-			const cost = calculateApiCostOpenAI(mockModelInfo, 1000, 500)
+			const result = calculateApiCostOpenAI(mockModelInfo, 1000, 500)

 			// Input cost: (3.0 / 1_000_000) * 1000 = 0.003
 			// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
 			// Total: 0.003 + 0.0075 = 0.0105
-			expect(cost).toBe(0.0105)
+			expect(result.totalCost).toBe(0.0105)
+			expect(result.totalInputTokens).toBe(1000)
+			expect(result.totalOutputTokens).toBe(500)
 		})

 		it("should handle missing cache prices", () => {
@@ -181,13 +211,15 @@ describe("Cost Utility", () => {
 				cacheReadsPrice: undefined,
 			}

-			const cost = calculateApiCostOpenAI(modelWithoutCachePrices, 6000, 500, 2000, 3000)
+			const result = calculateApiCostOpenAI(modelWithoutCachePrices, 6000, 500, 2000, 3000)

 			// Should only include input and output costs
 			// Input cost: (3.0 / 1_000_000) * (6000 - 2000 - 3000) = 0.003
 			// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
 			// Total: 0.003 + 0.0075 = 0.0105
-			expect(cost).toBe(0.0105)
+			expect(result.totalCost).toBe(0.0105)
+			expect(result.totalInputTokens).toBe(6000) // Total already includes cache
+			expect(result.totalOutputTokens).toBe(500)
 		})
 	})
 })