
Commit 416fa57

Fix cost and token tracking between provider styles (#8954)
1 parent 4a096e1 commit 416fa57

11 files changed: +180, -94 lines


src/api/providers/anthropic.ts

Lines changed: 9 additions & 7 deletions
```diff
@@ -230,17 +230,19 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHandler
 		}

 		if (inputTokens > 0 || outputTokens > 0 || cacheWriteTokens > 0 || cacheReadTokens > 0) {
+			const { totalCost } = calculateApiCostAnthropic(
+				this.getModel().info,
+				inputTokens,
+				outputTokens,
+				cacheWriteTokens,
+				cacheReadTokens,
+			)
+
 			yield {
 				type: "usage",
 				inputTokens: 0,
 				outputTokens: 0,
-				totalCost: calculateApiCostAnthropic(
-					this.getModel().info,
-					inputTokens,
-					outputTokens,
-					cacheWriteTokens,
-					cacheReadTokens,
-				),
+				totalCost,
 			}
 		}
 	}
```

src/api/providers/cerebras.ts

Lines changed: 2 additions & 1 deletion
```diff
@@ -331,6 +331,7 @@ export class CerebrasHandler extends BaseProvider implements SingleCompletionHandler
 		const { info } = this.getModel()
 		// Use actual token usage from the last request
 		const { inputTokens, outputTokens } = this.lastUsage
-		return calculateApiCostOpenAI(info, inputTokens, outputTokens)
+		const { totalCost } = calculateApiCostOpenAI(info, inputTokens, outputTokens)
+		return totalCost
 	}
 }
```

src/api/providers/deepinfra.ts

Lines changed: 2 additions & 2 deletions
```diff
@@ -131,9 +131,9 @@ export class DeepInfraHandler extends RouterProvider implements SingleCompletionHandler
 		const cacheWriteTokens = usage?.prompt_tokens_details?.cache_write_tokens || 0
 		const cacheReadTokens = usage?.prompt_tokens_details?.cached_tokens || 0

-		const totalCost = modelInfo
+		const { totalCost } = modelInfo
 			? calculateApiCostOpenAI(modelInfo, inputTokens, outputTokens, cacheWriteTokens, cacheReadTokens)
-			: 0
+			: { totalCost: 0 }

 		return {
 			type: "usage",
```

src/api/providers/groq.ts

Lines changed: 1 addition & 1 deletion
```diff
@@ -64,7 +64,7 @@ export class GroqHandler extends BaseOpenAiCompatibleProvider<GroqModelId> {
 		const cacheWriteTokens = 0

 		// Calculate cost using OpenAI-compatible cost calculation
-		const totalCost = calculateApiCostOpenAI(info, inputTokens, outputTokens, cacheWriteTokens, cacheReadTokens)
+		const { totalCost } = calculateApiCostOpenAI(info, inputTokens, outputTokens, cacheWriteTokens, cacheReadTokens)

 		yield {
 			type: "usage",
```

src/api/providers/lite-llm.ts

Lines changed: 9 additions & 8 deletions
```diff
@@ -165,22 +165,23 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHandler
 					(lastUsage as any).prompt_cache_hit_tokens ||
 					0

+				const { totalCost } = calculateApiCostOpenAI(
+					info,
+					lastUsage.prompt_tokens || 0,
+					lastUsage.completion_tokens || 0,
+					cacheWriteTokens,
+					cacheReadTokens,
+				)
+
 				const usageData: ApiStreamUsageChunk = {
 					type: "usage",
 					inputTokens: lastUsage.prompt_tokens || 0,
 					outputTokens: lastUsage.completion_tokens || 0,
 					cacheWriteTokens: cacheWriteTokens > 0 ? cacheWriteTokens : undefined,
 					cacheReadTokens: cacheReadTokens > 0 ? cacheReadTokens : undefined,
+					totalCost,
 				}

-				usageData.totalCost = calculateApiCostOpenAI(
-					info,
-					usageData.inputTokens,
-					usageData.outputTokens,
-					usageData.cacheWriteTokens || 0,
-					usageData.cacheReadTokens || 0,
-				)
-
 				yield usageData
 			}
 		} catch (error) {
```

src/api/providers/openai-native.ts

Lines changed: 2 additions & 2 deletions
```diff
@@ -99,8 +99,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletionHandler
 		const effectiveInfo = this.applyServiceTierPricing(model.info, effectiveTier)

 		// Pass total input tokens directly to calculateApiCostOpenAI
-		// The function handles subtracting both cache reads and writes internally (see shared/cost.ts:46)
-		const totalCost = calculateApiCostOpenAI(
+		// The function handles subtracting both cache reads and writes internally
+		const { totalCost } = calculateApiCostOpenAI(
 			effectiveInfo,
 			totalInputTokens,
 			totalOutputTokens,
```

src/api/providers/requesty.ts

Lines changed: 2 additions & 2 deletions
```diff
@@ -85,9 +85,9 @@ export class RequestyHandler extends BaseProvider implements SingleCompletionHandler
 		const outputTokens = requestyUsage?.completion_tokens || 0
 		const cacheWriteTokens = requestyUsage?.prompt_tokens_details?.caching_tokens || 0
 		const cacheReadTokens = requestyUsage?.prompt_tokens_details?.cached_tokens || 0
-		const totalCost = modelInfo
+		const { totalCost } = modelInfo
 			? calculateApiCostOpenAI(modelInfo, inputTokens, outputTokens, cacheWriteTokens, cacheReadTokens)
-			: 0
+			: { totalCost: 0 }

 		return {
 			type: "usage",
```

src/core/task/Task.ts

Lines changed: 51 additions & 24 deletions
```diff
@@ -74,7 +74,7 @@ import { RooTerminalProcess } from "../../integrations/terminal/types"
 import { TerminalRegistry } from "../../integrations/terminal/TerminalRegistry"

 // utils
-import { calculateApiCostAnthropic } from "../../shared/cost"
+import { calculateApiCostAnthropic, calculateApiCostOpenAI } from "../../shared/cost"
 import { getWorkspacePath } from "../../utils/path"

 // prompts
@@ -1886,21 +1886,35 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 		}

 		const existingData = JSON.parse(this.clineMessages[lastApiReqIndex].text || "{}")
+
+		// Calculate total tokens and cost using provider-aware function
+		const modelId = getModelId(this.apiConfiguration)
+		const apiProtocol = getApiProtocol(this.apiConfiguration.apiProvider, modelId)
+
+		const costResult =
+			apiProtocol === "anthropic"
+				? calculateApiCostAnthropic(
+						this.api.getModel().info,
+						inputTokens,
+						outputTokens,
+						cacheWriteTokens,
+						cacheReadTokens,
+					)
+				: calculateApiCostOpenAI(
+						this.api.getModel().info,
+						inputTokens,
+						outputTokens,
+						cacheWriteTokens,
+						cacheReadTokens,
+					)
+
 		this.clineMessages[lastApiReqIndex].text = JSON.stringify({
 			...existingData,
-			tokensIn: inputTokens,
-			tokensOut: outputTokens,
+			tokensIn: costResult.totalInputTokens,
+			tokensOut: costResult.totalOutputTokens,
 			cacheWrites: cacheWriteTokens,
 			cacheReads: cacheReadTokens,
-			cost:
-				totalCost ??
-				calculateApiCostAnthropic(
-					this.api.getModel().info,
-					inputTokens,
-					outputTokens,
-					cacheWriteTokens,
-					cacheReadTokens,
-				),
+			cost: totalCost ?? costResult.totalCost,
 			cancelReason,
 			streamingFailedMessage,
 		} satisfies ClineApiReqInfo)
@@ -2104,21 +2118,34 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 			await this.updateClineMessage(apiReqMessage)
 		}

-		// Capture telemetry
+		// Capture telemetry with provider-aware cost calculation
+		const modelId = getModelId(this.apiConfiguration)
+		const apiProtocol = getApiProtocol(this.apiConfiguration.apiProvider, modelId)
+
+		// Use the appropriate cost function based on the API protocol
+		const costResult =
+			apiProtocol === "anthropic"
+				? calculateApiCostAnthropic(
+						this.api.getModel().info,
+						tokens.input,
+						tokens.output,
+						tokens.cacheWrite,
+						tokens.cacheRead,
+					)
+				: calculateApiCostOpenAI(
+						this.api.getModel().info,
+						tokens.input,
+						tokens.output,
+						tokens.cacheWrite,
+						tokens.cacheRead,
+					)
+
 		TelemetryService.instance.captureLlmCompletion(this.taskId, {
-			inputTokens: tokens.input,
-			outputTokens: tokens.output,
+			inputTokens: costResult.totalInputTokens,
+			outputTokens: costResult.totalOutputTokens,
 			cacheWriteTokens: tokens.cacheWrite,
 			cacheReadTokens: tokens.cacheRead,
-			cost:
-				tokens.total ??
-				calculateApiCostAnthropic(
-					this.api.getModel().info,
-					tokens.input,
-					tokens.output,
-					tokens.cacheWrite,
-					tokens.cacheRead,
-				),
+			cost: tokens.total ?? costResult.totalCost,
 		})
 	}
 }
```

src/shared/cost.ts

Lines changed: 32 additions & 6 deletions
```diff
@@ -1,18 +1,31 @@
 import type { ModelInfo } from "@roo-code/types"

+export interface ApiCostResult {
+	totalInputTokens: number
+	totalOutputTokens: number
+	totalCost: number
+}
+
 function calculateApiCostInternal(
 	modelInfo: ModelInfo,
 	inputTokens: number,
 	outputTokens: number,
 	cacheCreationInputTokens: number,
 	cacheReadInputTokens: number,
-): number {
+	totalInputTokens: number,
+	totalOutputTokens: number,
+): ApiCostResult {
 	const cacheWritesCost = ((modelInfo.cacheWritesPrice || 0) / 1_000_000) * cacheCreationInputTokens
 	const cacheReadsCost = ((modelInfo.cacheReadsPrice || 0) / 1_000_000) * cacheReadInputTokens
 	const baseInputCost = ((modelInfo.inputPrice || 0) / 1_000_000) * inputTokens
 	const outputCost = ((modelInfo.outputPrice || 0) / 1_000_000) * outputTokens
 	const totalCost = cacheWritesCost + cacheReadsCost + baseInputCost + outputCost
-	return totalCost
+
+	return {
+		totalInputTokens,
+		totalOutputTokens,
+		totalCost,
+	}
 }

 // For Anthropic compliant usage, the input tokens count does NOT include the
@@ -23,13 +36,22 @@ export function calculateApiCostAnthropic(
 	outputTokens: number,
 	cacheCreationInputTokens?: number,
 	cacheReadInputTokens?: number,
-): number {
+): ApiCostResult {
+	const cacheCreation = cacheCreationInputTokens || 0
+	const cacheRead = cacheReadInputTokens || 0
+
+	// For Anthropic: inputTokens does NOT include cached tokens
+	// Total input = base input + cache creation + cache reads
+	const totalInputTokens = inputTokens + cacheCreation + cacheRead
+
 	return calculateApiCostInternal(
 		modelInfo,
 		inputTokens,
 		outputTokens,
-		cacheCreationInputTokens || 0,
-		cacheReadInputTokens || 0,
+		cacheCreation,
+		cacheRead,
+		totalInputTokens,
+		outputTokens,
 	)
 }

@@ -40,17 +62,21 @@ export function calculateApiCostOpenAI(
 	outputTokens: number,
 	cacheCreationInputTokens?: number,
 	cacheReadInputTokens?: number,
-): number {
+): ApiCostResult {
 	const cacheCreationInputTokensNum = cacheCreationInputTokens || 0
 	const cacheReadInputTokensNum = cacheReadInputTokens || 0
 	const nonCachedInputTokens = Math.max(0, inputTokens - cacheCreationInputTokensNum - cacheReadInputTokensNum)

+	// For OpenAI: inputTokens ALREADY includes all tokens (cached + non-cached)
+	// So we pass the original inputTokens as the total
 	return calculateApiCostInternal(
 		modelInfo,
 		nonCachedInputTokens,
 		outputTokens,
 		cacheCreationInputTokensNum,
 		cacheReadInputTokensNum,
+		inputTokens,
+		outputTokens,
 	)
 }
```
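To make the new ApiCostResult return shape concrete, here is a worked example with made-up prices and token counts (not part of the commit; the import path is illustrative). The same request reported Anthropic-style (cached tokens excluded from inputTokens) and OpenAI-style (cached tokens included) should produce the same totalInputTokens and totalCost.

```typescript
import type { ModelInfo } from "@roo-code/types"
import { calculateApiCostAnthropic, calculateApiCostOpenAI } from "./cost"

// Made-up pricing per million tokens; a real ModelInfo has more required fields, hence the cast.
const info = { inputPrice: 3, outputPrice: 15, cacheWritesPrice: 3.75, cacheReadsPrice: 0.3 } as unknown as ModelInfo

// Anthropic-style usage: 100 non-cached input tokens, 50 output, 2,000 cache writes, 10,000 cache reads.
const anthropic = calculateApiCostAnthropic(info, 100, 50, 2_000, 10_000)
// anthropic.totalInputTokens === 100 + 2_000 + 10_000 === 12_100
// anthropic.totalCost === (100 * 3 + 50 * 15 + 2_000 * 3.75 + 10_000 * 0.3) / 1_000_000 === 0.01155

// OpenAI-style usage for the same request: inputTokens already includes the cached tokens.
const openai = calculateApiCostOpenAI(info, 12_100, 50, 2_000, 10_000)
// openai.totalInputTokens === 12_100 (passed through unchanged)
// Non-cached input is max(0, 12_100 - 2_000 - 10_000) === 100, so openai.totalCost === anthropic.totalCost
```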

src/shared/getApiMetrics.ts

Lines changed: 6 additions & 9 deletions
```diff
@@ -80,15 +80,12 @@ export function getApiMetrics(messages: ClineMessage[]) {
 		if (message.type === "say" && message.say === "api_req_started" && message.text) {
 			try {
 				const parsedText: ParsedApiReqStartedTextType = JSON.parse(message.text)
-				const { tokensIn, tokensOut, cacheWrites, cacheReads, apiProtocol } = parsedText
-
-				// Calculate context tokens based on API protocol.
-				if (apiProtocol === "anthropic") {
-					result.contextTokens = (tokensIn || 0) + (tokensOut || 0) + (cacheWrites || 0) + (cacheReads || 0)
-				} else {
-					// For OpenAI (or when protocol is not specified).
-					result.contextTokens = (tokensIn || 0) + (tokensOut || 0)
-				}
+				const { tokensIn, tokensOut } = parsedText
+
+				// Since tokensIn now stores TOTAL input tokens (including cache tokens),
+				// we no longer need to add cacheWrites and cacheReads separately.
+				// This applies to both Anthropic and OpenAI protocols.
+				result.contextTokens = (tokensIn || 0) + (tokensOut || 0)
 			} catch (error) {
 				console.error("Error parsing JSON:", error)
 				continue
```
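With illustrative numbers (made up, not from the commit), the simplified calculation reads the totals that Task.ts now writes, so cache tokens are counted exactly once regardless of protocol:

```typescript
// Illustrative api_req_started payload as written by the updated Task.ts,
// where tokensIn already includes cache write/read tokens.
const parsedText = { tokensIn: 12_100, tokensOut: 50, cacheWrites: 2_000, cacheReads: 10_000 }

// Applying the old Anthropic branch to this payload would give
// 12_100 + 50 + 2_000 + 10_000 = 24_150, double-counting the cache tokens.
// The protocol-independent calculation above yields:
const contextTokens = (parsedText.tokensIn || 0) + (parsedText.tokensOut || 0) // 12_150
```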
