Merged
16 changes: 9 additions & 7 deletions src/api/providers/anthropic.ts
@@ -230,17 +230,19 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHandler
}

if (inputTokens > 0 || outputTokens > 0 || cacheWriteTokens > 0 || cacheReadTokens > 0) {
const { totalCost } = calculateApiCostAnthropic(
this.getModel().info,
inputTokens,
outputTokens,
cacheWriteTokens,
cacheReadTokens,
)

yield {
type: "usage",
inputTokens: 0,
outputTokens: 0,
totalCost: calculateApiCostAnthropic(
this.getModel().info,
inputTokens,
outputTokens,
cacheWriteTokens,
cacheReadTokens,
),
totalCost,
}
}
}
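Every provider file below adapts to the same signature change: the shared cost helpers now return an object rather than a bare number, so call sites destructure totalCost (the ApiCostResult type is introduced in src/shared/cost.ts further down). A minimal sketch of the call-site change — token counts and pricing are made up, and the ModelInfo cast is a shortcut that skips the type's other fields:

import type { ModelInfo } from "@roo-code/types"
import { calculateApiCostAnthropic } from "../../shared/cost"

// Illustrative pricing only (USD per million tokens); not a real model entry.
const info = { inputPrice: 3, outputPrice: 15, cacheWritesPrice: 3.75, cacheReadsPrice: 0.3 } as ModelInfo

// Before this PR the helper returned a number:
//   const totalCost = calculateApiCostAnthropic(info, 1000, 500, 0, 0)
// After, it returns an ApiCostResult, so callers pull out the piece they need:
const { totalCost } = calculateApiCostAnthropic(info, 1000, 500, 0, 0)
console.log(totalCost) // 0.003 + 0.0075 = 0.0105 with the prices above
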
3 changes: 2 additions & 1 deletion src/api/providers/cerebras.ts
@@ -331,6 +331,7 @@ export class CerebrasHandler extends BaseProvider implements SingleCompletionHandler
const { info } = this.getModel()
// Use actual token usage from the last request
const { inputTokens, outputTokens } = this.lastUsage
return calculateApiCostOpenAI(info, inputTokens, outputTokens)
const { totalCost } = calculateApiCostOpenAI(info, inputTokens, outputTokens)
return totalCost
}
}
4 changes: 2 additions & 2 deletions src/api/providers/deepinfra.ts
@@ -131,9 +131,9 @@ export class DeepInfraHandler extends RouterProvider implements SingleCompletionHandler
const cacheWriteTokens = usage?.prompt_tokens_details?.cache_write_tokens || 0
const cacheReadTokens = usage?.prompt_tokens_details?.cached_tokens || 0

const totalCost = modelInfo
const { totalCost } = modelInfo
? calculateApiCostOpenAI(modelInfo, inputTokens, outputTokens, cacheWriteTokens, cacheReadTokens)
: 0
: { totalCost: 0 }

return {
type: "usage",
2 changes: 1 addition & 1 deletion src/api/providers/groq.ts
@@ -64,7 +64,7 @@ export class GroqHandler extends BaseOpenAiCompatibleProvider<GroqModelId> {
const cacheWriteTokens = 0

// Calculate cost using OpenAI-compatible cost calculation
const totalCost = calculateApiCostOpenAI(info, inputTokens, outputTokens, cacheWriteTokens, cacheReadTokens)
const { totalCost } = calculateApiCostOpenAI(info, inputTokens, outputTokens, cacheWriteTokens, cacheReadTokens)

yield {
type: "usage",
17 changes: 9 additions & 8 deletions src/api/providers/lite-llm.ts
@@ -165,22 +165,23 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHandler
(lastUsage as any).prompt_cache_hit_tokens ||
0

const { totalCost } = calculateApiCostOpenAI(
info,
lastUsage.prompt_tokens || 0,
lastUsage.completion_tokens || 0,
cacheWriteTokens,
cacheReadTokens,
)

const usageData: ApiStreamUsageChunk = {
type: "usage",
inputTokens: lastUsage.prompt_tokens || 0,
outputTokens: lastUsage.completion_tokens || 0,
cacheWriteTokens: cacheWriteTokens > 0 ? cacheWriteTokens : undefined,
cacheReadTokens: cacheReadTokens > 0 ? cacheReadTokens : undefined,
totalCost,
}

usageData.totalCost = calculateApiCostOpenAI(
info,
usageData.inputTokens,
usageData.outputTokens,
usageData.cacheWriteTokens || 0,
usageData.cacheReadTokens || 0,
)

yield usageData
}
} catch (error) {
4 changes: 2 additions & 2 deletions src/api/providers/openai-native.ts
@@ -99,8 +99,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletionHandler
const effectiveInfo = this.applyServiceTierPricing(model.info, effectiveTier)

// Pass total input tokens directly to calculateApiCostOpenAI
// The function handles subtracting both cache reads and writes internally (see shared/cost.ts:46)
const totalCost = calculateApiCostOpenAI(
// The function handles subtracting both cache reads and writes internally
const { totalCost } = calculateApiCostOpenAI(
effectiveInfo,
totalInputTokens,
totalOutputTokens,
4 changes: 2 additions & 2 deletions src/api/providers/requesty.ts
@@ -85,9 +85,9 @@ export class RequestyHandler extends BaseProvider implements SingleCompletionHandler
const outputTokens = requestyUsage?.completion_tokens || 0
const cacheWriteTokens = requestyUsage?.prompt_tokens_details?.caching_tokens || 0
const cacheReadTokens = requestyUsage?.prompt_tokens_details?.cached_tokens || 0
const totalCost = modelInfo
const { totalCost } = modelInfo
? calculateApiCostOpenAI(modelInfo, inputTokens, outputTokens, cacheWriteTokens, cacheReadTokens)
: 0
: { totalCost: 0 }

return {
type: "usage",
75 changes: 51 additions & 24 deletions src/core/task/Task.ts
@@ -74,7 +74,7 @@ import { RooTerminalProcess } from "../../integrations/terminal/types"
import { TerminalRegistry } from "../../integrations/terminal/TerminalRegistry"

// utils
import { calculateApiCostAnthropic } from "../../shared/cost"
import { calculateApiCostAnthropic, calculateApiCostOpenAI } from "../../shared/cost"
import { getWorkspacePath } from "../../utils/path"

// prompts
@@ -1886,21 +1886,35 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
}

const existingData = JSON.parse(this.clineMessages[lastApiReqIndex].text || "{}")

// Calculate total tokens and cost using provider-aware function
const modelId = getModelId(this.apiConfiguration)
const apiProtocol = getApiProtocol(this.apiConfiguration.apiProvider, modelId)

const costResult =
apiProtocol === "anthropic"
? calculateApiCostAnthropic(
this.api.getModel().info,
inputTokens,
outputTokens,
cacheWriteTokens,
cacheReadTokens,
)
: calculateApiCostOpenAI(
this.api.getModel().info,
inputTokens,
outputTokens,
cacheWriteTokens,
cacheReadTokens,
)

this.clineMessages[lastApiReqIndex].text = JSON.stringify({
...existingData,
tokensIn: inputTokens,
tokensOut: outputTokens,
tokensIn: costResult.totalInputTokens,
tokensOut: costResult.totalOutputTokens,
cacheWrites: cacheWriteTokens,
cacheReads: cacheReadTokens,
cost:
totalCost ??
calculateApiCostAnthropic(
this.api.getModel().info,
inputTokens,
outputTokens,
cacheWriteTokens,
cacheReadTokens,
),
cost: totalCost ?? costResult.totalCost,
cancelReason,
streamingFailedMessage,
} satisfies ClineApiReqInfo)
@@ -2104,21 +2118,34 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
await this.updateClineMessage(apiReqMessage)
}

// Capture telemetry
// Capture telemetry with provider-aware cost calculation
const modelId = getModelId(this.apiConfiguration)
const apiProtocol = getApiProtocol(this.apiConfiguration.apiProvider, modelId)

// Use the appropriate cost function based on the API protocol
const costResult =
apiProtocol === "anthropic"
? calculateApiCostAnthropic(
this.api.getModel().info,
tokens.input,
tokens.output,
tokens.cacheWrite,
tokens.cacheRead,
)
: calculateApiCostOpenAI(
this.api.getModel().info,
tokens.input,
tokens.output,
tokens.cacheWrite,
tokens.cacheRead,
)

TelemetryService.instance.captureLlmCompletion(this.taskId, {
inputTokens: tokens.input,
outputTokens: tokens.output,
inputTokens: costResult.totalInputTokens,
outputTokens: costResult.totalOutputTokens,
cacheWriteTokens: tokens.cacheWrite,
cacheReadTokens: tokens.cacheRead,
cost:
tokens.total ??
calculateApiCostAnthropic(
this.api.getModel().info,
tokens.input,
tokens.output,
tokens.cacheWrite,
tokens.cacheRead,
),
cost: tokens.total ?? costResult.totalCost,
})
}
}
38 changes: 32 additions & 6 deletions src/shared/cost.ts
@@ -1,18 +1,31 @@
import type { ModelInfo } from "@roo-code/types"

export interface ApiCostResult {
totalInputTokens: number
totalOutputTokens: number
totalCost: number
}

function calculateApiCostInternal(
modelInfo: ModelInfo,
inputTokens: number,
outputTokens: number,
cacheCreationInputTokens: number,
cacheReadInputTokens: number,
): number {
totalInputTokens: number,
totalOutputTokens: number,
): ApiCostResult {
const cacheWritesCost = ((modelInfo.cacheWritesPrice || 0) / 1_000_000) * cacheCreationInputTokens
const cacheReadsCost = ((modelInfo.cacheReadsPrice || 0) / 1_000_000) * cacheReadInputTokens
const baseInputCost = ((modelInfo.inputPrice || 0) / 1_000_000) * inputTokens
const outputCost = ((modelInfo.outputPrice || 0) / 1_000_000) * outputTokens
const totalCost = cacheWritesCost + cacheReadsCost + baseInputCost + outputCost
return totalCost

return {
totalInputTokens,
totalOutputTokens,
totalCost,
}
}

// For Anthropic compliant usage, the input tokens count does NOT include the
@@ -23,13 +36,22 @@ export function calculateApiCostAnthropic(
outputTokens: number,
cacheCreationInputTokens?: number,
cacheReadInputTokens?: number,
): number {
): ApiCostResult {
const cacheCreation = cacheCreationInputTokens || 0
const cacheRead = cacheReadInputTokens || 0

// For Anthropic: inputTokens does NOT include cached tokens
// Total input = base input + cache creation + cache reads
const totalInputTokens = inputTokens + cacheCreation + cacheRead

return calculateApiCostInternal(
modelInfo,
inputTokens,
outputTokens,
cacheCreationInputTokens || 0,
cacheReadInputTokens || 0,
cacheCreation,
cacheRead,
totalInputTokens,
outputTokens,
)
}

@@ -40,17 +62,21 @@ export function calculateApiCostOpenAI(
outputTokens: number,
cacheCreationInputTokens?: number,
cacheReadInputTokens?: number,
): number {
): ApiCostResult {
const cacheCreationInputTokensNum = cacheCreationInputTokens || 0
const cacheReadInputTokensNum = cacheReadInputTokens || 0
const nonCachedInputTokens = Math.max(0, inputTokens - cacheCreationInputTokensNum - cacheReadInputTokensNum)

// For OpenAI: inputTokens ALREADY includes all tokens (cached + non-cached)
// So we pass the original inputTokens as the total
return calculateApiCostInternal(
modelInfo,
nonCachedInputTokens,
outputTokens,
cacheCreationInputTokensNum,
cacheReadInputTokensNum,
inputTokens,
outputTokens,
)
}

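A worked comparison of the two conventions described in the comments above, with illustrative numbers (the ModelInfo stub only carries the pricing fields the calculation reads, and the import path assumes the snippet sits next to cost.ts):

import type { ModelInfo } from "@roo-code/types"
import { calculateApiCostAnthropic, calculateApiCostOpenAI } from "./cost"

// Hypothetical pricing: $3/M input, $15/M output, $3.75/M cache writes, $0.30/M cache reads.
const info = { inputPrice: 3, outputPrice: 15, cacheWritesPrice: 3.75, cacheReadsPrice: 0.3 } as ModelInfo

// Anthropic-style usage: inputTokens EXCLUDES cache tokens.
// 100 non-cached input, 300 output, 2000 cache writes, 5000 cache reads.
const a = calculateApiCostAnthropic(info, 100, 300, 2000, 5000)
// a.totalInputTokens = 100 + 2000 + 5000 = 7100
// a.totalCost = 100*3/1e6 + 300*15/1e6 + 2000*3.75/1e6 + 5000*0.3/1e6 = 0.0138

// OpenAI-style usage: inputTokens ALREADY INCLUDES cache tokens, so the same
// request reports 7100 input tokens up front.
const o = calculateApiCostOpenAI(info, 7100, 300, 2000, 5000)
// non-cached input = 7100 - 2000 - 5000 = 100, so o.totalCost equals a.totalCost
// o.totalInputTokens = 7100 (passed through unchanged)

console.log(a, o)
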
15 changes: 6 additions & 9 deletions src/shared/getApiMetrics.ts
@@ -80,15 +80,12 @@ export function getApiMetrics(messages: ClineMessage[]) {
if (message.type === "say" && message.say === "api_req_started" && message.text) {
try {
const parsedText: ParsedApiReqStartedTextType = JSON.parse(message.text)
const { tokensIn, tokensOut, cacheWrites, cacheReads, apiProtocol } = parsedText

// Calculate context tokens based on API protocol.
if (apiProtocol === "anthropic") {
result.contextTokens = (tokensIn || 0) + (tokensOut || 0) + (cacheWrites || 0) + (cacheReads || 0)
} else {
// For OpenAI (or when protocol is not specified).
result.contextTokens = (tokensIn || 0) + (tokensOut || 0)
}
const { tokensIn, tokensOut } = parsedText
[Review comment on the line above] ParsedApiReqStartedTextType marks tokensIn/tokensOut/cacheWrites/cacheReads as required, but this block guards for missing fields with typeof checks. api_req_started payloads may not always include these keys (e.g., before usage aggregation), so the type should make them optional to match runtime reality and prevent misleading typings. Suggested change: make these fields optional in getApiMetrics.ts. (A sketch of this suggestion follows this file's diff.)

// Since tokensIn now stores TOTAL input tokens (including cache tokens),
// we no longer need to add cacheWrites and cacheReads separately.
// This applies to both Anthropic and OpenAI protocols.
result.contextTokens = (tokensIn || 0) + (tokensOut || 0)
} catch (error) {
console.error("Error parsing JSON:", error)
continue
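For reference, a minimal sketch of the reviewer's suggestion above — making the token fields optional — assuming ParsedApiReqStartedTextType is a local type whose only fields are the ones named in the diff (the real type may carry more, e.g. apiProtocol):

// Hypothetical standalone version of the parsing step; field names come from the diff above.
type ParsedApiReqStartedTextType = {
	tokensIn?: number
	tokensOut?: number
	cacheWrites?: number
	cacheReads?: number
}

// A payload written before usage aggregation may omit the token keys entirely.
const parsedText: ParsedApiReqStartedTextType = JSON.parse('{"tokensIn": 7100, "tokensOut": 300}')
const { tokensIn, tokensOut } = parsedText

// With optional fields, the `|| 0` guards honestly reflect what the payload may contain.
const contextTokens = (tokensIn || 0) + (tokensOut || 0)
console.log(contextTokens) // 7400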