Merged
41 changes: 36 additions & 5 deletions src/api/providers/anthropic.ts
@@ -17,6 +17,7 @@ import { getModelParams } from "../transform/model-params"
 
 import { BaseProvider } from "./base-provider"
 import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
+import { calculateApiCostAnthropic } from "../../shared/cost"
 
 export class AnthropicHandler extends BaseProvider implements SingleCompletionHandler {
 	private options: ApiHandlerOptions
@@ -132,20 +133,35 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHandler {
 				}
 			}
 
+		let inputTokens = 0
+		let outputTokens = 0
+		let cacheWriteTokens = 0
+		let cacheReadTokens = 0
+
 		for await (const chunk of stream) {
 			switch (chunk.type) {
 				case "message_start": {
 					// Tells us cache reads/writes/input/output.
-					const usage = chunk.message.usage
+					const {
+						input_tokens = 0,
+						output_tokens = 0,
+						cache_creation_input_tokens,
+						cache_read_input_tokens,
+					} = chunk.message.usage
 
 					yield {
 						type: "usage",
-						inputTokens: usage.input_tokens || 0,
-						outputTokens: usage.output_tokens || 0,
-						cacheWriteTokens: usage.cache_creation_input_tokens || undefined,
-						cacheReadTokens: usage.cache_read_input_tokens || undefined,
+						inputTokens: input_tokens,
+						outputTokens: output_tokens,
+						cacheWriteTokens: cache_creation_input_tokens || undefined,
+						cacheReadTokens: cache_read_input_tokens || undefined,
 					}
 
+					inputTokens += input_tokens
+					outputTokens += output_tokens
+					cacheWriteTokens += cache_creation_input_tokens || 0
+					cacheReadTokens += cache_read_input_tokens || 0
+
 					break
 				}
 				case "message_delta":
@@ -198,6 +214,21 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHandler {
 					break
 				}
 			}
+
+		if (inputTokens > 0 || outputTokens > 0 || cacheWriteTokens > 0 || cacheReadTokens > 0) {
+			yield {
+				type: "usage",
+				inputTokens: 0,
+				outputTokens: 0,
+				totalCost: calculateApiCostAnthropic(
+					this.getModel().info,
+					inputTokens,
+					outputTokens,
+					cacheWriteTokens,
+					cacheReadTokens,
+				),
+			}
+		}
 	}
 
 	getModel() {
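Taken together, the anthropic.ts change keeps streaming per-event usage as before, but now also accumulates the counts so a single cost-bearing usage chunk can be emitted once the stream ends. The following is a minimal sketch of that pattern; the UsageEvent/UsageChunk interfaces and the computeCost callback are hypothetical stand-ins for the SDK usage type, the handler's stream chunk type, and calculateApiCostAnthropic, not the real signatures:

```ts
// Sketch of the accumulate-then-settle usage pattern (illustrative types).
interface UsageEvent {
	input_tokens?: number
	output_tokens?: number
	cache_creation_input_tokens?: number
	cache_read_input_tokens?: number
}

interface UsageChunk {
	type: "usage"
	inputTokens: number
	outputTokens: number
	cacheWriteTokens?: number
	cacheReadTokens?: number
	totalCost?: number
}

async function* emitUsage(
	events: AsyncIterable<UsageEvent>,
	computeCost: (input: number, output: number, cacheWrite: number, cacheRead: number) => number,
): AsyncGenerator<UsageChunk> {
	let inputTokens = 0
	let outputTokens = 0
	let cacheWriteTokens = 0
	let cacheReadTokens = 0

	for await (const usage of events) {
		const { input_tokens = 0, output_tokens = 0, cache_creation_input_tokens, cache_read_input_tokens } = usage

		// Per-event usage is surfaced immediately so consumers can update live.
		yield {
			type: "usage",
			inputTokens: input_tokens,
			outputTokens: output_tokens,
			cacheWriteTokens: cache_creation_input_tokens || undefined,
			cacheReadTokens: cache_read_input_tokens || undefined,
		}

		// ...and accumulated so the cost can be settled once, at the end.
		inputTokens += input_tokens
		outputTokens += output_tokens
		cacheWriteTokens += cache_creation_input_tokens || 0
		cacheReadTokens += cache_read_input_tokens || 0
	}

	// A final zero-token chunk carries only the total cost of the request.
	if (inputTokens > 0 || outputTokens > 0 || cacheWriteTokens > 0 || cacheReadTokens > 0) {
		yield {
			type: "usage",
			inputTokens: 0,
			outputTokens: 0,
			totalCost: computeCost(inputTokens, outputTokens, cacheWriteTokens, cacheReadTokens),
		}
	}
}
```

Settling the cost once, from the accumulated totals, avoids attributing a misleading partial cost to each intermediate chunk when a provider reports usage across several events.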
19 changes: 11 additions & 8 deletions src/core/task/Task.ts
@@ -1439,19 +1439,22 @@ export class Task extends EventEmitter<ClineEvents> {
 		} finally {
 			this.isStreaming = false
 		}
-		if (
-			inputTokens > 0 ||
-			outputTokens > 0 ||
-			cacheWriteTokens > 0 ||
-			cacheReadTokens > 0 ||
-			typeof totalCost !== "undefined"
-		) {
+
+		if (inputTokens > 0 || outputTokens > 0 || cacheWriteTokens > 0 || cacheReadTokens > 0) {
 			TelemetryService.instance.captureLlmCompletion(this.taskId, {
 				inputTokens,
 				outputTokens,
 				cacheWriteTokens,
 				cacheReadTokens,
-				cost: totalCost,
+				cost:
+					totalCost ??
+					calculateApiCostAnthropic(
Collaborator Author: Ideally we'd require all providers to emit a totalCost, but there's no enforcement mechanism for that right now, so we'll re-use this fallback.

+						this.api.getModel().info,
+						inputTokens,
+						outputTokens,
+						cacheWriteTokens,
+						cacheReadTokens,
+					),
 			})
 		}
 
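One subtlety in the fallback above: `totalCost ?? calculateApiCostAnthropic(...)` recomputes only when the provider reported no cost at all, whereas `||` would also recompute when a provider legitimately reported a cost of 0. A tiny illustration, with made-up values:

```ts
// Why `??` rather than `||` for the cost fallback (illustrative values only).
const reported: number | undefined = 0 // provider genuinely reported a free request
const fallback = 0.0123 // what a recomputation from token counts might yield

const withNullish = reported ?? fallback // 0      — keeps the provider's report
const withOr = reported || fallback //      0.0123 — would silently discard the reported 0
console.log(withNullish, withOr)
```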
2 changes: 1 addition & 1 deletion src/services/checkpoints/__tests__/RepoPerTaskCheckpointService.spec.ts
@@ -12,7 +12,7 @@ import * as fileSearch from "../../../services/search/file-search"

 import { RepoPerTaskCheckpointService } from "../RepoPerTaskCheckpointService"
 
-vitest.setConfig({ testTimeout: 10_000 })
+vitest.setConfig({ testTimeout: 20_000 })
Collaborator Author: This is flaking for the win32 GHA runner.


const tmpDir = path.join(os.tmpdir(), "CheckpointService")

12 changes: 6 additions & 6 deletions src/shared/cost.ts
@@ -15,26 +15,25 @@ function calculateApiCostInternal(
 	return totalCost
 }
 
-// For Anthropic compliant usage, the input tokens count does NOT include the cached tokens
+// For Anthropic compliant usage, the input tokens count does NOT include the
+// cached tokens.
 export function calculateApiCostAnthropic(
 	modelInfo: ModelInfo,
 	inputTokens: number,
 	outputTokens: number,
 	cacheCreationInputTokens?: number,
 	cacheReadInputTokens?: number,
 ): number {
-	const cacheCreationInputTokensNum = cacheCreationInputTokens || 0
-	const cacheReadInputTokensNum = cacheReadInputTokens || 0
 	return calculateApiCostInternal(
 		modelInfo,
 		inputTokens,
 		outputTokens,
-		cacheCreationInputTokensNum,
-		cacheReadInputTokensNum,
+		cacheCreationInputTokens || 0,
+		cacheReadInputTokens || 0,
 	)
 }
 
-// For OpenAI compliant usage, the input tokens count INCLUDES the cached tokens
+// For OpenAI compliant usage, the input tokens count INCLUDES the cached tokens.
 export function calculateApiCostOpenAI(
 	modelInfo: ModelInfo,
 	inputTokens: number,
@@ -45,6 +44,7 @@ export function calculateApiCostOpenAI(
 	const cacheCreationInputTokensNum = cacheCreationInputTokens || 0
 	const cacheReadInputTokensNum = cacheReadInputTokens || 0
 	const nonCachedInputTokens = Math.max(0, inputTokens - cacheCreationInputTokensNum - cacheReadInputTokensNum)
+
 	return calculateApiCostInternal(
 		modelInfo,
 		nonCachedInputTokens,
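The two comments in cost.ts encode the practical difference between the providers' usage accounting: Anthropic reports input_tokens with cached tokens already excluded, while OpenAI-compatible usage reports a single input total that includes them, so the cached portion must be subtracted before pricing. A worked comparison under assumed, illustrative prices (only the token bookkeeping mirrors cost.ts):

```ts
// Hypothetical pricing, in dollars per 1M tokens.
const price = { input: 3, output: 15, cacheRead: 0.3 }
const perTok = (usdPerMillion: number) => usdPerMillion / 1_000_000

const outputTokens = 500
const cacheReadTokens = 2_000

// Anthropic-style usage: input_tokens EXCLUDES cached tokens already.
const anthropicInput = 1_000 // just the fresh, non-cached input
const anthropicCost =
	perTok(price.input) * anthropicInput +
	perTok(price.cacheRead) * cacheReadTokens +
	perTok(price.output) * outputTokens

// OpenAI-style usage: the input count INCLUDES cached tokens, so the
// cached portion is subtracted before pricing the remainder.
const openAIInput = 3_000 // 1,000 fresh + 2,000 cached, reported as one number
const nonCachedInput = Math.max(0, openAIInput - cacheReadTokens)
const openAICost =
	perTok(price.input) * nonCachedInput +
	perTok(price.cacheRead) * cacheReadTokens +
	perTok(price.output) * outputTokens

console.log(anthropicCost === openAICost) // true — both price 1,000 fresh input tokens
```

Both conventions describe the same request; the subtraction in calculateApiCostOpenAI is what reconciles them before the shared calculateApiCostInternal is applied.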