
Commit d6184e9

OpenAI & DeepSeek cost calculation (RooCodeInc#1864)
* Add OpenAI-compatible cost calculation
* Requesty: Prepare for correct price calculation
* Native OpenAI: Update model caching info. According to [OpenAI's website](https://platform.openai.com/docs/guides/prompt-caching), gpt-4o, gpt-4o-mini, o1-preview and o1-mini support caching. For gpt-4o, even though gpt-4o-2024-05-13 and chatgpt-4o-latest do not support caching, users will simply see zero cached tokens, which helps avoid confusion.
* Native OpenAI: Call getModel once
* Native OpenAI: Extract yield usage into a method
* Native OpenAI: Add caching and cost info to task header
* DeepSeek: Add cost info to task header
* Add changeset
1 parent d01b994 commit d6184e9

File tree

9 files changed: +210 -67 lines changed
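The hunks below call two helpers from src/utils/cost.ts, calculateApiCostAnthropic and calculateApiCostOpenAI, but that file is not among the diffs shown on this page. Judging from the call sites, the two differ only in how the input-token count is interpreted: Anthropic-style usage reports input tokens separately from cache reads and writes, while OpenAI-style usage folds cache reads and writes into prompt_tokens. A minimal sketch of that split, assuming prices are USD per million tokens as in src/shared/api.ts (the helper names come from the diff; the bodies and the ModelPricing type are illustrative assumptions, not the project's actual implementation):

// Sketch only: src/utils/cost.ts is not part of the hunks shown here.
interface ModelPricing {
	inputPrice?: number
	outputPrice?: number
	cacheWritesPrice?: number
	cacheReadsPrice?: number
}

function costFromParts(
	info: ModelPricing,
	nonCachedInputTokens: number,
	outputTokens: number,
	cacheWriteTokens: number,
	cacheReadTokens: number,
): number {
	return (
		((info.inputPrice || 0) / 1_000_000) * nonCachedInputTokens +
		((info.outputPrice || 0) / 1_000_000) * outputTokens +
		((info.cacheWritesPrice || 0) / 1_000_000) * cacheWriteTokens +
		((info.cacheReadsPrice || 0) / 1_000_000) * cacheReadTokens
	)
}

// Anthropic-style usage: inputTokens already excludes cache reads/writes.
export function calculateApiCostAnthropic(
	info: ModelPricing,
	inputTokens: number,
	outputTokens: number,
	cacheWriteTokens = 0,
	cacheReadTokens = 0,
): number {
	return costFromParts(info, inputTokens, outputTokens, cacheWriteTokens, cacheReadTokens)
}

// OpenAI-style usage: prompt_tokens includes the cached portion, so subtract
// cache reads/writes before applying the regular input price.
export function calculateApiCostOpenAI(
	info: ModelPricing,
	inputTokens: number,
	outputTokens: number,
	cacheWriteTokens = 0,
	cacheReadTokens = 0,
): number {
	const nonCachedInput = Math.max(0, inputTokens - cacheWriteTokens - cacheReadTokens)
	return costFromParts(info, nonCachedInput, outputTokens, cacheWriteTokens, cacheReadTokens)
}

Under this assumption the existing Anthropic code path is unchanged by the rename, and OpenAI-compatible providers can pass their raw prompt_tokens without double-counting cached input.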

.changeset/bright-horses-double.md

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+---
+"claude-dev": patch
+---
+
+Add correct cost and tokens info to Native OpenAI and DeepSeek providers

src/api/providers/deepseek.ts

Lines changed: 33 additions & 9 deletions
@@ -3,6 +3,7 @@ import OpenAI from "openai"
 import { withRetry } from "../retry"
 import { ApiHandler } from "../"
 import { ApiHandlerOptions, DeepSeekModelId, ModelInfo, deepSeekDefaultModelId, deepSeekModels } from "../../shared/api"
+import { calculateApiCostOpenAI } from "../../utils/cost"
 import { convertToOpenAiMessages } from "../transform/openai-format"
 import { ApiStream } from "../transform/stream"
 import { convertToR1Format } from "../transform/r1-format"
@@ -19,6 +20,37 @@ export class DeepSeekHandler implements ApiHandler {
 		})
 	}

+	private async *yieldUsage(info: ModelInfo, usage: OpenAI.Completions.CompletionUsage | undefined): ApiStream {
+		// DeepSeek reports total input AND cache reads/writes
+		// (see context caching: https://api-docs.deepseek.com/guides/kv_cache),
+		// where the input tokens are the sum of the cache hits/misses, just like OpenAI.
+		// This affects:
+		// 1) the context management truncation algorithm, and
+		// 2) cost calculation.
+
+		// DeepSeek usage includes extra fields.
+		// Safely cast the prompt token details section to the appropriate structure.
+		interface DeepSeekUsage extends OpenAI.CompletionUsage {
+			prompt_cache_hit_tokens?: number
+			prompt_cache_miss_tokens?: number
+		}
+		const deepUsage = usage as DeepSeekUsage
+
+		const inputTokens = deepUsage?.prompt_tokens || 0
+		const outputTokens = deepUsage?.completion_tokens || 0
+		const cacheReadTokens = deepUsage?.prompt_cache_hit_tokens || 0
+		const cacheWriteTokens = deepUsage?.prompt_cache_miss_tokens || 0
+		const totalCost = calculateApiCostOpenAI(info, inputTokens, outputTokens, cacheWriteTokens, cacheReadTokens)
+		yield {
+			type: "usage",
+			inputTokens: inputTokens,
+			outputTokens: outputTokens,
+			cacheWriteTokens: cacheWriteTokens,
+			cacheReadTokens: cacheReadTokens,
+			totalCost: totalCost,
+		}
+	}
+
 	@withRetry()
 	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
 		const model = this.getModel()
@@ -61,15 +93,7 @@ export class DeepSeekHandler implements ApiHandler {
 			}

 			if (chunk.usage) {
-				yield {
-					type: "usage",
-					inputTokens: chunk.usage.prompt_tokens || 0, // (deepseek reports total input AND cache reads/writes, see context caching: https://api-docs.deepseek.com/guides/kv_cache) where the input tokens is the sum of the cache hits/misses, while anthropic reports them as separate tokens. This is important to know for 1) context management truncation algorithm, and 2) cost calculation (NOTE: we report both input and cache stats but for now set input price to 0 since all the cost calculation will be done using cache hits/misses)
-					outputTokens: chunk.usage.completion_tokens || 0,
-					// @ts-ignore-next-line
-					cacheReadTokens: chunk.usage.prompt_cache_hit_tokens || 0,
-					// @ts-ignore-next-line
-					cacheWriteTokens: chunk.usage.prompt_cache_miss_tokens || 0,
-				}
+				yield* this.yieldUsage(model.info, chunk.usage)
 			}
 		}
 	}
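To make the new yieldUsage concrete, here is a hypothetical usage payload and the cost it would produce with the deepseek-chat prices added in src/shared/api.ts below. The token counts are invented, and the arithmetic assumes calculateApiCostOpenAI bills only the non-cached remainder of prompt_tokens at inputPrice (see the sketch near the top of this page):

// Hypothetical deepseek-chat request (numbers invented):
//   prompt_tokens            = 12_000  -> inputTokens (cache hits + misses)
//   prompt_cache_hit_tokens  = 10_000  -> cacheReadTokens
//   prompt_cache_miss_tokens =  2_000  -> cacheWriteTokens
//   completion_tokens        =    500  -> outputTokens
//
// With the deepseek-chat prices below (per 1M tokens: input 0.27, output 1.1,
// cache writes 0.27, cache reads 0.07), the non-cached input is
// 12_000 - 10_000 - 2_000 = 0, so the whole prompt is billed via cache
// reads and writes:
//   cost ≈ 2_000 * 0.27/1M + 10_000 * 0.07/1M + 500 * 1.1/1M ≈ $0.0018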

src/api/providers/openai-native.ts

Lines changed: 30 additions & 21 deletions
@@ -10,6 +10,7 @@ import {
 	openAiNativeModels,
 } from "../../shared/api"
 import { convertToOpenAiMessages } from "../transform/openai-format"
+import { calculateApiCostOpenAI } from "../../utils/cost"
 import { ApiStream } from "../transform/stream"
 import { ChatCompletionReasoningEffort } from "openai/resources/chat/completions.mjs"

@@ -24,31 +25,47 @@ export class OpenAiNativeHandler implements ApiHandler {
 		})
 	}

+	private async *yieldUsage(info: ModelInfo, usage: OpenAI.Completions.CompletionUsage | undefined): ApiStream {
+		const inputTokens = usage?.prompt_tokens || 0
+		const outputTokens = usage?.completion_tokens || 0
+		const cacheReadTokens = usage?.prompt_tokens_details?.cached_tokens || 0
+		const cacheWriteTokens = 0
+		const totalCost = calculateApiCostOpenAI(info, inputTokens, outputTokens, cacheWriteTokens, cacheReadTokens)
+		yield {
+			type: "usage",
+			inputTokens: inputTokens,
+			outputTokens: outputTokens,
+			cacheWriteTokens: cacheWriteTokens,
+			cacheReadTokens: cacheReadTokens,
+			totalCost: totalCost,
+		}
+	}
+
 	@withRetry()
 	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
-		switch (this.getModel().id) {
+		const model = this.getModel()
+
+		switch (model.id) {
 			case "o1":
 			case "o1-preview":
 			case "o1-mini": {
 				// o1 doesnt support streaming, non-1 temp, or system prompt
 				const response = await this.client.chat.completions.create({
-					model: this.getModel().id,
+					model: model.id,
 					messages: [{ role: "user", content: systemPrompt }, ...convertToOpenAiMessages(messages)],
 				})
 				yield {
 					type: "text",
 					text: response.choices[0]?.message.content || "",
 				}
-				yield {
-					type: "usage",
-					inputTokens: response.usage?.prompt_tokens || 0,
-					outputTokens: response.usage?.completion_tokens || 0,
-				}
+
+				yield* this.yieldUsage(model.info, response.usage)
+
 				break
 			}
 			case "o3-mini": {
 				const stream = await this.client.chat.completions.create({
-					model: this.getModel().id,
+					model: model.id,
 					messages: [{ role: "developer", content: systemPrompt }, ...convertToOpenAiMessages(messages)],
 					stream: true,
 					stream_options: { include_usage: true },
@@ -63,18 +80,15 @@ export class OpenAiNativeHandler implements ApiHandler {
 						}
 					}
 					if (chunk.usage) {
-						yield {
-							type: "usage",
-							inputTokens: chunk.usage.prompt_tokens || 0,
-							outputTokens: chunk.usage.completion_tokens || 0,
-						}
+						// Only last chunk contains usage
+						yield* this.yieldUsage(model.info, chunk.usage)
 					}
 				}
 				break
 			}
 			default: {
 				const stream = await this.client.chat.completions.create({
-					model: this.getModel().id,
+					model: model.id,
 					// max_completion_tokens: this.getModel().info.maxTokens,
 					temperature: 0,
 					messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)],
@@ -90,14 +104,9 @@ export class OpenAiNativeHandler implements ApiHandler {
 							text: delta.content,
 						}
 					}
-
-					// contains a null value except for the last chunk which contains the token usage statistics for the entire request
 					if (chunk.usage) {
-						yield {
-							type: "usage",
-							inputTokens: chunk.usage.prompt_tokens || 0,
-							outputTokens: chunk.usage.completion_tokens || 0,
-						}
+						// Only last chunk contains usage
+						yield* this.yieldUsage(model.info, chunk.usage)
 					}
 				}
 			}
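For the native OpenAI path, prompt_tokens in the usage payload already includes the cached portion reported under prompt_tokens_details.cached_tokens, and the payload exposes no cache-write counter, which is presumably why cacheWriteTokens is hard-coded to 0 in yieldUsage. A hypothetical final stream chunk's usage (field names follow OpenAI's CompletionUsage; the numbers are invented):

// Hypothetical usage from the last chunk of a gpt-4o streaming response:
const usage = {
	prompt_tokens: 8_000, // includes the cached portion below
	completion_tokens: 400,
	total_tokens: 8_400,
	prompt_tokens_details: { cached_tokens: 6_000 },
}
// yieldUsage above would report inputTokens 8_000, outputTokens 400,
// cacheReadTokens 6_000, and cacheWriteTokens 0.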

src/api/providers/requesty.ts

Lines changed: 12 additions & 5 deletions
@@ -1,6 +1,7 @@
 import { Anthropic } from "@anthropic-ai/sdk"
 import OpenAI from "openai"
 import { withRetry } from "../retry"
+import { calculateApiCostOpenAI } from "../../utils/cost"
 import { ApiHandlerOptions, ModelInfo, openAiModelInfoSaneDefaults } from "../../shared/api"
 import { ApiHandler } from "../index"
 import { convertToOpenAiMessages } from "../transform/openai-format"
@@ -69,13 +70,19 @@ export class RequestyHandler implements ApiHandler {

 			if (chunk.usage) {
 				const usage = chunk.usage as RequestyUsage
+				const inputTokens = usage.prompt_tokens || 0
+				const outputTokens = usage.completion_tokens || 0
+				const cacheWriteTokens = usage.prompt_tokens_details?.caching_tokens || undefined
+				const cacheReadTokens = usage.prompt_tokens_details?.cached_tokens || undefined
+				const totalCost = 0 // TODO: Replace with calculateApiCostOpenAI(model.info, inputTokens, outputTokens, cacheWriteTokens, cacheReadTokens)
+
 				yield {
 					type: "usage",
-					inputTokens: usage.prompt_tokens || 0,
-					outputTokens: usage.completion_tokens || 0,
-					cacheWriteTokens: usage.prompt_tokens_details?.caching_tokens || undefined,
-					cacheReadTokens: usage.prompt_tokens_details?.cached_tokens || undefined,
-					totalCost: usage.total_cost || undefined,
+					inputTokens: inputTokens,
+					outputTokens: outputTokens,
+					cacheWriteTokens: cacheWriteTokens,
+					cacheReadTokens: cacheReadTokens,
+					totalCost: totalCost,
 				}
 			}
 		}
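RequestyUsage is cast to here but declared elsewhere in requesty.ts, outside these hunks. Based on the fields the handler reads, it presumably extends OpenAI's usage type along these lines (an assumption for illustration, not the actual declaration):

import OpenAI from "openai"

// Assumed shape, inferred from the fields accessed above.
interface RequestyUsage extends OpenAI.CompletionUsage {
	prompt_tokens_details?: {
		cached_tokens?: number // cache reads
		caching_tokens?: number // cache writes
	}
	total_cost?: number // provider-reported cost; no longer forwarded after this change
}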

src/api/providers/vscode-lm.ts

Lines changed: 2 additions & 2 deletions
@@ -1,7 +1,7 @@
 import { Anthropic } from "@anthropic-ai/sdk"
 import * as vscode from "vscode"
 import { ApiHandler, SingleCompletionHandler } from "../"
-import { calculateApiCost } from "../../utils/cost"
+import { calculateApiCostAnthropic } from "../../utils/cost"
 import { ApiStream } from "../transform/stream"
 import { convertToVsCodeLmMessages } from "../transform/vscode-lm-format"
 import { SELECTOR_SEPARATOR, stringifyVsCodeLmModelSelector } from "../../shared/vsCodeSelectorUtils"
@@ -525,7 +525,7 @@ export class VsCodeLmHandler implements ApiHandler, SingleCompletionHandler {
 				type: "usage",
 				inputTokens: totalInputTokens,
 				outputTokens: totalOutputTokens,
-				totalCost: calculateApiCost(this.getModel().info, totalInputTokens, totalOutputTokens),
+				totalCost: calculateApiCostAnthropic(this.getModel().info, totalInputTokens, totalOutputTokens),
 			}
 		} catch (error: unknown) {
 			this.ensureCleanState()

src/core/Cline.ts

Lines changed: 8 additions & 2 deletions
@@ -47,7 +47,7 @@ import {
 import { getApiMetrics } from "../shared/getApiMetrics"
 import { HistoryItem } from "../shared/HistoryItem"
 import { ClineAskResponse, ClineCheckpointRestore } from "../shared/WebviewMessage"
-import { calculateApiCost } from "../utils/cost"
+import { calculateApiCostAnthropic } from "../utils/cost"
 import { fileExistsAtPath } from "../utils/fs"
 import { arePathsEqual, getReadablePath } from "../utils/path"
 import { fixModelHtmlEscaping, removeInvalidChars } from "../utils/string"
@@ -3115,7 +3115,13 @@ export class Cline {
 				cacheReads: cacheReadTokens,
 				cost:
 					totalCost ??
-					calculateApiCost(this.api.getModel().info, inputTokens, outputTokens, cacheWriteTokens, cacheReadTokens),
+					calculateApiCostAnthropic(
+						this.api.getModel().info,
+						inputTokens,
+						outputTokens,
+						cacheWriteTokens,
+						cacheReadTokens,
+					),
 				cancelReason,
 				streamingFailedMessage,
 			} satisfies ClineApiReqInfo)
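One consequence of this fallback worth noting: ?? only falls through on null or undefined, so a provider that yields an explicit totalCost of 0, as the Requesty handler above now does, keeps that 0 rather than triggering the Anthropic-style calculation. A small illustration with invented values:

// Behaviour of the nullish-coalescing fallback (illustrative values only):
const fromProvider: number | undefined = 0 // e.g. Requesty's placeholder totalCost
const fallback = 0.0123 // what calculateApiCostAnthropic would return

const cost = fromProvider ?? fallback
// cost === 0: an explicit 0 from the provider is kept (unlike with ||), so
// Requesty requests will show a zero cost until its TODO is replaced with a
// real calculateApiCostOpenAI call; providers that yield no totalCost at all
// (undefined) still get the Anthropic-style fallback.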

src/shared/api.ts

Lines changed: 15 additions & 9 deletions
@@ -408,9 +408,10 @@ export const openAiNativeModels = {
 		maxTokens: 100_000,
 		contextWindow: 200_000,
 		supportsImages: false,
-		supportsPromptCache: false,
+		supportsPromptCache: true,
 		inputPrice: 1.1,
 		outputPrice: 4.4,
+		cacheReadsPrice: 0.55,
 	},
 	// don't support tool use yet
 	o1: {
@@ -420,38 +421,43 @@ export const openAiNativeModels = {
 		supportsPromptCache: false,
 		inputPrice: 15,
 		outputPrice: 60,
+		cacheReadsPrice: 7.5,
 	},
 	"o1-preview": {
 		maxTokens: 32_768,
 		contextWindow: 128_000,
 		supportsImages: true,
-		supportsPromptCache: false,
+		supportsPromptCache: true,
 		inputPrice: 15,
 		outputPrice: 60,
+		cacheReadsPrice: 7.5,
 	},
 	"o1-mini": {
 		maxTokens: 65_536,
 		contextWindow: 128_000,
 		supportsImages: true,
-		supportsPromptCache: false,
+		supportsPromptCache: true,
 		inputPrice: 1.1,
 		outputPrice: 4.4,
+		cacheReadsPrice: 0.55,
 	},
 	"gpt-4o": {
 		maxTokens: 4_096,
 		contextWindow: 128_000,
 		supportsImages: true,
-		supportsPromptCache: false,
+		supportsPromptCache: true,
 		inputPrice: 2.5,
 		outputPrice: 10,
+		cacheReadsPrice: 1.25,
 	},
 	"gpt-4o-mini": {
 		maxTokens: 16_384,
 		contextWindow: 128_000,
 		supportsImages: true,
-		supportsPromptCache: false,
+		supportsPromptCache: true,
 		inputPrice: 0.15,
 		outputPrice: 0.6,
+		cacheReadsPrice: 0.075,
 	},
 	"gpt-4.5-preview": {
 		maxTokens: 16_384,
@@ -477,8 +483,8 @@ export const deepSeekModels = {
 		maxTokens: 8_000,
 		contextWindow: 64_000,
 		supportsImages: false,
-		supportsPromptCache: true, // supports context caching, but not in the way anthropic does it (deepseek reports input tokens and reads/writes in the same usage report) FIXME: we need to show users cache stats how deepseek does it
-		inputPrice: 0, // technically there is no input price, it's all either a cache hit or miss (ApiOptions will not show this)
+		supportsPromptCache: true,
+		inputPrice: 0.27,
 		outputPrice: 1.1,
 		cacheWritesPrice: 0.27,
 		cacheReadsPrice: 0.07,
@@ -487,8 +493,8 @@ export const deepSeekModels = {
 		maxTokens: 8_000,
 		contextWindow: 64_000,
 		supportsImages: false,
-		supportsPromptCache: true, // supports context caching, but not in the way anthropic does it (deepseek reports input tokens and reads/writes in the same usage report) FIXME: we need to show users cache stats how deepseek does it
-		inputPrice: 0, // technically there is no input price, it's all either a cache hit or miss (ApiOptions will not show this)
+		supportsPromptCache: true,
+		inputPrice: 0.55,
 		outputPrice: 2.19,
 		cacheWritesPrice: 0.55,
 		cacheReadsPrice: 0.14,
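As a check on the new prices, here is a cached gpt-4o request priced the way the diff suggests: cacheReadsPrice at half of inputPrice, and calculateApiCostOpenAI assumed to subtract cached tokens from prompt_tokens before applying inputPrice (token counts invented):

// Hypothetical gpt-4o request: 8_000 prompt tokens, of which 6_000 were
// cache reads, plus 400 completion tokens.
const inputTokens = 8_000
const cacheReadTokens = 6_000
const outputTokens = 400

// Prices per 1M tokens from the gpt-4o entry above.
const inputPrice = 2.5
const cacheReadsPrice = 1.25
const outputPrice = 10

// Assuming the non-cached remainder is billed at inputPrice:
const cost =
	((inputTokens - cacheReadTokens) * inputPrice +
		cacheReadTokens * cacheReadsPrice +
		outputTokens * outputPrice) /
	1_000_000
// cost === 0.0165, i.e. about 1.7 cents; without caching the same request
// would be (8_000 * 2.5 + 400 * 10) / 1_000_000 === 0.024.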
