
Commit e9b6659

Add cost calculation support for LiteLLM provider (RooCodeInc#2403)
* Add cost calculation support for LiteLLM provider
* Calculate input/output cost up front once
1 parent 7a1e757 commit e9b6659
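
The handler probes LiteLLM's /spend/calculate endpoint once for the cost of 1,000,000 prompt tokens and once for 1,000,000 completion tokens, then scales those per-million rates by the token counts reported in the stream's usage chunk. As a hypothetical worked example with assumed rates: if /spend/calculate reports $3.00 for 1,000,000 prompt tokens and $15.00 for 1,000,000 completion tokens, a response that consumed 1,200 prompt tokens and 350 completion tokens is costed at (3.00 × 1,200 + 15.00 × 350) / 1e6 ≈ $0.0089.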

File tree

2 files changed: +45 −0 lines changed


.changeset/purple-queens-teach.md

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+---
+"claude-dev": minor
+---
+
+Add cost calculation support for LiteLLM provider

src/api/providers/litellm.ts

Lines changed: 40 additions & 0 deletions
@@ -17,6 +17,40 @@ export class LiteLlmHandler implements ApiHandler {
 		})
 	}
 
+	async calculateCost(prompt_tokens: number, completion_tokens: number): Promise<number | undefined> {
+		// Reference: https://github.com/BerriAI/litellm/blob/122ee634f434014267af104814022af1d9a0882f/litellm/proxy/spend_tracking/spend_management_endpoints.py#L1473
+		const modelId = this.options.liteLlmModelId || liteLlmDefaultModelId
+		try {
+			const response = await fetch(`${this.client.baseURL}/spend/calculate`, {
+				method: "POST",
+				headers: {
+					"Content-Type": "application/json",
+					Authorization: `Bearer ${this.options.liteLlmApiKey}`,
+				},
+				body: JSON.stringify({
+					completion_response: {
+						model: modelId,
+						usage: {
+							prompt_tokens,
+							completion_tokens,
+						},
+					},
+				}),
+			})
+
+			if (response.ok) {
+				const data: { cost: number } = await response.json()
+				return data.cost
+			} else {
+				console.error("Error calculating spend:", response.statusText)
+				return undefined
+			}
+		} catch (error) {
+			console.error("Error calculating spend:", error)
+			return undefined
+		}
+	}
+
 	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
 		const formattedMessages = convertToOpenAiMessages(messages)
 		const systemMessage: OpenAI.Chat.ChatCompletionSystemMessageParam = {
@@ -39,6 +73,9 @@ export class LiteLlmHandler implements ApiHandler {
 			stream_options: { include_usage: true },
 		})
 
+		const inputCost = (await this.calculateCost(1e6, 0)) || 0
+		const outputCost = (await this.calculateCost(0, 1e6)) || 0
+
 		for await (const chunk of stream) {
 			const delta = chunk.choices[0]?.delta
 			if (delta?.content) {
@@ -49,10 +86,13 @@ export class LiteLlmHandler implements ApiHandler {
 			}
 
 			if (chunk.usage) {
+				const totalCost =
+					(inputCost * chunk.usage.prompt_tokens) / 1e6 + (outputCost * chunk.usage.completion_tokens) / 1e6
 				yield {
 					type: "usage",
 					inputTokens: chunk.usage.prompt_tokens || 0,
 					outputTokens: chunk.usage.completion_tokens || 0,
+					totalCost,
 				}
 			}
 		}
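
For context, below is a minimal sketch of how a caller might consume this stream and the new totalCost field. The usage-chunk shape mirrors the yield in the diff above; the StreamChunk type, the "text" chunk shape, and the printRunCost function are hypothetical stand-ins, not part of this commit.

type StreamChunk =
	| { type: "text"; text: string }
	| { type: "usage"; inputTokens: number; outputTokens: number; totalCost?: number }

async function printRunCost(stream: AsyncIterable<StreamChunk>): Promise<void> {
	let text = ""
	let cost = 0

	for await (const chunk of stream) {
		if (chunk.type === "text") {
			text += chunk.text
		} else if (chunk.type === "usage") {
			// totalCost is (inputCostPer1M * promptTokens + outputCostPer1M * completionTokens) / 1e6,
			// computed from the two up-front /spend/calculate probes in the diff above.
			cost += chunk.totalCost ?? 0
		}
	}

	console.log(text)
	console.log(`estimated cost: $${cost.toFixed(6)}`)
}

Probing /spend/calculate twice before streaming keeps the cost lookup off the per-chunk path; the scaling step assumes the reported per-million rates are linear in token count, which is the usual per-token pricing model.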

0 commit comments
