
Commit dca1076

Add cached read and writes to stats and cost calculation for LiteLLM provider (#4206)

* Add cached read and writes to cost calculation for LiteLLM
* Fixed property issue

1 parent 3fc05ad · commit dca1076

File tree: 2 files changed (+14, -1 lines)
src/api/providers/fetchers/litellm.ts
Lines changed: 2 additions & 0 deletions

```diff
@@ -58,6 +58,8 @@ export async function getLiteLLMModels(apiKey: string, baseUrl: string): Promise
 			outputPrice: modelInfo.output_cost_per_token
 				? modelInfo.output_cost_per_token * 1000000
 				: undefined,
+			cacheWritesPrice: modelInfo.cache_creation_input_token_cost ? modelInfo.cache_creation_input_token_cost * 1000000 : undefined,
+			cacheReadsPrice: modelInfo.cache_read_input_token_cost ? modelInfo.cache_read_input_token_cost * 1000000 : undefined,
 			description: `${modelName} via LiteLLM proxy`,
 		}
 	}
```
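For context: LiteLLM reports per-token costs (`cache_creation_input_token_cost`, `cache_read_input_token_cost`), while the provider's model info stores prices per million tokens, hence the `* 1000000` scaling. A minimal sketch of that conversion (the helper name is illustrative; the commit inlines the expression):

```typescript
// Illustrative helper, not part of the commit: converts a per-token cost
// (as reported by LiteLLM) into a per-million-token price. A missing or
// zero cost falls through to undefined, matching the diff's truthiness check.
function toPricePerMillion(costPerToken?: number): number | undefined {
	return costPerToken ? costPerToken * 1_000_000 : undefined
}

// toPricePerMillion(0.0000003) ≈ 0.3, i.e. $0.30 per million cache-read tokens
```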

src/api/providers/lite-llm.ts
Lines changed: 12 additions & 1 deletion

```diff
@@ -3,6 +3,8 @@ import { Anthropic } from "@anthropic-ai/sdk" // Keep for type usage only
 
 import { litellmDefaultModelId, litellmDefaultModelInfo } from "@roo-code/types"
 
+import { calculateApiCostOpenAI } from "../../shared/cost"
+
 import { ApiHandlerOptions } from "../../shared/api"
 
 import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
@@ -66,7 +68,7 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHa
 
 		for await (const chunk of completion) {
 			const delta = chunk.choices[0]?.delta
-			const usage = chunk.usage as OpenAI.CompletionUsage
+			const usage = chunk.usage as LiteLLMUsage
 
 			if (delta?.content) {
 				yield { type: "text", text: delta.content }
@@ -82,8 +84,12 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHa
 				type: "usage",
 				inputTokens: lastUsage.prompt_tokens || 0,
 				outputTokens: lastUsage.completion_tokens || 0,
+				cacheWriteTokens: lastUsage.cache_creation_input_tokens || 0,
+				cacheReadTokens: lastUsage.prompt_tokens_details?.cached_tokens || 0,
 			}
 
+			usageData.totalCost = calculateApiCostOpenAI(info, usageData.inputTokens, usageData.outputTokens, usageData.cacheWriteTokens, usageData.cacheReadTokens)
+
 			yield usageData
 		}
 	} catch (error) {
@@ -119,3 +125,8 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHa
 		}
 	}
 }
+
+// LiteLLM usage may include an extra field for Anthropic use cases.
+interface LiteLLMUsage extends OpenAI.CompletionUsage {
+	cache_creation_input_tokens?: number
+}
```
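`calculateApiCostOpenAI` itself lives in `src/shared/cost` and is not shown in this diff. A plausible sketch of the call's semantics, assuming the OpenAI convention where `prompt_tokens` already includes cached reads and writes (so they are carved out of the input and billed at their own per-million rates):

```typescript
// Sketch only; the real implementation is in src/shared/cost.ts and may differ.
// Minimal price shape for illustration; the real model info type comes from @roo-code/types.
type PriceInfo = {
	inputPrice?: number // $ per million uncached input tokens
	outputPrice?: number // $ per million output tokens
	cacheWritesPrice?: number // $ per million cache-write tokens
	cacheReadsPrice?: number // $ per million cache-read tokens
}

function calculateApiCostOpenAI(
	info: PriceInfo,
	inputTokens: number,
	outputTokens: number,
	cacheWriteTokens = 0,
	cacheReadTokens = 0,
): number {
	// Assumption: inputTokens (prompt_tokens) counts cached tokens too,
	// so only the uncached remainder is billed at the plain input rate.
	const uncachedInput = Math.max(0, inputTokens - cacheWriteTokens - cacheReadTokens)
	return (
		(uncachedInput * (info.inputPrice ?? 0) +
			outputTokens * (info.outputPrice ?? 0) +
			cacheWriteTokens * (info.cacheWritesPrice ?? 0) +
			cacheReadTokens * (info.cacheReadsPrice ?? 0)) /
		1_000_000
	)
}
```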

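To make the mapping concrete, here is a hypothetical final usage object for an Anthropic model proxied through LiteLLM (all values invented) and the usage chunk the handler would emit from it:

```typescript
// Hypothetical last streamed usage payload (values invented for illustration).
const lastUsage = {
	prompt_tokens: 1200, // OpenAI-style: includes cached tokens
	completion_tokens: 300,
	total_tokens: 1500,
	prompt_tokens_details: { cached_tokens: 800 }, // cache reads
	cache_creation_input_tokens: 200, // the Anthropic-only field LiteLLMUsage adds
}

// The handler would yield:
// {
//   type: "usage",
//   inputTokens: 1200,
//   outputTokens: 300,
//   cacheWriteTokens: 200,
//   cacheReadTokens: 800,
//   totalCost: <computed by calculateApiCostOpenAI>,
// }
```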