diff --git a/src/api/providers/fetchers/litellm.ts b/src/api/providers/fetchers/litellm.ts
index 34f6a111d2..ec65018d17 100644
--- a/src/api/providers/fetchers/litellm.ts
+++ b/src/api/providers/fetchers/litellm.ts
@@ -58,6 +58,8 @@ export async function getLiteLLMModels(apiKey: string, baseUrl: string): Promise
 				outputPrice: modelInfo.output_cost_per_token
 					? modelInfo.output_cost_per_token * 1000000
 					: undefined,
+				cacheWritesPrice: modelInfo.cache_creation_input_token_cost ? modelInfo.cache_creation_input_token_cost * 1000000 : undefined,
+				cacheReadsPrice: modelInfo.cache_read_input_token_cost ? modelInfo.cache_read_input_token_cost * 1000000 : undefined,
 				description: `${modelName} via LiteLLM proxy`,
 			}
 		}
diff --git a/src/api/providers/lite-llm.ts b/src/api/providers/lite-llm.ts
index 002040df2a..b82bda9275 100644
--- a/src/api/providers/lite-llm.ts
+++ b/src/api/providers/lite-llm.ts
@@ -3,6 +3,8 @@ import { Anthropic } from "@anthropic-ai/sdk" // Keep for type usage only
 
 import { litellmDefaultModelId, litellmDefaultModelInfo } from "@roo-code/types"
 
+import { calculateApiCostOpenAI } from "../../shared/cost"
+
 import { ApiHandlerOptions } from "../../shared/api"
 import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
 
@@ -66,7 +68,7 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHa
 
 			for await (const chunk of completion) {
 				const delta = chunk.choices[0]?.delta
-				const usage = chunk.usage as OpenAI.CompletionUsage
+				const usage = chunk.usage as LiteLLMUsage
 
 				if (delta?.content) {
 					yield { type: "text", text: delta.content }
@@ -82,8 +84,12 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHa
 					type: "usage",
 					inputTokens: lastUsage.prompt_tokens || 0,
 					outputTokens: lastUsage.completion_tokens || 0,
+					cacheWriteTokens: lastUsage.cache_creation_input_tokens || 0,
+					cacheReadTokens: lastUsage.prompt_tokens_details?.cached_tokens || 0,
 				}
 
+				usageData.totalCost = calculateApiCostOpenAI(info, usageData.inputTokens, usageData.outputTokens, usageData.cacheWriteTokens, usageData.cacheReadTokens)
+
 				yield usageData
 			}
 		} catch (error) {
@@ -119,3 +125,8 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHa
 		}
 	}
 }
+
+// LiteLLM usage may include an extra field for Anthropic use cases.
+interface LiteLLMUsage extends OpenAI.CompletionUsage {
+	cache_creation_input_tokens?: number
+}
\ No newline at end of file