Skip to content

Commit 3716053

Browse files
committed
Add cached read and writes to cost calculation for LiteLLM
1 parent 3fc05ad commit 3716053

File tree

2 files changed

+8
-0
lines changed

2 files changed

+8
-0
lines changed

src/api/providers/fetchers/litellm.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ export async function getLiteLLMModels(apiKey: string, baseUrl: string): Promise
 			outputPrice: modelInfo.output_cost_per_token
 				? modelInfo.output_cost_per_token * 1000000
 				: undefined,
+			cacheWritesPrice: modelInfo.cache_creation_input_token_cost ? modelInfo.cache_creation_input_token_cost * 1000000 : undefined,
+			cacheReadsPrice: modelInfo.cache_read_input_token_cost ? modelInfo.cache_read_input_token_cost * 1000000 : undefined,
 			description: `${modelName} via LiteLLM proxy`,
 		}
 	}

src/api/providers/lite-llm.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ import { Anthropic } from "@anthropic-ai/sdk" // Keep for type usage only
 
 import { litellmDefaultModelId, litellmDefaultModelInfo } from "@roo-code/types"
 
+import { calculateApiCostOpenAI } from "../../shared/cost"
+
 import { ApiHandlerOptions } from "../../shared/api"
 
 import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
@@ -82,8 +84,12 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHa
 				type: "usage",
 				inputTokens: lastUsage.prompt_tokens || 0,
 				outputTokens: lastUsage.completion_tokens || 0,
+				cacheWriteTokens: lastUsage.cache_creation_input_tokens || 0,
+				cacheReadTokens: lastUsage.prompt_tokens_details?.cached_tokens || 0,
 			}
 
+			usageData.totalCost = calculateApiCostOpenAI(info, usageData.inputTokens, usageData.outputTokens, usageData.cacheWriteTokens, usageData.cacheReadTokens)
+
 			yield usageData
 		}
 	} catch (error) {

0 commit comments

Comments
 (0)