2 changes: 2 additions & 0 deletions src/api/providers/fetchers/litellm.ts
@@ -58,6 +58,8 @@ export async function getLiteLLMModels(apiKey: string, baseUrl: string): Promise
outputPrice: modelInfo.output_cost_per_token
? modelInfo.output_cost_per_token * 1000000
: undefined,
cacheWritesPrice: modelInfo.cache_creation_input_token_cost
? modelInfo.cache_creation_input_token_cost * 1000000
: undefined,
cacheReadsPrice: modelInfo.cache_read_input_token_cost
? modelInfo.cache_read_input_token_cost * 1000000
: undefined,
description: `${modelName} via LiteLLM proxy`,
}
}
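
The fetcher change above converts LiteLLM's per-token cache pricing into the per-million-token fields used elsewhere, mirroring the existing inputPrice/outputPrice handling. A minimal sketch of that conversion, with an assumed /model/info entry (the field values are made up for illustration):

// Hypothetical model_info entry from LiteLLM's /model/info endpoint (values assumed).
const modelInfo = {
	cache_creation_input_token_cost: 0.00000375, // USD per cache-write token
	cache_read_input_token_cost: 0.0000003, // USD per cache-read token
}

// Same arithmetic as the added lines: per-token USD -> USD per 1M tokens.
const cacheWritesPrice = modelInfo.cache_creation_input_token_cost
	? modelInfo.cache_creation_input_token_cost * 1000000
	: undefined // 3.75
const cacheReadsPrice = modelInfo.cache_read_input_token_cost
	? modelInfo.cache_read_input_token_cost * 1000000
	: undefined // 0.3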
13 changes: 12 additions & 1 deletion src/api/providers/lite-llm.ts
@@ -3,6 +3,8 @@ import { Anthropic } from "@anthropic-ai/sdk" // Keep for type usage only

import { litellmDefaultModelId, litellmDefaultModelInfo } from "@roo-code/types"

import { calculateApiCostOpenAI } from "../../shared/cost"

import { ApiHandlerOptions } from "../../shared/api"

import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
@@ -66,7 +68,7 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHa

for await (const chunk of completion) {
const delta = chunk.choices[0]?.delta
const usage = chunk.usage as OpenAI.CompletionUsage
const usage = chunk.usage as LiteLLMUsage

if (delta?.content) {
yield { type: "text", text: delta.content }
@@ -82,8 +84,12 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHa
type: "usage",
inputTokens: lastUsage.prompt_tokens || 0,
outputTokens: lastUsage.completion_tokens || 0,
cacheWriteTokens: lastUsage.cache_creation_input_tokens || 0,
cacheReadTokens: lastUsage.prompt_tokens_details?.cached_tokens || 0,
}

usageData.totalCost = calculateApiCostOpenAI(
info,
usageData.inputTokens,
usageData.outputTokens,
usageData.cacheWriteTokens,
usageData.cacheReadTokens,
)

yield usageData
}
} catch (error) {
@@ -119,3 +125,8 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHa
}
}
}

// LiteLLM usage may include an extra field for Anthropic use cases.
interface LiteLLMUsage extends OpenAI.CompletionUsage {
cache_creation_input_tokens?: number
}
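
Taken together, the handler now reads cache_creation_input_tokens (Anthropic-style) and prompt_tokens_details.cached_tokens (OpenAI-style) from the streamed usage chunk and passes them to calculateApiCostOpenAI. A rough, illustrative sketch of the cost math that call is expected to perform, assuming OpenAI-style accounting where prompt_tokens already includes cached and cache-creation tokens; the field names and exact formula here are assumptions, not the contents of src/shared/cost.ts:

// Assumed pricing shape, all rates in USD per 1M tokens.
interface AssumedPricing {
	inputPrice?: number
	outputPrice?: number
	cacheWritesPrice?: number
	cacheReadsPrice?: number
}

function estimateOpenAIStyleCost(
	info: AssumedPricing,
	inputTokens: number,
	outputTokens: number,
	cacheWriteTokens: number,
	cacheReadTokens: number,
): number {
	// Cached tokens are assumed to be counted inside prompt_tokens, so bill them
	// at their own rates and only the remainder at the full input rate.
	const uncachedInput = Math.max(0, inputTokens - cacheWriteTokens - cacheReadTokens)
	return (
		((info.inputPrice ?? 0) * uncachedInput +
			(info.outputPrice ?? 0) * outputTokens +
			(info.cacheWritesPrice ?? 0) * cacheWriteTokens +
			(info.cacheReadsPrice ?? 0) * cacheReadTokens) /
		1_000_000
	)
}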