Commit 0bbd3fd

Add LiteLLM provider (#3242)
Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com>
1 parent cd423d3 commit 0bbd3fd

34 files changed, with 366 additions and 5 deletions.

src/api/index.ts

Lines changed: 3 additions & 0 deletions
@@ -25,6 +25,7 @@ import { FakeAIHandler } from "./providers/fake-ai"
 import { XAIHandler } from "./providers/xai"
 import { GroqHandler } from "./providers/groq"
 import { ChutesHandler } from "./providers/chutes"
+import { LiteLLMHandler } from "./providers/litellm"

 export interface SingleCompletionHandler {
 	completePrompt(prompt: string): Promise<string>
@@ -94,6 +95,8 @@ export function buildApiHandler(configuration: ApiConfiguration): ApiHandler {
 			return new GroqHandler(options)
 		case "chutes":
 			return new ChutesHandler(options)
+		case "litellm":
+			return new LiteLLMHandler(options)
 		default:
 			return new AnthropicHandler(options)
 	}

src/api/providers/fetchers/cache.ts

Lines changed: 15 additions & 1 deletion
@@ -12,6 +12,7 @@ import { getOpenRouterModels } from "./openrouter"
 import { getRequestyModels } from "./requesty"
 import { getGlamaModels } from "./glama"
 import { getUnboundModels } from "./unbound"
+import { getLiteLLMModels } from "./litellm"

 const memoryCache = new NodeCache({ stdTTL: 5 * 60, checkperiod: 5 * 60 })

@@ -36,9 +37,15 @@ async function readModels(router: RouterName): Promise<ModelRecord | undefined>
  * 2. File cache - This is a file-based cache that is used to store models for a longer period of time.
  *
  * @param router - The router to fetch models from.
+ * @param apiKey - Optional API key for the provider.
+ * @param baseUrl - Optional base URL for the provider (currently used only for LiteLLM).
  * @returns The models from the cache or the fetched models.
  */
-export const getModels = async (router: RouterName, apiKey: string | undefined = undefined): Promise<ModelRecord> => {
+export const getModels = async (
+	router: RouterName,
+	apiKey: string | undefined = undefined,
+	baseUrl: string | undefined = undefined,
+): Promise<ModelRecord> => {
 	let models = memoryCache.get<ModelRecord>(router)
 	if (models) {
 		// console.log(`[getModels] NodeCache hit for ${router} -> ${Object.keys(models).length}`)
@@ -59,6 +66,13 @@ export const getModels = async (router: RouterName, apiKey: string | undefined =
 		case "unbound":
 			models = await getUnboundModels()
 			break
+		case "litellm":
+			if (apiKey && baseUrl) {
+				models = await getLiteLLMModels(apiKey, baseUrl)
+			} else {
+				models = {}
+			}
+			break
 	}

 	if (Object.keys(models).length > 0) {
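
As a rough sketch of the new call path, a caller can now hand the LiteLLM credentials straight to the shared cache; the import path, key, and URL below are placeholders used only for illustration.

import { getModels } from "./src/api/providers/fetchers/cache" // path assumed

async function loadLiteLLMModels() {
	// Both arguments must be present; otherwise the "litellm" branch above
	// resolves to an empty record.
	const models = await getModels("litellm", "sk-example", "http://localhost:4000")
	console.log(Object.keys(models))
}

loadLiteLLMModels().catch(console.error)
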
src/api/providers/fetchers/litellm.ts

Lines changed: 58 additions & 0 deletions
@@ -0,0 +1,58 @@
+import axios from "axios"
+import { COMPUTER_USE_MODELS, ModelRecord } from "../../../shared/api"
+
+/**
+ * Fetches available models from a LiteLLM server
+ *
+ * @param apiKey The API key for the LiteLLM server
+ * @param baseUrl The base URL of the LiteLLM server
+ * @returns A promise that resolves to a record of model IDs to model info
+ */
+export async function getLiteLLMModels(apiKey: string, baseUrl: string): Promise<ModelRecord> {
+	try {
+		const headers: Record<string, string> = {
+			"Content-Type": "application/json",
+		}
+
+		if (apiKey) {
+			headers["Authorization"] = `Bearer ${apiKey}`
+		}
+
+		const response = await axios.get(`${baseUrl}/v1/model/info`, { headers })
+		const models: ModelRecord = {}
+
+		const computerModels = Array.from(COMPUTER_USE_MODELS)
+
+		// Process the model info from the response
+		if (response.data && response.data.data && Array.isArray(response.data.data)) {
+			for (const model of response.data.data) {
+				const modelName = model.model_name
+				const modelInfo = model.model_info
+				const litellmModelName = model?.litellm_params?.model as string | undefined
+
+				if (!modelName || !modelInfo || !litellmModelName) continue
+
+				models[modelName] = {
+					maxTokens: modelInfo.max_tokens || 8192,
+					contextWindow: modelInfo.max_input_tokens || 200000,
+					supportsImages: Boolean(modelInfo.supports_vision),
+					// litellm_params.model may have a prefix like openrouter/
+					supportsComputerUse: computerModels.some((computer_model) =>
+						litellmModelName.endsWith(computer_model),
+					),
+					supportsPromptCache: Boolean(modelInfo.supports_prompt_caching),
+					inputPrice: modelInfo.input_cost_per_token ? modelInfo.input_cost_per_token * 1000000 : undefined,
+					outputPrice: modelInfo.output_cost_per_token
+						? modelInfo.output_cost_per_token * 1000000
+						: undefined,
+					description: `${modelName} via LiteLLM proxy`,
+				}
+			}
+		}
+
+		return models
+	} catch (error) {
+		console.error("Error fetching LiteLLM models:", error)
+		return {}
+	}
+}
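
For reference, the fields the fetcher reads come from GET {baseUrl}/v1/model/info; the entry below is an illustrative sketch with made-up numbers, not a captured response, and shows roughly how one entry maps onto a ModelRecord value.

// One illustrative /v1/model/info entry, limited to the fields getLiteLLMModels reads.
const sampleEntry = {
	model_name: "claude-3-7-sonnet",
	litellm_params: { model: "anthropic/claude-3-7-sonnet-20250219" },
	model_info: {
		max_tokens: 8192,
		max_input_tokens: 200000,
		supports_vision: true,
		supports_prompt_caching: true,
		input_cost_per_token: 0.000003,
		output_cost_per_token: 0.000015,
	},
}

// Roughly the resulting ModelRecord value. Per-token costs become per-million-token
// prices (0.000003 * 1_000_000 = 3), and supportsComputerUse is true only when the
// underlying litellm_params.model ends with an entry of COMPUTER_USE_MODELS.
const expected = {
	maxTokens: 8192,
	contextWindow: 200000,
	supportsImages: true,
	supportsPromptCache: true,
	inputPrice: 3,
	outputPrice: 15,
	description: "claude-3-7-sonnet via LiteLLM proxy",
}
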

src/api/providers/litellm.ts

Lines changed: 113 additions & 0 deletions
@@ -0,0 +1,113 @@
+import OpenAI from "openai"
+import { Anthropic } from "@anthropic-ai/sdk" // Keep for type usage only
+
+import { ApiHandlerOptions, litellmDefaultModelId, litellmDefaultModelInfo } from "../../shared/api"
+import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
+import { convertToOpenAiMessages } from "../transform/openai-format"
+import { SingleCompletionHandler } from "../index"
+import { RouterProvider } from "./router-provider"
+
+/**
+ * LiteLLM provider handler
+ *
+ * This handler uses the LiteLLM API to proxy requests to various LLM providers.
+ * It follows the OpenAI API format for compatibility.
+ */
+export class LiteLLMHandler extends RouterProvider implements SingleCompletionHandler {
+	constructor(options: ApiHandlerOptions) {
+		super({
+			options,
+			name: "litellm",
+			baseURL: `${options.litellmBaseUrl || "http://localhost:4000"}`,
+			apiKey: options.litellmApiKey || "dummy-key",
+			modelId: options.litellmModelId,
+			defaultModelId: litellmDefaultModelId,
+			defaultModelInfo: litellmDefaultModelInfo,
+		})
+	}
+
+	override async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
+		const { id: modelId, info } = await this.fetchModel()
+
+		const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
+			{ role: "system", content: systemPrompt },
+			...convertToOpenAiMessages(messages),
+		]
+
+		// Required by some providers; others default to max tokens allowed
+		let maxTokens: number | undefined = info.maxTokens ?? undefined
+
+		const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
+			model: modelId,
+			max_tokens: maxTokens,
+			messages: openAiMessages,
+			stream: true,
+			stream_options: {
+				include_usage: true,
+			},
+		}
+
+		if (this.supportsTemperature(modelId)) {
+			requestOptions.temperature = this.options.modelTemperature ?? 0
+		}
+
+		try {
+			const { data: completion } = await this.client.chat.completions.create(requestOptions).withResponse()
+
+			let lastUsage
+
+			for await (const chunk of completion) {
+				const delta = chunk.choices[0]?.delta
+				const usage = chunk.usage as OpenAI.CompletionUsage
+
+				if (delta?.content) {
+					yield { type: "text", text: delta.content }
+				}
+
+				if (usage) {
+					lastUsage = usage
+				}
+			}
+
+			if (lastUsage) {
+				const usageData: ApiStreamUsageChunk = {
+					type: "usage",
+					inputTokens: lastUsage.prompt_tokens || 0,
+					outputTokens: lastUsage.completion_tokens || 0,
+				}
+
+				yield usageData
+			}
+		} catch (error) {
+			if (error instanceof Error) {
+				throw new Error(`LiteLLM streaming error: ${error.message}`)
+			}
+			throw error
+		}
+	}
+
+	async completePrompt(prompt: string): Promise<string> {
+		const { id: modelId, info } = await this.fetchModel()
+
+		try {
+			const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = {
+				model: modelId,
+				messages: [{ role: "user", content: prompt }],
+			}
+
+			if (this.supportsTemperature(modelId)) {
+				requestOptions.temperature = this.options.modelTemperature ?? 0
+			}
+
+			requestOptions.max_tokens = info.maxTokens
+
+			const response = await this.client.chat.completions.create(requestOptions)
+			return response.choices[0]?.message.content || ""
+		} catch (error) {
+			if (error instanceof Error) {
+				throw new Error(`LiteLLM completion error: ${error.message}`)
+			}
+			throw error
+		}
+	}
+}
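
A hedged end-to-end sketch of the handler, assuming a LiteLLM proxy is reachable at the default localhost:4000 address; the import path, key, and model id are placeholders, and the model must actually exist on the proxy.

import { LiteLLMHandler } from "./src/api/providers/litellm" // path assumed

async function demo() {
	const handler = new LiteLLMHandler({
		litellmBaseUrl: "http://localhost:4000", // placeholder
		litellmApiKey: "sk-example", // placeholder
		litellmModelId: "anthropic/claude-3-7-sonnet-20250219",
	})

	// Text arrives as "text" chunks; token counts arrive as a trailing "usage"
	// chunk when the proxy reports stream usage.
	for await (const chunk of handler.createMessage("You are terse.", [{ role: "user", content: "Say hello." }])) {
		if (chunk.type === "text") process.stdout.write(chunk.text)
		if (chunk.type === "usage") console.log(`\ntokens in/out: ${chunk.inputTokens}/${chunk.outputTokens}`)
	}
}

demo().catch(console.error)
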

src/api/providers/router-provider.ts

Lines changed: 1 addition & 1 deletion
@@ -44,7 +44,7 @@ export abstract class RouterProvider extends BaseProvider {
 	}

 	public async fetchModel() {
-		this.models = await getModels(this.name)
+		this.models = await getModels(this.name, this.client.apiKey, this.client.baseURL)
 		return this.getModel()
 	}

src/core/webview/webviewMessageHandler.ts

Lines changed: 3 additions & 1 deletion
@@ -289,11 +289,12 @@ export const webviewMessageHandler = async (provider: ClineProvider, message: We
 		case "requestRouterModels":
 			const { apiConfiguration } = await provider.getState()

-			const [openRouterModels, requestyModels, glamaModels, unboundModels] = await Promise.all([
+			const [openRouterModels, requestyModels, glamaModels, unboundModels, litellmModels] = await Promise.all([
 				getModels("openrouter", apiConfiguration.openRouterApiKey),
 				getModels("requesty", apiConfiguration.requestyApiKey),
 				getModels("glama", apiConfiguration.glamaApiKey),
 				getModels("unbound", apiConfiguration.unboundApiKey),
+				getModels("litellm", apiConfiguration.litellmApiKey, apiConfiguration.litellmBaseUrl),
 			])

 			provider.postMessageToWebview({
@@ -303,6 +304,7 @@ export const webviewMessageHandler = async (provider: ClineProvider, message: We
 					requesty: requestyModels,
 					glama: glamaModels,
 					unbound: unboundModels,
+					litellm: litellmModels,
 				},
 			})
 			break

src/exports/roo-code.d.ts

Lines changed: 5 additions & 0 deletions
@@ -23,6 +23,7 @@ type ProviderSettings = {
 		| "xai"
 		| "groq"
 		| "chutes"
+		| "litellm"
 	)
 	| undefined
 	apiModelId?: string | undefined
@@ -123,6 +124,9 @@ type ProviderSettings = {
 	xaiApiKey?: string | undefined
 	groqApiKey?: string | undefined
 	chutesApiKey?: string | undefined
+	litellmBaseUrl?: string | undefined
+	litellmApiKey?: string | undefined
+	litellmModelId?: string | undefined
 	modelMaxTokens?: number | undefined
 	modelMaxThinkingTokens?: number | undefined
 	includeMaxTokens?: boolean | undefined
@@ -163,6 +167,7 @@ type GlobalSettings = {
 		| "xai"
 		| "groq"
 		| "chutes"
+		| "litellm"
 	)
 	| undefined
 }[]

src/exports/types.ts

Lines changed: 5 additions & 0 deletions
@@ -24,6 +24,7 @@ type ProviderSettings = {
 		| "xai"
 		| "groq"
 		| "chutes"
+		| "litellm"
 	)
 	| undefined
 	apiModelId?: string | undefined
@@ -124,6 +125,9 @@ type ProviderSettings = {
 	xaiApiKey?: string | undefined
 	groqApiKey?: string | undefined
 	chutesApiKey?: string | undefined
+	litellmBaseUrl?: string | undefined
+	litellmApiKey?: string | undefined
+	litellmModelId?: string | undefined
 	modelMaxTokens?: number | undefined
 	modelMaxThinkingTokens?: number | undefined
 	includeMaxTokens?: boolean | undefined
@@ -166,6 +170,7 @@ type GlobalSettings = {
 		| "xai"
 		| "groq"
 		| "chutes"
+		| "litellm"
 	)
 	| undefined
 }[]

src/schemas/index.ts

Lines changed: 11 additions & 0 deletions
@@ -31,6 +31,7 @@ export const providerNames = [
 	"xai",
 	"groq",
 	"chutes",
+	"litellm",
 ] as const

 export const providerNamesSchema = z.enum(providerNames)
@@ -429,6 +430,10 @@ export const providerSettingsSchema = z.object({
 	groqApiKey: z.string().optional(),
 	// Chutes AI
 	chutesApiKey: z.string().optional(),
+	// LiteLLM
+	litellmBaseUrl: z.string().optional(),
+	litellmApiKey: z.string().optional(),
+	litellmModelId: z.string().optional(),
 	// Claude 3.7 Sonnet Thinking
 	modelMaxTokens: z.number().optional(),
 	modelMaxThinkingTokens: z.number().optional(),
@@ -538,6 +543,10 @@ const providerSettingsRecord: ProviderSettingsRecord = {
 	groqApiKey: undefined,
 	// Chutes AI
 	chutesApiKey: undefined,
+	// LiteLLM
+	litellmBaseUrl: undefined,
+	litellmApiKey: undefined,
+	litellmModelId: undefined,
 }

 export const PROVIDER_SETTINGS_KEYS = Object.keys(providerSettingsRecord) as Keys<ProviderSettings>[]
@@ -732,6 +741,7 @@ export type SecretState = Pick<
 	| "xaiApiKey"
 	| "groqApiKey"
 	| "chutesApiKey"
+	| "litellmApiKey"
 >

 type SecretStateRecord = Record<Keys<SecretState>, undefined>
@@ -753,6 +763,7 @@ const secretStateRecord: SecretStateRecord = {
 	xaiApiKey: undefined,
 	groqApiKey: undefined,
 	chutesApiKey: undefined,
+	litellmApiKey: undefined,
 }

 export const SECRET_STATE_KEYS = Object.keys(secretStateRecord) as Keys<SecretState>[]
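
As a small sketch of the new settings flowing through the extended schema, the snippet below assumes the import path and uses placeholder values; all three LiteLLM keys are optional strings, so partial configurations also parse.

import { providerSettingsSchema } from "./src/schemas" // path assumed

const settings = providerSettingsSchema.parse({
	litellmBaseUrl: "http://localhost:4000", // placeholder proxy address
	litellmApiKey: "sk-example", // placeholder; the key is listed in SECRET_STATE_KEYS above
	litellmModelId: "anthropic/claude-3-7-sonnet-20250219",
})
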

src/shared/api.ts

Lines changed: 15 additions & 1 deletion
@@ -1136,6 +1136,20 @@ export const unboundDefaultModelInfo: ModelInfo = {
 	cacheReadsPrice: 0.3,
 }

+// LiteLLM
+// https://docs.litellm.ai/
+export const litellmDefaultModelId = "anthropic/claude-3-7-sonnet-20250219"
+export const litellmDefaultModelInfo: ModelInfo = {
+	maxTokens: 8192,
+	contextWindow: 200_000,
+	supportsImages: true,
+	supportsComputerUse: true,
+	supportsPromptCache: true,
+	inputPrice: 3.0,
+	outputPrice: 15.0,
+	cacheWritesPrice: 3.75,
+	cacheReadsPrice: 0.3,
+}
 // xAI
 // https://docs.x.ai/docs/api-reference
 export type XAIModelId = keyof typeof xaiModels
@@ -1731,7 +1745,7 @@ export const COMPUTER_USE_MODELS = new Set([
 	"anthropic/claude-3.7-sonnet:thinking",
 ])

-const routerNames = ["openrouter", "requesty", "glama", "unbound"] as const
+const routerNames = ["openrouter", "requesty", "glama", "unbound", "litellm"] as const

 export type RouterName = (typeof routerNames)[number]

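Finally, a tiny sketch of the widened router union; the import path is assumed.

import type { RouterName } from "./src/shared/api" // path assumed

// "litellm" is now a valid RouterName; before this commit it was a type error here.
const routers: RouterName[] = ["openrouter", "requesty", "glama", "unbound", "litellm"]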