Commit 0bbd3fd

Add LiteLLM provider (#3242)
Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com>
1 parent cd423d3 commit 0bbd3fd

34 files changed, with 366 additions and 5 deletions.

src/api/index.ts

Lines changed: 3 additions & 0 deletions
@@ -25,6 +25,7 @@ import { FakeAIHandler } from "./providers/fake-ai"
 import { XAIHandler } from "./providers/xai"
 import { GroqHandler } from "./providers/groq"
 import { ChutesHandler } from "./providers/chutes"
+import { LiteLLMHandler } from "./providers/litellm"

 export interface SingleCompletionHandler {
 	completePrompt(prompt: string): Promise<string>
@@ -94,6 +95,8 @@ export function buildApiHandler(configuration: ApiConfiguration): ApiHandler {
 			return new GroqHandler(options)
 		case "chutes":
 			return new ChutesHandler(options)
+		case "litellm":
+			return new LiteLLMHandler(options)
 		default:
 			return new AnthropicHandler(options)
 	}

src/api/providers/fetchers/cache.ts

Lines changed: 15 additions & 1 deletion
@@ -12,6 +12,7 @@ import { getOpenRouterModels } from "./openrouter"
 import { getRequestyModels } from "./requesty"
 import { getGlamaModels } from "./glama"
 import { getUnboundModels } from "./unbound"
+import { getLiteLLMModels } from "./litellm"

 const memoryCache = new NodeCache({ stdTTL: 5 * 60, checkperiod: 5 * 60 })

@@ -36,9 +37,15 @@ async function readModels(router: RouterName): Promise<ModelRecord | undefined>
  * 2. File cache - This is a file-based cache that is used to store models for a longer period of time.
  *
  * @param router - The router to fetch models from.
+ * @param apiKey - Optional API key for the provider.
+ * @param baseUrl - Optional base URL for the provider (currently used only for LiteLLM).
  * @returns The models from the cache or the fetched models.
  */
-export const getModels = async (router: RouterName, apiKey: string | undefined = undefined): Promise<ModelRecord> => {
+export const getModels = async (
+	router: RouterName,
+	apiKey: string | undefined = undefined,
+	baseUrl: string | undefined = undefined,
+): Promise<ModelRecord> => {
 	let models = memoryCache.get<ModelRecord>(router)
 	if (models) {
 		// console.log(`[getModels] NodeCache hit for ${router} -> ${Object.keys(models).length}`)
@@ -59,6 +66,13 @@ export const getModels = async (router: RouterName, apiKey: string | undefined =
 		case "unbound":
 			models = await getUnboundModels()
 			break
+		case "litellm":
+			if (apiKey && baseUrl) {
+				models = await getLiteLLMModels(apiKey, baseUrl)
+			} else {
+				models = {}
+			}
+			break
 	}

 	if (Object.keys(models).length > 0) {
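
As a rough sketch of the new call path, a caller can now hand the LiteLLM credentials straight to the shared cache; the import path, key, and URL below are placeholders used only for illustration.

import { getModels } from "./src/api/providers/fetchers/cache" // path assumed

async function loadLiteLLMModels() {
	// Both arguments must be present; otherwise the "litellm" branch above
	// resolves to an empty record.
	const models = await getModels("litellm", "sk-example", "http://localhost:4000")
	console.log(Object.keys(models))
}

loadLiteLLMModels().catch(console.error)
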
src/api/providers/fetchers/litellm.ts

Lines changed: 58 additions & 0 deletions
@@ -0,0 +1,58 @@
+import axios from "axios"
+import { COMPUTER_USE_MODELS, ModelRecord } from "../../../shared/api"
+
+/**
+ * Fetches available models from a LiteLLM server
+ *
+ * @param apiKey The API key for the LiteLLM server
+ * @param baseUrl The base URL of the LiteLLM server
+ * @returns A promise that resolves to a record of model IDs to model info
+ */
+export async function getLiteLLMModels(apiKey: string, baseUrl: string): Promise<ModelRecord> {
+	try {
+		const headers: Record<string, string> = {
+			"Content-Type": "application/json",
+		}
+
+		if (apiKey) {
+			headers["Authorization"] = `Bearer ${apiKey}`
+		}
+
+		const response = await axios.get(`${baseUrl}/v1/model/info`, { headers })
+		const models: ModelRecord = {}
+
+		const computerModels = Array.from(COMPUTER_USE_MODELS)
+
+		// Process the model info from the response
+		if (response.data && response.data.data && Array.isArray(response.data.data)) {
+			for (const model of response.data.data) {
+				const modelName = model.model_name
+				const modelInfo = model.model_info
+				const litellmModelName = model?.litellm_params?.model as string | undefined
+
+				if (!modelName || !modelInfo || !litellmModelName) continue
+
+				models[modelName] = {
+					maxTokens: modelInfo.max_tokens || 8192,
+					contextWindow: modelInfo.max_input_tokens || 200000,
+					supportsImages: Boolean(modelInfo.supports_vision),
+					// litellm_params.model may have a prefix like openrouter/
+					supportsComputerUse: computerModels.some((computer_model) =>
+						litellmModelName.endsWith(computer_model),
+					),
+					supportsPromptCache: Boolean(modelInfo.supports_prompt_caching),
+					inputPrice: modelInfo.input_cost_per_token ? modelInfo.input_cost_per_token * 1000000 : undefined,
+					outputPrice: modelInfo.output_cost_per_token
+						? modelInfo.output_cost_per_token * 1000000
+						: undefined,
+					description: `${modelName} via LiteLLM proxy`,
+				}
+			}
+		}
+
+		return models
+	} catch (error) {
+		console.error("Error fetching LiteLLM models:", error)
+		return {}
+	}
+}
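
For reference, the fields the fetcher reads come from GET {baseUrl}/v1/model/info; the entry below is an illustrative sketch with made-up numbers, not a captured response, and shows roughly how one entry maps onto a ModelRecord value.

// One illustrative /v1/model/info entry, limited to the fields getLiteLLMModels reads.
const sampleEntry = {
	model_name: "claude-3-7-sonnet",
	litellm_params: { model: "anthropic/claude-3-7-sonnet-20250219" },
	model_info: {
		max_tokens: 8192,
		max_input_tokens: 200000,
		supports_vision: true,
		supports_prompt_caching: true,
		input_cost_per_token: 0.000003,
		output_cost_per_token: 0.000015,
	},
}

// Roughly the resulting ModelRecord value. Per-token costs become per-million-token
// prices (0.000003 * 1_000_000 = 3), and supportsComputerUse is true only when the
// underlying litellm_params.model ends with an entry of COMPUTER_USE_MODELS.
const expected = {
	maxTokens: 8192,
	contextWindow: 200000,
	supportsImages: true,
	supportsPromptCache: true,
	inputPrice: 3,
	outputPrice: 15,
	description: "claude-3-7-sonnet via LiteLLM proxy",
}
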

src/api/providers/litellm.ts

Lines changed: 113 additions & 0 deletions
@@ -0,0 +1,113 @@
+import OpenAI from "openai"
+import { Anthropic } from "@anthropic-ai/sdk" // Keep for type usage only
+
+import { ApiHandlerOptions, litellmDefaultModelId, litellmDefaultModelInfo } from "../../shared/api"
+import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
+import { convertToOpenAiMessages } from "../transform/openai-format"
+import { SingleCompletionHandler } from "../index"
+import { RouterProvider } from "./router-provider"
+
+/**
+ * LiteLLM provider handler
+ *
+ * This handler uses the LiteLLM API to proxy requests to various LLM providers.
+ * It follows the OpenAI API format for compatibility.
+ */
+export class LiteLLMHandler extends RouterProvider implements SingleCompletionHandler {
+	constructor(options: ApiHandlerOptions) {
+		super({
+			options,
+			name: "litellm",
+			baseURL: `${options.litellmBaseUrl || "http://localhost:4000"}`,
+			apiKey: options.litellmApiKey || "dummy-key",
+			modelId: options.litellmModelId,
+			defaultModelId: litellmDefaultModelId,
+			defaultModelInfo: litellmDefaultModelInfo,
+		})
+	}
+
+	override async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
+		const { id: modelId, info } = await this.fetchModel()
+
+		const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
+			{ role: "system", content: systemPrompt },
+			...convertToOpenAiMessages(messages),
+		]
+
+		// Required by some providers; others default to max tokens allowed
+		let maxTokens: number | undefined = info.maxTokens ?? undefined
+
+		const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
+			model: modelId,
+			max_tokens: maxTokens,
+			messages: openAiMessages,
+			stream: true,
+			stream_options: {
+				include_usage: true,
+			},
+		}
+
+		if (this.supportsTemperature(modelId)) {
+			requestOptions.temperature = this.options.modelTemperature ?? 0
+		}
+
+		try {
+			const { data: completion } = await this.client.chat.completions.create(requestOptions).withResponse()
+
+			let lastUsage
+
+			for await (const chunk of completion) {
+				const delta = chunk.choices[0]?.delta
+				const usage = chunk.usage as OpenAI.CompletionUsage
+
+				if (delta?.content) {
+					yield { type: "text", text: delta.content }
+				}
+
+				if (usage) {
+					lastUsage = usage
+				}
+			}
+
+			if (lastUsage) {
+				const usageData: ApiStreamUsageChunk = {
+					type: "usage",
+					inputTokens: lastUsage.prompt_tokens || 0,
+					outputTokens: lastUsage.completion_tokens || 0,
+				}
+
+				yield usageData
+			}
+		} catch (error) {
+			if (error instanceof Error) {
+				throw new Error(`LiteLLM streaming error: ${error.message}`)
+			}
+			throw error
+		}
+	}
+
+	async completePrompt(prompt: string): Promise<string> {
+		const { id: modelId, info } = await this.fetchModel()
+
+		try {
+			const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = {
+				model: modelId,
+				messages: [{ role: "user", content: prompt }],
+			}
+
+			if (this.supportsTemperature(modelId)) {
+				requestOptions.temperature = this.options.modelTemperature ?? 0
+			}
+
+			requestOptions.max_tokens = info.maxTokens
+
+			const response = await this.client.chat.completions.create(requestOptions)
+			return response.choices[0]?.message.content || ""
+		} catch (error) {
+			if (error instanceof Error) {
+				throw new Error(`LiteLLM completion error: ${error.message}`)
+			}
+			throw error
+		}
+	}
+}
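
A hedged end-to-end sketch of the handler, assuming a LiteLLM proxy is reachable at the default localhost:4000 address; the import path, key, and model id are placeholders, and the model must actually exist on the proxy.

import { LiteLLMHandler } from "./src/api/providers/litellm" // path assumed

async function demo() {
	const handler = new LiteLLMHandler({
		litellmBaseUrl: "http://localhost:4000", // placeholder
		litellmApiKey: "sk-example", // placeholder
		litellmModelId: "anthropic/claude-3-7-sonnet-20250219",
	})

	// Text arrives as "text" chunks; token counts arrive as a trailing "usage"
	// chunk when the proxy reports stream usage.
	for await (const chunk of handler.createMessage("You are terse.", [{ role: "user", content: "Say hello." }])) {
		if (chunk.type === "text") process.stdout.write(chunk.text)
		if (chunk.type === "usage") console.log(`\ntokens in/out: ${chunk.inputTokens}/${chunk.outputTokens}`)
	}
}

demo().catch(console.error)
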

src/api/providers/router-provider.ts

Lines changed: 1 addition & 1 deletion
@@ -44,7 +44,7 @@ export abstract class RouterProvider extends BaseProvider {
 	}

 	public async fetchModel() {
-		this.models = await getModels(this.name)
+		this.models = await getModels(this.name, this.client.apiKey, this.client.baseURL)
 		return this.getModel()
 	}

src/core/webview/webviewMessageHandler.ts

Lines changed: 3 additions & 1 deletion
@@ -289,11 +289,12 @@ export const webviewMessageHandler = async (provider: ClineProvider, message: We
 		case "requestRouterModels":
 			const { apiConfiguration } = await provider.getState()

-			const [openRouterModels, requestyModels, glamaModels, unboundModels] = await Promise.all([
+			const [openRouterModels, requestyModels, glamaModels, unboundModels, litellmModels] = await Promise.all([
 				getModels("openrouter", apiConfiguration.openRouterApiKey),
 				getModels("requesty", apiConfiguration.requestyApiKey),
 				getModels("glama", apiConfiguration.glamaApiKey),
 				getModels("unbound", apiConfiguration.unboundApiKey),
+				getModels("litellm", apiConfiguration.litellmApiKey, apiConfiguration.litellmBaseUrl),
 			])

 			provider.postMessageToWebview({
@@ -303,6 +304,7 @@ export const webviewMessageHandler = async (provider: ClineProvider, message: We
 					requesty: requestyModels,
 					glama: glamaModels,
 					unbound: unboundModels,
+					litellm: litellmModels,
 				},
 			})
 			break

src/exports/roo-code.d.ts

Lines changed: 5 additions & 0 deletions
@@ -23,6 +23,7 @@ type ProviderSettings = {
 		| "xai"
 		| "groq"
 		| "chutes"
+		| "litellm"
 	)
 	| undefined
 	apiModelId?: string | undefined
@@ -123,6 +124,9 @@ type ProviderSettings = {
 	xaiApiKey?: string | undefined
 	groqApiKey?: string | undefined
 	chutesApiKey?: string | undefined
+	litellmBaseUrl?: string | undefined
+	litellmApiKey?: string | undefined
+	litellmModelId?: string | undefined
 	modelMaxTokens?: number | undefined
 	modelMaxThinkingTokens?: number | undefined
 	includeMaxTokens?: boolean | undefined
@@ -163,6 +167,7 @@ type GlobalSettings = {
 		| "xai"
 		| "groq"
 		| "chutes"
+		| "litellm"
 	)
 	| undefined
 }[]

src/exports/types.ts

Lines changed: 5 additions & 0 deletions
@@ -24,6 +24,7 @@ type ProviderSettings = {
 		| "xai"
 		| "groq"
 		| "chutes"
+		| "litellm"
 	)
 	| undefined
 	apiModelId?: string | undefined
@@ -124,6 +125,9 @@ type ProviderSettings = {
 	xaiApiKey?: string | undefined
 	groqApiKey?: string | undefined
 	chutesApiKey?: string | undefined
+	litellmBaseUrl?: string | undefined
+	litellmApiKey?: string | undefined
+	litellmModelId?: string | undefined
 	modelMaxTokens?: number | undefined
 	modelMaxThinkingTokens?: number | undefined
 	includeMaxTokens?: boolean | undefined
@@ -166,6 +170,7 @@ type GlobalSettings = {
 		| "xai"
 		| "groq"
 		| "chutes"
+		| "litellm"
 	)
 	| undefined
 }[]

src/schemas/index.ts

Lines changed: 11 additions & 0 deletions
@@ -31,6 +31,7 @@ export const providerNames = [
 	"xai",
 	"groq",
 	"chutes",
+	"litellm",
 ] as const

 export const providerNamesSchema = z.enum(providerNames)
@@ -429,6 +430,10 @@ export const providerSettingsSchema = z.object({
 	groqApiKey: z.string().optional(),
 	// Chutes AI
 	chutesApiKey: z.string().optional(),
+	// LiteLLM
+	litellmBaseUrl: z.string().optional(),
+	litellmApiKey: z.string().optional(),
+	litellmModelId: z.string().optional(),
 	// Claude 3.7 Sonnet Thinking
 	modelMaxTokens: z.number().optional(),
 	modelMaxThinkingTokens: z.number().optional(),
@@ -538,6 +543,10 @@ const providerSettingsRecord: ProviderSettingsRecord = {
 	groqApiKey: undefined,
 	// Chutes AI
 	chutesApiKey: undefined,
+	// LiteLLM
+	litellmBaseUrl: undefined,
+	litellmApiKey: undefined,
+	litellmModelId: undefined,
 }

 export const PROVIDER_SETTINGS_KEYS = Object.keys(providerSettingsRecord) as Keys<ProviderSettings>[]
@@ -732,6 +741,7 @@ export type SecretState = Pick<
 	| "xaiApiKey"
 	| "groqApiKey"
 	| "chutesApiKey"
+	| "litellmApiKey"
 >

 type SecretStateRecord = Record<Keys<SecretState>, undefined>
@@ -753,6 +763,7 @@ const secretStateRecord: SecretStateRecord = {
 	xaiApiKey: undefined,
 	groqApiKey: undefined,
 	chutesApiKey: undefined,
+	litellmApiKey: undefined,
 }

 export const SECRET_STATE_KEYS = Object.keys(secretStateRecord) as Keys<SecretState>[]
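
As a small sketch of the new settings flowing through the extended schema, the snippet below assumes the import path and uses placeholder values; all three LiteLLM keys are optional strings, so partial configurations also parse.

import { providerSettingsSchema } from "./src/schemas" // path assumed

const settings = providerSettingsSchema.parse({
	litellmBaseUrl: "http://localhost:4000", // placeholder proxy address
	litellmApiKey: "sk-example", // placeholder; the key is listed in SECRET_STATE_KEYS above
	litellmModelId: "anthropic/claude-3-7-sonnet-20250219",
})
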

src/shared/api.ts

Lines changed: 15 additions & 1 deletion
@@ -1136,6 +1136,20 @@ export const unboundDefaultModelInfo: ModelInfo = {
 	cacheReadsPrice: 0.3,
 }

+// LiteLLM
+// https://docs.litellm.ai/
+export const litellmDefaultModelId = "anthropic/claude-3-7-sonnet-20250219"
+export const litellmDefaultModelInfo: ModelInfo = {
+	maxTokens: 8192,
+	contextWindow: 200_000,
+	supportsImages: true,
+	supportsComputerUse: true,
+	supportsPromptCache: true,
+	inputPrice: 3.0,
+	outputPrice: 15.0,
+	cacheWritesPrice: 3.75,
+	cacheReadsPrice: 0.3,
+}
 // xAI
 // https://docs.x.ai/docs/api-reference
 export type XAIModelId = keyof typeof xaiModels
@@ -1731,7 +1745,7 @@ export const COMPUTER_USE_MODELS = new Set([
 	"anthropic/claude-3.7-sonnet:thinking",
 ])

-const routerNames = ["openrouter", "requesty", "glama", "unbound"] as const
+const routerNames = ["openrouter", "requesty", "glama", "unbound", "litellm"] as const

 export type RouterName = (typeof routerNames)[number]

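Finally, a tiny sketch of the widened router union; the import path is assumed.

import type { RouterName } from "./src/shared/api" // path assumed

// "litellm" is now a valid RouterName; before this commit it was a type error here.
const routers: RouterName[] = ["openrouter", "requesty", "glama", "unbound", "litellm"]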