
Commit 95c8479

feat(llm): Add LiteLLM (Proxy) Support (#630)
* Add LiteLLM
* Use existing impl
* Use OpenAI Config
* Add Docs for LiteLLM
1 parent 316cdf3 commit 95c8479


5 files changed: +91 −0 lines


docs/docs/ai/llm.mdx

Lines changed: 65 additions & 0 deletions
@@ -121,3 +121,68 @@ cocoindex.LlmSpec(
You can find the full list of models supported by Anthropic [here](https://docs.anthropic.com/en/docs/about-claude/models/all-models).

### LiteLLM

To use the LiteLLM API, you need to set the environment variable `LITELLM_API_KEY`.

#### 1. Install LiteLLM Proxy

```bash
pip install 'litellm[proxy]'
```

#### 2. Create a `config.yml` for LiteLLM

**Example for OpenAI:**

```yaml
model_list:
  - model_name: "*"
    litellm_params:
      model: openai/*
      api_key: os.environ/LITELLM_API_KEY
```

**Example for DeepSeek:**

First, pull the DeepSeek model with Ollama:

```bash
ollama pull deepseek-r1
```

Then run it if it's not already running:

```bash
ollama run deepseek-r1
```

Then, use this in your `config.yml`:

```yaml
model_list:
  - model_name: "deepseek-r1"
    litellm_params:
      model: "ollama_chat/deepseek-r1"
      api_base: "http://localhost:11434"
```

#### 3. Run LiteLLM Proxy

```bash
litellm --config config.yml
```
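Once the proxy is running, it can help to sanity-check it before pointing CocoIndex at it. The snippet below is not part of this change and assumes the `openai` Python package is installed; it simply talks to the proxy's OpenAI-compatible endpoint, which is the same interface the new Rust client in this commit relies on:

```python
import os

from openai import OpenAI

# Point an OpenAI-compatible client at the LiteLLM proxy (default address).
client = OpenAI(
    base_url="http://127.0.0.1:4000",
    api_key=os.environ.get("LITELLM_API_KEY", "dummy-key"),
)

# Route a trivial chat request through the proxy to the configured model.
response = client.chat.completions.create(
    model="deepseek-r1",
    messages=[{"role": "user", "content": "Say hello in one short sentence."}],
)
print(response.choices[0].message.content)
```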
#### 4. Define a spec for LiteLLM

A spec for LiteLLM will look like this:

<Tabs>
<TabItem value="python" label="Python" default>

```python
cocoindex.LlmSpec(
    api_type=cocoindex.LlmApiType.LITELLM,
    model="deepseek-r1",
    address="http://127.0.0.1:4000",  # default URL of the LiteLLM proxy
)
```

</TabItem>
</Tabs>
You can find the full list of models supported by LiteLLM [here](https://docs.litellm.ai/docs/providers).
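For context (this is not part of the diff), a spec like the one above is what you would hand to an LLM-backed function inside a flow. The sketch below follows the pattern of the existing `ExtractByLlm` examples; the names `cocoindex.functions.ExtractByLlm`, `llm_spec`, `output_type`, and `instruction` are assumptions carried over from those examples, not something this commit introduces:

```python
import dataclasses

import cocoindex


@dataclasses.dataclass
class ModuleSummary:
    """Hypothetical output type for the extraction."""
    name: str
    summary: str


# Build an LLM-backed extraction function that routes through the LiteLLM proxy.
extract = cocoindex.functions.ExtractByLlm(
    llm_spec=cocoindex.LlmSpec(
        api_type=cocoindex.LlmApiType.LITELLM,
        model="deepseek-r1",
        address="http://127.0.0.1:4000",
    ),
    output_type=ModuleSummary,
    instruction="Summarize the module in one sentence.",
)

# Inside a flow, it would be applied roughly like:
#   doc["summary"] = doc["content"].transform(extract)
```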

python/cocoindex/llm.py

Lines changed: 1 addition & 0 deletions
@@ -9,6 +9,7 @@ class LlmApiType(Enum):
    OLLAMA = "Ollama"
    GEMINI = "Gemini"
    ANTHROPIC = "Anthropic"
+    LITELLM = "LiteLlm"

@dataclass
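One note on this one-line change (the check below is illustrative, not part of the commit): the Python enum's string value deliberately matches the new Rust variant `LlmApiType::LiteLlm`, since that string is what identifies the API type when the spec crosses from Python into the engine, presumably via serde's default variant naming:

```python
import cocoindex

# The Python-side value mirrors the Rust enum variant name exactly.
assert cocoindex.LlmApiType.LITELLM.value == "LiteLlm"
```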

src/llm/litellm.rs

Lines changed: 16 additions & 0 deletions
@@ -0,0 +1,16 @@
use async_openai::config::OpenAIConfig;
use async_openai::Client as OpenAIClient;

pub use super::openai::Client;

impl Client {
    pub async fn new_litellm(spec: super::LlmSpec) -> anyhow::Result<Self> {
        let address = spec.address.clone().unwrap_or_else(|| "http://127.0.0.1:4000".to_string());
        let api_key = std::env::var("LITELLM_API_KEY").ok();
        let mut config = OpenAIConfig::new().with_api_base(address);
        if let Some(api_key) = api_key {
            config = config.with_api_key(api_key);
        }
        Ok(Client::from_parts(OpenAIClient::with_config(config), spec.model))
    }
}
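A consequence of the `unwrap_or_else` fallback above: when the proxy runs on its default port, the spec's `address` can be omitted and the Rust side fills in `http://127.0.0.1:4000`. A minimal spec would then look like the sketch below (illustrative only; it assumes the Python `LlmSpec` leaves `address` unset by default so the fallback applies):

```python
import cocoindex

# No address given: the new LiteLLM client falls back to http://127.0.0.1:4000.
spec = cocoindex.LlmSpec(
    api_type=cocoindex.LlmApiType.LITELLM,
    model="deepseek-r1",
)
```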

src/llm/mod.rs

Lines changed: 5 additions & 0 deletions
@@ -13,6 +13,7 @@ pub enum LlmApiType {
    OpenAi,
    Gemini,
    Anthropic,
+    LiteLlm,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
@@ -56,6 +57,7 @@ mod anthropic;
mod gemini;
mod ollama;
mod openai;
+mod litellm;

pub async fn new_llm_generation_client(spec: LlmSpec) -> Result<Box<dyn LlmGenerationClient>> {
    let client = match spec.api_type {
@@ -71,6 +73,9 @@ pub async fn new_llm_generation_client(spec: LlmSpec) -> Result<Box<dyn LlmGener
        LlmApiType::Anthropic => {
            Box::new(anthropic::Client::new(spec).await?) as Box<dyn LlmGenerationClient>
        }
+        LlmApiType::LiteLlm => {
+            Box::new(litellm::Client::new_litellm(spec).await?) as Box<dyn LlmGenerationClient>
+        }
    };
    Ok(client)
}

src/llm/openai.rs

Lines changed: 4 additions & 0 deletions
@@ -20,6 +20,10 @@ pub struct Client {
}

impl Client {
+    pub(crate) fn from_parts(client: async_openai::Client<OpenAIConfig>, model: String) -> Self {
+        Self { client, model }
+    }
+
    pub async fn new(spec: super::LlmSpec) -> Result<Self> {
        if let Some(address) = spec.address {
            api_bail!("OpenAI doesn't support custom API address: {address}");
