diff --git a/docs/docs/ai/llm.mdx b/docs/docs/ai/llm.mdx
index b69a83650..b3073d8e0 100644
--- a/docs/docs/ai/llm.mdx
+++ b/docs/docs/ai/llm.mdx
@@ -26,6 +26,7 @@ We support the following types of LLM APIs:
| [Voyage](#voyage) | `LlmApiType.VOYAGE` | ❌ | ✅ |
| [LiteLLM](#litellm) | `LlmApiType.LITE_LLM` | ✅ | ❌ |
| [OpenRouter](#openrouter) | `LlmApiType.OPEN_ROUTER` | ✅ | ❌ |
+| [vLLM](#vllm) | `LlmApiType.VLLM` | ✅ | ❌ |
## LLM Tasks
@@ -307,3 +308,34 @@ cocoindex.LlmSpec(
You can find the full list of models supported by OpenRouter [here](https://openrouter.ai/models).
+
+### vLLM
+
+Install vLLM:
+
+```bash
+pip install vllm
+```
+
+Run the vLLM server:
+
+```bash
+vllm serve deepseek-ai/deepseek-coder-1.3b-instruct
+```
+
+
+A spec for vLLM looks like this:
+
+
+
+
+```python
+cocoindex.LlmSpec(
+ api_type=cocoindex.LlmApiType.VLLM,
+ model="deepseek-ai/deepseek-coder-1.3b-instruct",
+ address="http://127.0.0.1:8000/v1",
+)
+```
+
+
+
\ No newline at end of file
diff --git a/python/cocoindex/llm.py b/python/cocoindex/llm.py
index 2fa61aa8e..6a77e93e8 100644
--- a/python/cocoindex/llm.py
+++ b/python/cocoindex/llm.py
@@ -12,6 +12,7 @@ class LlmApiType(Enum):
LITE_LLM = "LiteLlm"
OPEN_ROUTER = "OpenRouter"
VOYAGE = "Voyage"
+ VLLM = "Vllm"
@dataclass
diff --git a/src/llm/mod.rs b/src/llm/mod.rs
index a4f582899..9d1058c52 100644
--- a/src/llm/mod.rs
+++ b/src/llm/mod.rs
@@ -13,6 +13,7 @@ pub enum LlmApiType {
LiteLlm,
OpenRouter,
Voyage,
+ Vllm,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
@@ -81,6 +82,7 @@ mod litellm;
mod ollama;
mod openai;
mod openrouter;
+mod vllm;
mod voyage;
pub async fn new_llm_generation_client(
@@ -108,6 +110,9 @@ pub async fn new_llm_generation_client(
LlmApiType::Voyage => {
api_bail!("Voyage is not supported for generation")
}
+ LlmApiType::Vllm => {
+            Box::new(vllm::Client::new_vllm(address).await?) as Box<dyn LlmGenerationClient>
+ }
};
Ok(client)
}
@@ -129,6 +134,7 @@ pub fn new_llm_embedding_client(
LlmApiType::Ollama
| LlmApiType::OpenRouter
| LlmApiType::LiteLlm
+ | LlmApiType::Vllm
| LlmApiType::Anthropic => {
api_bail!("Embedding is not supported for API type {:?}", api_type)
}
diff --git a/src/llm/vllm.rs b/src/llm/vllm.rs
new file mode 100644
index 000000000..1f32bc655
--- /dev/null
+++ b/src/llm/vllm.rs
@@ -0,0 +1,16 @@
+use async_openai::Client as OpenAIClient;
+use async_openai::config::OpenAIConfig;
+
+pub use super::openai::Client;
+
+impl Client {
+    pub async fn new_vllm(address: Option<String>) -> anyhow::Result<Self> {
+ let address = address.unwrap_or_else(|| "http://127.0.0.1:8000/v1".to_string());
+ let api_key = std::env::var("VLLM_API_KEY").ok();
+ let mut config = OpenAIConfig::new().with_api_base(address);
+ if let Some(api_key) = api_key {
+ config = config.with_api_key(api_key);
+ }
+ Ok(Client::from_parts(OpenAIClient::with_config(config)))
+ }
+}