diff --git a/docs/docs/ai/llm.mdx b/docs/docs/ai/llm.mdx
index b69a83650..b3073d8e0 100644
--- a/docs/docs/ai/llm.mdx
+++ b/docs/docs/ai/llm.mdx
@@ -26,6 +26,7 @@ We support the following types of LLM APIs:
 | [Voyage](#voyage) | `LlmApiType.VOYAGE` | ❌ | ✅ |
 | [LiteLLM](#litellm) | `LlmApiType.LITE_LLM` | ✅ | ❌ |
 | [OpenRouter](#openrouter) | `LlmApiType.OPEN_ROUTER` | ✅ | ❌ |
+| [vLLM](#vllm) | `LlmApiType.VLLM` | ✅ | ❌ |
 
 ## LLM Tasks
 
@@ -307,3 +308,34 @@ cocoindex.LlmSpec(
 ```
 
 You can find the full list of models supported by OpenRouter [here](https://openrouter.ai/models).
+
+### vLLM
+
+Install vLLM:
+
+```bash
+pip install vllm
+```
+
+Run vLLM Server
+
+```bash
+vllm serve deepseek-ai/deepseek-coder-1.3b-instruct
+```
+
+
+A spec for vLLM looks like this:
+
+<Tabs>
+<TabItem value="python" label="Python">
+
+```python
+cocoindex.LlmSpec(
+    api_type=cocoindex.LlmApiType.VLLM,
+    model="deepseek-ai/deepseek-coder-1.3b-instruct",
+    address="http://127.0.0.1:8000/v1",
+)
+```
+
+</TabItem>
+</Tabs>
\ No newline at end of file
diff --git a/python/cocoindex/llm.py b/python/cocoindex/llm.py
index 2fa61aa8e..6a77e93e8 100644
--- a/python/cocoindex/llm.py
+++ b/python/cocoindex/llm.py
@@ -12,6 +12,7 @@ class LlmApiType(Enum):
     LITE_LLM = "LiteLlm"
     OPEN_ROUTER = "OpenRouter"
     VOYAGE = "Voyage"
+    VLLM = "Vllm"
 
 
 @dataclass
diff --git a/src/llm/mod.rs b/src/llm/mod.rs
index a4f582899..9d1058c52 100644
--- a/src/llm/mod.rs
+++ b/src/llm/mod.rs
@@ -13,6 +13,7 @@ pub enum LlmApiType {
     LiteLlm,
     OpenRouter,
     Voyage,
+    Vllm,
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -81,6 +82,7 @@ mod litellm;
 mod ollama;
 mod openai;
 mod openrouter;
+mod vllm;
 mod voyage;
 
 pub async fn new_llm_generation_client(
@@ -108,6 +110,9 @@ pub async fn new_llm_generation_client(
         LlmApiType::Voyage => {
             api_bail!("Voyage is not supported for generation")
         }
+        LlmApiType::Vllm => {
+            Box::new(vllm::Client::new_vllm(address).await?) as Box<dyn LlmGenerationClient>
+        }
     };
     Ok(client)
 }
@@ -129,6 +134,7 @@ pub fn new_llm_embedding_client(
         LlmApiType::Ollama
         | LlmApiType::OpenRouter
         | LlmApiType::LiteLlm
+        | LlmApiType::Vllm
         | LlmApiType::Anthropic => {
             api_bail!("Embedding is not supported for API type {:?}", api_type)
         }
diff --git a/src/llm/vllm.rs b/src/llm/vllm.rs
new file mode 100644
index 000000000..1f32bc655
--- /dev/null
+++ b/src/llm/vllm.rs
@@ -0,0 +1,16 @@
+use async_openai::Client as OpenAIClient;
+use async_openai::config::OpenAIConfig;
+
+pub use super::openai::Client;
+
+impl Client {
+    pub async fn new_vllm(address: Option<String>) -> anyhow::Result<Self> {
+        let address = address.unwrap_or_else(|| "http://127.0.0.1:8000/v1".to_string());
+        let api_key = std::env::var("VLLM_API_KEY").ok();
+        let mut config = OpenAIConfig::new().with_api_base(address);
+        if let Some(api_key) = api_key {
+            config = config.with_api_key(api_key);
+        }
+        Ok(Client::from_parts(OpenAIClient::with_config(config)))
+    }
+}