4 files changed, +55 -0 lines changed

@@ -26,6 +26,7 @@ We support the following types of LLM APIs:
| [Voyage](#voyage) | `LlmApiType.VOYAGE` | ❌ | ✅ |
| [LiteLLM](#litellm) | `LlmApiType.LITE_LLM` | ✅ | ❌ |
| [OpenRouter](#openrouter) | `LlmApiType.OPEN_ROUTER` | ✅ | ❌ |
+| [vLLM](#vllm) | `LlmApiType.VLLM` | ✅ | ❌ |

## LLM Tasks

@@ -307,3 +308,34 @@ cocoindex.LlmSpec(
</Tabs>

You can find the full list of models supported by OpenRouter [here](https://openrouter.ai/models).
+
+### vLLM
+
+Install vLLM:
+
+```bash
+pip install vllm
+```
+
+Run the vLLM server:
+
+```bash
+vllm serve deepseek-ai/deepseek-coder-1.3b-instruct
+```
+
+
+A spec for vLLM looks like this:
+
+<Tabs>
+<TabItem value="python" label="Python" default>
+
+```python
+cocoindex.LlmSpec(
+    api_type=cocoindex.LlmApiType.VLLM,
+    model="deepseek-ai/deepseek-coder-1.3b-instruct",
+    address="http://127.0.0.1:8000/v1",
+)
+```
+
+</TabItem>
+</Tabs>
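As with the other API types, this spec is only useful once it is handed to an LLM-backed function inside a flow. A minimal sketch of that usage, assuming an extraction function like `cocoindex.functions.ExtractByLlm` with `llm_spec`, `output_type`, and `instruction` arguments as in the existing docs (the `ModuleInfo` type is purely illustrative and not part of this diff):

```python
import dataclasses
import cocoindex

@dataclasses.dataclass
class ModuleInfo:
    """Illustrative output structure for the extraction."""
    name: str
    description: str

# Assumed usage, mirroring the other API sections: the vLLM spec is passed
# to an LLM-backed extraction function that can be used in a flow transform.
extract_module_info = cocoindex.functions.ExtractByLlm(
    llm_spec=cocoindex.LlmSpec(
        api_type=cocoindex.LlmApiType.VLLM,
        model="deepseek-ai/deepseek-coder-1.3b-instruct",
        address="http://127.0.0.1:8000/v1",
    ),
    output_type=ModuleInfo,
    instruction="Extract the module name and a one-sentence description.",
)
```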
@@ -12,6 +12,7 @@ class LlmApiType(Enum):
    LITE_LLM = "LiteLlm"
    OPEN_ROUTER = "OpenRouter"
    VOYAGE = "Voyage"
+    VLLM = "Vllm"


@dataclass
@@ -13,6 +13,7 @@ pub enum LlmApiType {
    LiteLlm,
    OpenRouter,
    Voyage,
+    Vllm,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
@@ -81,6 +82,7 @@ mod litellm;
mod ollama;
mod openai;
mod openrouter;
+mod vllm;
mod voyage;

pub async fn new_llm_generation_client(
@@ -108,6 +110,9 @@ pub async fn new_llm_generation_client(
        LlmApiType::Voyage => {
            api_bail!("Voyage is not supported for generation")
        }
+        LlmApiType::Vllm => {
+            Box::new(vllm::Client::new_vllm(address).await?) as Box<dyn LlmGenerationClient>
+        }
    };
    Ok(client)
}
@@ -129,6 +134,7 @@ pub fn new_llm_embedding_client(
        LlmApiType::Ollama
        | LlmApiType::OpenRouter
        | LlmApiType::LiteLlm
+        | LlmApiType::Vllm
        | LlmApiType::Anthropic => {
            api_bail!("Embedding is not supported for API type {:?}", api_type)
        }
+use async_openai::Client as OpenAIClient;
+use async_openai::config::OpenAIConfig;
+
+pub use super::openai::Client;
+
+impl Client {
+    pub async fn new_vllm(address: Option<String>) -> anyhow::Result<Self> {
+        let address = address.unwrap_or_else(|| "http://127.0.0.1:8000/v1".to_string());
+        let api_key = std::env::var("VLLM_API_KEY").ok();
+        let mut config = OpenAIConfig::new().with_api_base(address);
+        if let Some(api_key) = api_key {
+            config = config.with_api_key(api_key);
+        }
+        Ok(Client::from_parts(OpenAIClient::with_config(config)))
+    }
+}
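The new client falls back to `http://127.0.0.1:8000/v1` when no address is given and picks up an optional `VLLM_API_KEY` from the environment, so a locally started `vllm serve` works with no extra configuration. A minimal sketch from the Python side, assuming `address` may be omitted on `LlmSpec` just as it is `Option<String>` in the Rust signature (the key value shown is an illustrative placeholder):

```python
import os
import cocoindex

# The Rust client reads VLLM_API_KEY from the environment when it is set,
# so an authenticated vLLM deployment only needs this variable exported.
os.environ.setdefault("VLLM_API_KEY", "<your-vllm-api-key>")  # illustrative placeholder

# Assuming the address can be omitted on the Python side as in the Rust
# signature, the client falls back to http://127.0.0.1:8000/v1.
spec = cocoindex.LlmSpec(
    api_type=cocoindex.LlmApiType.VLLM,
    model="deepseek-ai/deepseek-coder-1.3b-instruct",
)
```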