feat(llm): support litellm for multi-LLM provider (#178)

coderzc · imbajin · web-flow · commit 3e0bf46bcf84 · 2025-02-27T16:50:12.000+08:00
* Update README.md

---------

Co-authored-by: imbajin <jin@apache.org>
diff --git a/hugegraph-llm/.gitignore b/hugegraph-llm/.gitignore
@@ -1,3 +1,5 @@
 src/hugegraph_llm/resources/demo/questions_answers.xlsx
 src/hugegraph_llm/resources/demo/questions.xlsx
 src/hugegraph_llm/resources/backup-graph-data-4020/
+
+uv.lock
diff --git a/hugegraph-llm/README.md b/hugegraph-llm/README.md
@@ -67,6 +67,7 @@ graph systems and large language models.
     ```bash
     python -m hugegraph_llm.config.generate --update
     ```
+    Note: `LiteLLM` supports multiple LLM providers; refer to [litellm.ai](https://docs.litellm.ai/docs/providers) to configure it.
 7. (__Optional__) You could use 
     [hugegraph-hubble](https://hugegraph.apache.org/docs/quickstart/hugegraph-hubble/#21-use-docker-convenient-for-testdev) 
     to visit the graph data, could run it via [Docker/Docker-Compose](https://hub.docker.com/r/hugegraph/hubble) 
diff --git a/hugegraph-llm/poetry.lock b/hugegraph-llm/poetry.lock
diff --git a/hugegraph-llm/pyproject.toml b/hugegraph-llm/pyproject.toml
@@ -24,7 +24,7 @@ authors = [
 ]
 readme = "README.md"
 license = "Apache-2.0"
-requires-python = "^3.10"
+requires-python = ">=3.10,<3.12"
 maintainers = [
     { name = "Apache HugeGraph Contributors", email = "dev@hugegraph.apache.org" },
 ]
@@ -38,7 +38,7 @@ documentation = "https://hugegraph.apache.org/docs/quickstart/hugegraph-ai/"
 
 [tool.poetry.dependencies]
 python = "^3.10,<3.12"
-openai = "~1.47.1"
+openai = "~1.61.0"
 ollama = "~0.2.1"
 qianfan = "~0.3.18"
 retry = "~0.9.2"
@@ -61,6 +61,7 @@ setuptools = "~70.0.0"
 urllib3 = "~2.2.2"
 rich = "~13.9.4"
 apscheduler= "~3.10.4"
+litellm = "~1.61.13"
 hugegraph-python = { path = "../hugegraph-python-client/", develop = true }
 
 [build-system]
diff --git a/hugegraph-llm/requirements.txt b/hugegraph-llm/requirements.txt
@@ -1,4 +1,4 @@
-openai~=1.47.1
+openai~=1.61.0
 ollama~=0.2.1
 qianfan~=0.3.18
 retry~=0.9.2
@@ -16,3 +16,4 @@ pandas~=2.2.2
 openpyxl~=3.1.5
 pydantic-settings~=2.6.1
 apscheduler~=3.10.4
+litellm~=1.61.13
diff --git a/hugegraph-llm/src/hugegraph_llm/config/llm_config.py b/hugegraph-llm/src/hugegraph_llm/config/llm_config.py
@@ -25,10 +25,10 @@
 class LLMConfig(BaseConfig):
     """LLM settings"""
 
-    chat_llm_type: Literal["openai", "ollama/local", "qianfan_wenxin", "zhipu"] = "openai"
-    extract_llm_type: Literal["openai", "ollama/local", "qianfan_wenxin", "zhipu"] = "openai"
-    text2gql_llm_type: Literal["openai", "ollama/local", "qianfan_wenxin", "zhipu"] = "openai"
-    embedding_type: Optional[Literal["openai", "ollama/local", "qianfan_wenxin", "zhipu"]] = "openai"
+    chat_llm_type: Literal["openai", "litellm", "ollama/local", "qianfan_wenxin"] = "openai"
+    extract_llm_type: Literal["openai", "litellm", "ollama/local", "qianfan_wenxin"] = "openai"
+    text2gql_llm_type: Literal["openai", "litellm", "ollama/local", "qianfan_wenxin"] = "openai"
+    embedding_type: Optional[Literal["openai", "litellm", "ollama/local", "qianfan_wenxin"]] = "openai"
     reranker_type: Optional[Literal["cohere", "siliconflow"]] = None
     # 1. OpenAI settings
     openai_chat_api_base: Optional[str] = os.environ.get("OPENAI_BASE_URL", "https://api.openai.com/v1")
@@ -84,14 +84,19 @@ class LLMConfig(BaseConfig):
     qianfan_embed_url: Optional[str] = qianfan_url_prefix + "/embeddings/"
     # refer https://cloud.baidu.com/doc/WENXINWORKSHOP/s/alj562vvu to get more details
     qianfan_embedding_model: Optional[str] = "embedding-v1"
-    # TODO: To be confirmed, whether to configure
-    # 5. ZhiPu(GLM) settings
-    zhipu_chat_api_key: Optional[str] = None
-    zhipu_chat_language_model: Optional[str] = "glm-4"
-    zhipu_chat_embedding_model: Optional[str] = "embedding-2"
-    zhipu_extract_api_key: Optional[str] = None
-    zhipu_extract_language_model: Optional[str] = "glm-4"
-    zhipu_extract_embedding_model: Optional[str] = "embedding-2"
-    zhipu_text2gql_api_key: Optional[str] = None
-    zhipu_text2gql_language_model: Optional[str] = "glm-4"
-    zhipu_text2gql_embedding_model: Optional[str] = "embedding-2"
+    # 5. LiteLLM settings
+    litellm_chat_api_key: Optional[str] = None
+    litellm_chat_api_base: Optional[str] = None
+    litellm_chat_language_model: Optional[str] = "openai/gpt-4o"
+    litellm_chat_tokens: int = 8192
+    litellm_extract_api_key: Optional[str] = None
+    litellm_extract_api_base: Optional[str] = None
+    litellm_extract_language_model: Optional[str] = "openai/gpt-4o"
+    litellm_extract_tokens: int = 256
+    litellm_text2gql_api_key: Optional[str] = None
+    litellm_text2gql_api_base: Optional[str] = None
+    litellm_text2gql_language_model: Optional[str] = "openai/gpt-4o"
+    litellm_text2gql_tokens: int = 4096
+    litellm_embedding_api_key: Optional[str] = None
+    litellm_embedding_api_base: Optional[str] = None
+    litellm_embedding_model: Optional[str] = "openai/text-embedding-3-small"
diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/configs_block.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/configs_block.py
@@ -24,11 +24,44 @@
 from requests.auth import HTTPBasicAuth
 
 from hugegraph_llm.config import huge_settings, llm_settings
+from hugegraph_llm.models.embeddings.litellm import LiteLLMEmbedding
+from hugegraph_llm.models.llms.litellm import LiteLLMClient
 from hugegraph_llm.utils.log import log
 
 current_llm = "chat"
 
 
+def test_litellm_embedding(api_key, api_base, model_name) -> int:
+    """Probe a LiteLLM embedding endpoint by embedding the string "test".
+
+    Args:
+        api_key: Key passed to ``LiteLLMEmbedding``.
+        api_base: Base URL passed to ``LiteLLMEmbedding``.
+        model_name: Embedding model identifier passed to ``LiteLLMEmbedding``.
+
+    Returns:
+        200 when a non-empty embedding comes back.
+
+    Raises:
+        gr.Error: If the call fails or the returned embedding is empty.
+    """
+    try:
+        # Build the client inside the try block (as test_litellm_chat does) so
+        # any failure is reported through gr.Error.
+        llm_client = LiteLLMEmbedding(
+            api_key=api_key,
+            api_base=api_base,
+            model_name=model_name,
+        )
+        response = llm_client.get_text_embedding("test")
+        # Explicit check instead of `assert`: asserts are stripped under
+        # `python -O`, which would report an empty result as success.
+        if len(response) == 0:
+            raise ValueError("empty embedding returned")
+    except Exception as e:
+        raise gr.Error(f"Error in litellm embedding call: {e}") from e
+    gr.Info("Test connection successful~")
+    return 200
+
+
+def test_litellm_chat(api_key, api_base, model_name, max_tokens: int) -> int:
+    """Probe a LiteLLM chat endpoint by sending a single "hi" user message.
+
+    Args:
+        api_key: Key passed to ``LiteLLMClient``.
+        api_base: Base URL passed to ``LiteLLMClient``.
+        model_name: Chat model identifier passed to ``LiteLLMClient``.
+        max_tokens: Token limit passed to ``LiteLLMClient``.
+
+    Returns:
+        200 when a non-empty response comes back.
+
+    Raises:
+        gr.Error: If the call fails or the response is empty.
+    """
+    try:
+        llm_client = LiteLLMClient(
+            api_key=api_key,
+            api_base=api_base,
+            model_name=model_name,
+            max_tokens=max_tokens,
+        )
+        response = llm_client.generate(messages=[{"role": "user", "content": "hi"}])
+        # Explicit check instead of `assert`: asserts are stripped under
+        # `python -O`, which would report an empty response as success.
+        if len(response) == 0:
+            raise ValueError("empty response returned")
+    except Exception as e:
+        raise gr.Error(f"Error in litellm chat call: {e}") from e
+    gr.Info("Test connection successful~")
+    return 200
+
+
 def test_api_connection(url, method="GET", headers=None, params=None, body=None, auth=None, origin_call=None) -> int:
     # TODO: use fastapi.request / starlette instead?
     log.debug("Request URL: %s", url)
@@ -97,6 +130,11 @@ def apply_embedding_config(arg1, arg2, arg3, origin_call=None) -> int:
         llm_settings.ollama_embedding_port = int(arg2)
         llm_settings.ollama_embedding_model = arg3
         status_code = test_api_connection(f"http://{arg1}:{arg2}", origin_call=origin_call)
+    elif embedding_option == "litellm":
+        llm_settings.litellm_embedding_api_key = arg1
+        llm_settings.litellm_embedding_api_base = arg2
+        llm_settings.litellm_embedding_model = arg3
+        status_code = test_litellm_embedding(arg1, arg2, arg3)
     llm_settings.update_env()
     gr.Info("Configured!")
     return status_code
@@ -173,7 +211,6 @@ def apply_llm_config(current_llm_config, arg1, arg2, arg3, arg4, origin_call=Non
         setattr(llm_settings, f"openai_{current_llm_config}_tokens", int(arg4))
 
         test_url = getattr(llm_settings, f"openai_{current_llm_config}_api_base") + "/chat/completions"
-        log.debug("Type of OpenAI %s max_token is %s", current_llm_config, type(arg4))
         data = {
             "model": arg3,
             "temperature": 0.0,
@@ -192,6 +229,14 @@ def apply_llm_config(current_llm_config, arg1, arg2, arg3, arg4, origin_call=Non
         setattr(llm_settings, f"ollama_{current_llm_config}_language_model", arg3)
         status_code = test_api_connection(f"http://{arg1}:{arg2}", origin_call=origin_call)
 
+    elif llm_option == "litellm":
+        setattr(llm_settings, f"litellm_{current_llm_config}_api_key", arg1)
+        setattr(llm_settings, f"litellm_{current_llm_config}_api_base", arg2)
+        setattr(llm_settings, f"litellm_{current_llm_config}_language_model", arg3)
+        setattr(llm_settings, f"litellm_{current_llm_config}_tokens", int(arg4))
+
+        status_code = test_litellm_chat(arg1, arg2, arg3, int(arg4))
+
     gr.Info("Configured!")
     llm_settings.update_env()
     return status_code
@@ -218,7 +263,7 @@ def create_configs_block() -> list:
     with gr.Accordion("2. Set up the LLM.", open=False):
         gr.Markdown("> Tips: the openai option also support openai style api from other providers.")
         with gr.Tab(label='chat'):
-            chat_llm_dropdown = gr.Dropdown(choices=["openai", "qianfan_wenxin", "ollama/local"],
+            chat_llm_dropdown = gr.Dropdown(choices=["openai", "litellm", "qianfan_wenxin", "ollama/local"],
                                             value=getattr(llm_settings, "chat_llm_type"), label="type")
             apply_llm_config_with_chat_op = partial(apply_llm_config, "chat")
 
@@ -249,13 +294,23 @@ def chat_llm_settings(llm_type):
                         gr.Textbox(value=getattr(llm_settings, "qianfan_chat_language_model"), label="model_name"),
                         gr.Textbox(value="", visible=False),
                     ]
+                elif llm_type == "litellm":
+                    llm_config_input = [
+                        gr.Textbox(value=getattr(llm_settings, "litellm_chat_api_key"), label="api_key",
+                                   type="password"),
+                        gr.Textbox(value=getattr(llm_settings, "litellm_chat_api_base"), label="api_base",
+                                   info="If you want to use the default api_base, please keep it blank"),
+                        gr.Textbox(value=getattr(llm_settings, "litellm_chat_language_model"), label="model_name",
+                                   info="Please refer to https://docs.litellm.ai/docs/providers"),
+                        gr.Textbox(value=getattr(llm_settings, "litellm_chat_tokens"), label="max_token"),
+                    ]
                 else:
                     llm_config_input = [gr.Textbox(value="", visible=False) for _ in range(4)]
                 llm_config_button = gr.Button("Apply configuration")
                 llm_config_button.click(apply_llm_config_with_chat_op, inputs=llm_config_input)
 
         with gr.Tab(label='mini_tasks'):
-            extract_llm_dropdown = gr.Dropdown(choices=["openai", "qianfan_wenxin", "ollama/local"],
+            extract_llm_dropdown = gr.Dropdown(choices=["openai", "litellm", "qianfan_wenxin", "ollama/local"],
                                                value=getattr(llm_settings, "extract_llm_type"), label="type")
             apply_llm_config_with_extract_op = partial(apply_llm_config, "extract")
 
@@ -286,12 +341,22 @@ def extract_llm_settings(llm_type):
                         gr.Textbox(value=getattr(llm_settings, "qianfan_extract_language_model"), label="model_name"),
                         gr.Textbox(value="", visible=False),
                     ]
+                elif llm_type == "litellm":
+                    llm_config_input = [
+                        gr.Textbox(value=getattr(llm_settings, "litellm_extract_api_key"), label="api_key",
+                                   type="password"),
+                        gr.Textbox(value=getattr(llm_settings, "litellm_extract_api_base"), label="api_base",
+                                   info="If you want to use the default api_base, please keep it blank"),
+                        gr.Textbox(value=getattr(llm_settings, "litellm_extract_language_model"), label="model_name",
+                                   info="Please refer to https://docs.litellm.ai/docs/providers"),
+                        gr.Textbox(value=getattr(llm_settings, "litellm_extract_tokens"), label="max_token"),
+                    ]
                 else:
                     llm_config_input = [gr.Textbox(value="", visible=False) for _ in range(4)]
                 llm_config_button = gr.Button("Apply configuration")
                 llm_config_button.click(apply_llm_config_with_extract_op, inputs=llm_config_input)
         with gr.Tab(label='text2gql'):
-            text2gql_llm_dropdown = gr.Dropdown(choices=["openai", "qianfan_wenxin", "ollama/local"],
+            text2gql_llm_dropdown = gr.Dropdown(choices=["openai", "litellm", "qianfan_wenxin", "ollama/local"],
                                                 value=getattr(llm_settings, "text2gql_llm_type"), label="type")
             apply_llm_config_with_text2gql_op = partial(apply_llm_config, "text2gql")
 
@@ -322,14 +387,25 @@ def text2gql_llm_settings(llm_type):
                         gr.Textbox(value=getattr(llm_settings, "qianfan_text2gql_language_model"), label="model_name"),
                         gr.Textbox(value="", visible=False),
                     ]
+                elif llm_type == "litellm":
+                    llm_config_input = [
+                        gr.Textbox(value=getattr(llm_settings, "litellm_text2gql_api_key"), label="api_key",
+                                   type="password"),
+                        gr.Textbox(value=getattr(llm_settings, "litellm_text2gql_api_base"), label="api_base",
+                                   info="If you want to use the default api_base, please keep it blank"),
+                        gr.Textbox(value=getattr(llm_settings, "litellm_text2gql_language_model"), label="model_name",
+                                   info="Please refer to https://docs.litellm.ai/docs/providers"),
+                        gr.Textbox(value=getattr(llm_settings, "litellm_text2gql_tokens"), label="max_token"),
+                    ]
                 else:
                     llm_config_input = [gr.Textbox(value="", visible=False) for _ in range(4)]
                 llm_config_button = gr.Button("Apply configuration")
                 llm_config_button.click(apply_llm_config_with_text2gql_op, inputs=llm_config_input)
 
     with gr.Accordion("3. Set up the Embedding.", open=False):
         embedding_dropdown = gr.Dropdown(
-            choices=["openai", "qianfan_wenxin", "ollama/local"], value=llm_settings.embedding_type, label="Embedding"
+            choices=["openai", "litellm", "qianfan_wenxin", "ollama/local"], value=llm_settings.embedding_type,
+            label="Embedding"
         )
 
         @gr.render(inputs=[embedding_dropdown])
@@ -357,6 +433,16 @@ def embedding_settings(embedding_type):
                                    type="password"),
                         gr.Textbox(value=llm_settings.qianfan_embedding_model, label="model_name"),
                     ]
+            elif embedding_type == "litellm":
+                with gr.Row():
+                    embedding_config_input = [
+                        gr.Textbox(value=getattr(llm_settings, "litellm_embedding_api_key"), label="api_key",
+                                   type="password"),
+                        gr.Textbox(value=getattr(llm_settings, "litellm_embedding_api_base"), label="api_base",
+                                   info="If you want to use the default api_base, please keep it blank"),
+                        gr.Textbox(value=getattr(llm_settings, "litellm_embedding_model"), label="model_name",
+                                   info="Please refer to https://docs.litellm.ai/docs/embedding/supported_embedding"),
+                    ]
             else:
                 embedding_config_input = [
                     gr.Textbox(value="", visible=False),
diff --git a/hugegraph-llm/src/hugegraph_llm/models/embeddings/init_embedding.py b/hugegraph-llm/src/hugegraph_llm/models/embeddings/init_embedding.py
@@ -19,6 +19,7 @@
 from hugegraph_llm.models.embeddings.openai import OpenAIEmbedding
 from hugegraph_llm.models.embeddings.ollama import OllamaEmbedding
 from hugegraph_llm.models.embeddings.qianfan import QianFanEmbedding
+from hugegraph_llm.models.embeddings.litellm import LiteLLMEmbedding
 from hugegraph_llm.config import llm_settings
 
 
@@ -45,5 +46,11 @@ def get_embedding(self):
                 api_key=llm_settings.qianfan_embedding_api_key,
                 secret_key=llm_settings.qianfan_embedding_secret_key
             )
+        if self.embedding_type == "litellm":
+            return LiteLLMEmbedding(
+                model_name=llm_settings.litellm_embedding_model,
+                api_key=llm_settings.litellm_embedding_api_key,
+                api_base=llm_settings.litellm_embedding_api_base
+            )
 
         raise Exception("embedding type is not supported !")
diff --git a/hugegraph-llm/src/hugegraph_llm/models/embeddings/litellm.py b/hugegraph-llm/src/hugegraph_llm/models/embeddings/litellm.py
@@ -0,0 +1,93 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from typing import List, Optional
+
+from litellm import embedding, RateLimitError, APIError, APIConnectionError, aembedding
+from tenacity import (
+    retry,
+    stop_after_attempt,
+    wait_exponential,
+    retry_if_exception_type,
+)
+
+from hugegraph_llm.models.embeddings.base import BaseEmbedding
+from hugegraph_llm.utils.log import log
+
+
+class LiteLLMEmbedding(BaseEmbedding):
+    """Embedding client backed by LiteLLM, which fronts multiple providers.
+
+    The API key, API base and model name supplied at construction are passed
+    unchanged to every ``embedding`` / ``aembedding`` call made by this class.
+    """
+
+    def __init__(
+        self,
+        api_key: Optional[str] = None,
+        api_base: Optional[str] = None,
+        model_name: str = "openai/text-embedding-3-small",  # Can be any embedding model supported by LiteLLM
+    ) -> None:
+        """Store the connection settings; no request is made here."""
+        self.api_key = api_key
+        self.api_base = api_base
+        self.model = model_name
+
+    def _request_kwargs(self, payload) -> dict:
+        """Build the keyword arguments common to every embedding request."""
+        return {
+            "model": self.model,
+            "input": payload,
+            "api_key": self.api_key,
+            "api_base": self.api_base,
+        }
+
+    @retry(
+        stop=stop_after_attempt(3),
+        wait=wait_exponential(multiplier=1, min=4, max=10),
+        retry=retry_if_exception_type((RateLimitError, APIConnectionError, APIError)),
+    )
+    def get_text_embedding(self, text: str) -> List[float]:
+        """Return the embedding vector for one text.
+
+        This is the only method wrapped in ``@retry``: up to 3 attempts with
+        exponential wait (min=4, max=10) on rate-limit, connection or API
+        errors. Each failed attempt is logged before the error is re-raised.
+        """
+        try:
+            result = embedding(**self._request_kwargs(text))
+            log.info("Token usage: %s", result.usage)
+            first_item = result.data[0]
+            return first_item["embedding"]
+        except (RateLimitError, APIConnectionError, APIError) as err:
+            log.error("Error in LiteLLM embedding call: %s", err)
+            raise
+
+    def get_texts_embeddings(self, texts: List[str]) -> List[List[float]]:
+        """Return one embedding vector per input text, from a single request.
+
+        Not retried; rate-limit, connection and API errors are logged and
+        re-raised.
+        """
+        try:
+            result = embedding(**self._request_kwargs(texts))
+            log.info("Token usage: %s", result.usage)
+            vectors = [item["embedding"] for item in result.data]
+            return vectors
+        except (RateLimitError, APIConnectionError, APIError) as err:
+            log.error("Error in LiteLLM batch embedding call: %s", err)
+            raise
+
+    async def async_get_text_embedding(self, text: str) -> List[float]:
+        """Return the embedding vector for one text via ``aembedding``.
+
+        Not retried; rate-limit, connection and API errors are logged and
+        re-raised.
+        """
+        try:
+            result = await aembedding(**self._request_kwargs(text))
+            log.info("Token usage: %s", result.usage)
+            first_item = result.data[0]
+            return first_item["embedding"]
+        except (RateLimitError, APIConnectionError, APIError) as err:
+            log.error("Error in async LiteLLM embedding call: %s", err)
+            raise
diff --git a/hugegraph-llm/src/hugegraph_llm/models/llms/init_llm.py b/hugegraph-llm/src/hugegraph_llm/models/llms/init_llm.py
diff --git a/hugegraph-llm/src/hugegraph_llm/models/llms/litellm.py b/hugegraph-llm/src/hugegraph_llm/models/llms/litellm.py