
Commit 759b963

afterimagex and imbajin authored
feat(llm): support switch graph in api & add some query configs (#184)
TODO: we need to wrap the query configs

Co-authored-by: imbajin <[email protected]>
1 parent 3e0bf46 commit 759b963

File tree

11 files changed: +131 −50 lines


.github/workflows/hugegraph-python-client.yml

Lines changed: 1 addition & 1 deletion
```diff
@@ -20,7 +20,7 @@ jobs:
       - name: Prepare HugeGraph Server Environment
         run: |
           docker run -d --name=graph -p 8080:8080 -e PASSWORD=admin hugegraph/hugegraph:1.3.0
-          sleep 1
+          sleep 5
 
       - uses: actions/checkout@v4
```

README.md

Lines changed: 1 addition & 1 deletion
```diff
@@ -37,7 +37,7 @@ And here are links of other repositories:
 
 - Welcome to contribute to HugeGraph, please see [Guidelines](https://hugegraph.apache.org/docs/contribution-guidelines/) for more information.
 - Note: It's recommended to use [GitHub Desktop](https://desktop.github.com/) to greatly simplify the PR and commit process.
-- Code format: Please run [`./style/code_format_and_analysis.sh`](style/code_format_and_analysis.sh) to format your code before submitting a PR.
+- Code format: Please run [`./style/code_format_and_analysis.sh`](style/code_format_and_analysis.sh) to format your code before submitting a PR. (Use `pylint` to check code style)
 - Thank you to all the people who already contributed to HugeGraph!
 
 [![contributors graph](https://contrib.rocks/image?repo=apache/incubator-hugegraph-ai)](https://github.com/apache/incubator-hugegraph-ai/graphs/contributors)
```

hugegraph-llm/src/hugegraph_llm/api/models/rag_requests.py

Lines changed: 32 additions & 11 deletions
```diff
@@ -23,8 +23,17 @@
 from hugegraph_llm.config import prompt
 
 
+class GraphConfigRequest(BaseModel):
+    ip: str = Query('127.0.0.1', description="hugegraph client ip.")
+    port: str = Query('8080', description="hugegraph client port.")
+    name: str = Query('hugegraph', description="hugegraph client name.")
+    user: str = Query('', description="hugegraph client user.")
+    pwd: str = Query('', description="hugegraph client pwd.")
+    gs: str = None
+
+
 class RAGRequest(BaseModel):
-    query: str = Query("", description="Query you want to ask")
+    query: str = Query(..., description="Query you want to ask")
     raw_answer: bool = Query(False, description="Use LLM to generate answer directly")
     vector_only: bool = Query(False, description="Use LLM to generate answer with vector")
     graph_only: bool = Query(True, description="Use LLM to generate answer with graph RAG only")
@@ -33,6 +42,16 @@ class RAGRequest(BaseModel):
     rerank_method: Literal["bleu", "reranker"] = Query("bleu", description="Method to rerank the results.")
     near_neighbor_first: bool = Query(False, description="Prioritize near neighbors in the search results.")
     custom_priority_info: str = Query("", description="Custom information to prioritize certain results.")
+    # Graph Configs
+    max_graph_items: int = Query(30, description="Maximum number of items for GQL queries in graph.")
+    topk_return_results: int = Query(20, description="Number of sorted results to return finally.")
+    vector_dis_threshold: float = Query(0.9, description="Threshold for vector similarity \
+        (results greater than this will be ignored).")
+    topk_per_keyword: int = Query(1, description="TopK results returned for each keyword \
+        extracted from the query, by default only the most similar one is returned.")
+    client_config: Optional[GraphConfigRequest] = Query(None, description="hugegraph server config.")
+
+    # Keep prompt params in the end
     answer_prompt: Optional[str] = Query(prompt.answer_prompt, description="Prompt to guide the answer generation.")
     keywords_extract_prompt: Optional[str] = Query(
         prompt.keywords_extract_prompt,
@@ -47,7 +66,18 @@ class RAGRequest(BaseModel):
 
 # TODO: import the default value of prompt.* dynamically
 class GraphRAGRequest(BaseModel):
-    query: str = Query("", description="Query you want to ask")
+    query: str = Query(..., description="Query you want to ask")
+    # Graph Configs
+    max_graph_items: int = Query(30, description="Maximum number of items for GQL queries in graph.")
+    topk_return_results: int = Query(20, description="Number of sorted results to return finally.")
+    vector_dis_threshold: float = Query(0.9, description="Threshold for vector similarity \
+        (results greater than this will be ignored).")
+    topk_per_keyword: int = Query(1, description="TopK results returned for each keyword extracted \
+        from the query, by default only the most similar one is returned.")
+
+    client_config: Optional[GraphConfigRequest] = Query(None, description="hugegraph server config.")
+    get_vid_only: bool = Query(False, description="return only keywords & vid (early stop).")
+
     gremlin_tmpl_num: int = Query(
         1, description="Number of Gremlin templates to use. If num <=0 means template is not provided"
     )
@@ -60,15 +90,6 @@ class GraphRAGRequest(BaseModel):
     )
 
 
-class GraphConfigRequest(BaseModel):
-    ip: str = "127.0.0.1"
-    port: str = "8080"
-    name: str = "hugegraph"
-    user: str = "xxx"
-    pwd: str = "xxx"
-    gs: str = None
-
-
 class LLMConfigRequest(BaseModel):
     llm_type: str
     # The common parameters shared by OpenAI, Qianfan Wenxin,
```
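With these models, both the tuning knobs and the target graph connection travel in the request body. A minimal client-side sketch of a `/rag` call using the new fields (the base URL and credential values are illustrative assumptions, not part of this commit; the `/rag` route itself comes from `rag_api.py` below):

```python
import requests  # hypothetical client-side sketch; assumes a locally served hugegraph-llm API

payload = {
    "query": "Tell me about the graph.",  # now required: Query(...) instead of Query("")
    # New query configs introduced by this commit (values are the declared defaults)
    "max_graph_items": 30,        # cap on items for GQL queries in graph
    "topk_return_results": 20,    # number of sorted results to return finally
    "vector_dis_threshold": 0.9,  # vid matches above this distance are ignored
    "topk_per_keyword": 1,        # keep only the closest vid per extracted keyword
    # New: per-request HugeGraph connection, used to switch graphs
    "client_config": {
        "ip": "127.0.0.1",
        "port": "8080",
        "name": "hugegraph",
        "user": "",
        "pwd": "",
    },
}
resp = requests.post("http://127.0.0.1:8001/rag", json=payload)  # base URL is an assumption
print(resp.json())
```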

hugegraph-llm/src/hugegraph_llm/api/rag_api.py

Lines changed: 25 additions & 1 deletion
```diff
@@ -27,6 +27,7 @@
     RerankerConfigRequest,
     GraphRAGRequest,
 )
+from hugegraph_llm.config import huge_settings
 from hugegraph_llm.api.models.rag_response import RAGResponse
 from hugegraph_llm.config import llm_settings, prompt
 from hugegraph_llm.utils.log import log
@@ -43,6 +44,8 @@ def rag_http_api(
 ):
     @router.post("/rag", status_code=status.HTTP_200_OK)
     def rag_answer_api(req: RAGRequest):
+        set_graph_config(req)
+
         result = rag_answer_func(
             text=req.query,
             raw_answer=req.raw_answer,
@@ -52,10 +55,15 @@ def rag_answer_api(req: RAGRequest):
             graph_ratio=req.graph_ratio,
             rerank_method=req.rerank_method,
             near_neighbor_first=req.near_neighbor_first,
+            gremlin_tmpl_num=req.gremlin_tmpl_num,
+            max_graph_items=req.max_graph_items,
+            topk_return_results=req.topk_return_results,
+            vector_dis_threshold=req.vector_dis_threshold,
+            topk_per_keyword=req.topk_per_keyword,
+            # Keep prompt params in the end
             custom_related_information=req.custom_priority_info,
             answer_prompt=req.answer_prompt or prompt.answer_prompt,
             keywords_extract_prompt=req.keywords_extract_prompt or prompt.keywords_extract_prompt,
-            gremlin_tmpl_num=req.gremlin_tmpl_num,
             gremlin_prompt=req.gremlin_prompt or prompt.gremlin_generate_prompt,
         )
         # TODO: we need more info in the response for users to understand the query logic
@@ -68,16 +76,32 @@ def rag_answer_api(req: RAGRequest):
             },
         }
 
+    def set_graph_config(req):
+        if req.client_config:
+            huge_settings.graph_ip = req.client_config.ip
+            huge_settings.graph_port = req.client_config.port
+            huge_settings.graph_name = req.client_config.name
+            huge_settings.graph_user = req.client_config.user
+            huge_settings.graph_pwd = req.client_config.pwd
+            huge_settings.graph_space = req.client_config.gs
+
     @router.post("/rag/graph", status_code=status.HTTP_200_OK)
     def graph_rag_recall_api(req: GraphRAGRequest):
         try:
+            set_graph_config(req)
+
             result = graph_rag_recall_func(
                 query=req.query,
+                max_graph_items=req.max_graph_items,
+                topk_return_results=req.topk_return_results,
+                vector_dis_threshold=req.vector_dis_threshold,
+                topk_per_keyword=req.topk_per_keyword,
                 gremlin_tmpl_num=req.gremlin_tmpl_num,
                 rerank_method=req.rerank_method,
                 near_neighbor_first=req.near_neighbor_first,
                 custom_related_information=req.custom_priority_info,
                 gremlin_prompt=req.gremlin_prompt or prompt.gremlin_generate_prompt,
+                get_vid_only=req.get_vid_only
             )
 
             if isinstance(result, dict):
```
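`set_graph_config` copies a request's `client_config` into the process-global `huge_settings` before the pipeline runs, which is what lets one running service be pointed at different HugeGraph instances per request. A hedged sketch of the new `/rag/graph` early-stop path (base URL assumed, as above):

```python
import requests

payload = {
    "query": "Which vertices match these keywords?",
    # Early stop: return only the extracted keywords & matched vids,
    # skipping schema import, graph query, and rerank
    "get_vid_only": True,
    # Switch this request to another HugeGraph instance (values illustrative)
    "client_config": {"ip": "127.0.0.1", "port": "8080", "name": "hugegraph", "user": "", "pwd": ""},
}
resp = requests.post("http://127.0.0.1:8001/rag/graph", json=payload)  # host/port are assumptions
print(resp.json())
```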

hugegraph-llm/src/hugegraph_llm/demo/rag_demo/rag_block.py

Lines changed: 15 additions & 5 deletions
```diff
@@ -43,6 +43,10 @@ def rag_answer(
     keywords_extract_prompt: str,
     gremlin_tmpl_num: Optional[int] = 2,
     gremlin_prompt: Optional[str] = None,
+    max_graph_items=30,
+    topk_return_results=20,
+    vector_dis_threshold=0.9,
+    topk_per_keyword=1,
 ) -> Tuple:
     """
     Generate an answer using the RAG (Retrieval-Augmented Generation) pipeline.
@@ -79,22 +83,28 @@ def rag_answer(
     if vector_search:
         rag.query_vector_index()
     if graph_search:
-        rag.extract_keywords(extract_template=keywords_extract_prompt).keywords_to_vid().import_schema(
+        rag.extract_keywords(extract_template=keywords_extract_prompt).keywords_to_vid(
+            vector_dis_threshold=vector_dis_threshold,
+            topk_per_keyword=topk_per_keyword,
+        ).import_schema(
             huge_settings.graph_name
         ).query_graphdb(
             num_gremlin_generate_example=gremlin_tmpl_num,
             gremlin_prompt=gremlin_prompt,
+            max_graph_items=max_graph_items
         )
     # TODO: add more user-defined search strategies
     rag.merge_dedup_rerank(
-        graph_ratio,
-        rerank_method,
-        near_neighbor_first,
+        graph_ratio=graph_ratio,
+        rerank_method=rerank_method,
+        near_neighbor_first=near_neighbor_first,
+        topk_return_results=topk_return_results
     )
     rag.synthesize_answer(raw_answer, vector_only_answer, graph_only_answer, graph_vector_answer, answer_prompt)
 
     try:
-        context = rag.run(verbose=True, query=text, vector_search=vector_search, graph_search=graph_search)
+        context = rag.run(verbose=True, query=text, vector_search=vector_search, graph_search=graph_search,
+                          max_graph_items=max_graph_items)
         if context.get("switch_to_bleu"):
             gr.Warning("Online reranker fails, automatically switches to local bleu rerank.")
         return (
```

hugegraph-llm/src/hugegraph_llm/demo/rag_demo/text2gremlin_block.py

Lines changed: 20 additions & 9 deletions
```diff
@@ -188,17 +188,28 @@ def graph_rag_recall(
     near_neighbor_first: bool,
     custom_related_information: str,
     gremlin_prompt: str,
+    max_graph_items: int,
+    topk_return_results: int,
+    vector_dis_threshold: float,
+    topk_per_keyword: int,
+    get_vid_only: bool
 ) -> dict:
     store_schema(prompt.text2gql_graph_schema, query, gremlin_prompt)
     rag = RAGPipeline()
-
-    rag.extract_keywords().keywords_to_vid().import_schema(huge_settings.graph_name).query_graphdb(
-        num_gremlin_generate_example=gremlin_tmpl_num,
-        gremlin_prompt=gremlin_prompt,
-    ).merge_dedup_rerank(
-        rerank_method=rerank_method,
-        near_neighbor_first=near_neighbor_first,
-        custom_related_information=custom_related_information,
-    )
+    rag.extract_keywords().keywords_to_vid(
+        vector_dis_threshold=vector_dis_threshold,
+        topk_per_keyword=topk_per_keyword,
+    )
+    if not get_vid_only:
+        rag.import_schema(huge_settings.graph_name).query_graphdb(
+            num_gremlin_generate_example=gremlin_tmpl_num,
+            gremlin_prompt=gremlin_prompt,
+            max_graph_items=max_graph_items,
+        ).merge_dedup_rerank(
+            rerank_method=rerank_method,
+            near_neighbor_first=near_neighbor_first,
+            custom_related_information=custom_related_information,
+            topk_return_results=topk_return_results,
+        )
     context = rag.run(verbose=True, query=query, graph_search=True)
     return context
```

hugegraph-llm/src/hugegraph_llm/operators/common_op/merge_dedup_rerank.py

Lines changed: 6 additions & 6 deletions
```diff
@@ -44,7 +44,7 @@ class MergeDedupRerank:
     def __init__(
         self,
         embedding: BaseEmbedding,
-        topk: int = huge_settings.topk_return_results,
+        topk_return_results: int = huge_settings.topk_return_results,
         graph_ratio: float = 0.5,
         method: Literal["bleu", "reranker"] = "bleu",
         near_neighbor_first: bool = False,
@@ -54,7 +54,7 @@ def __init__(
         assert method in ["bleu", "reranker"], f"Unimplemented rerank method '{method}'."
         self.embedding = embedding
         self.graph_ratio = graph_ratio
-        self.topk = topk
+        self.topk_return_results = topk_return_results
         self.method = method
         self.near_neighbor_first = near_neighbor_first
         self.custom_related_information = custom_related_information
@@ -70,11 +70,11 @@ def run(self, context: Dict[str, Any]) -> Dict[str, Any]:
         vector_search = context.get("vector_search", False)
         graph_search = context.get("graph_search", False)
         if graph_search and vector_search:
-            graph_length = int(self.topk * self.graph_ratio)
-            vector_length = self.topk - graph_length
+            graph_length = int(self.topk_return_results * self.graph_ratio)
+            vector_length = self.topk_return_results - graph_length
         else:
-            graph_length = self.topk
-            vector_length = self.topk
+            graph_length = self.topk_return_results
+            vector_length = self.topk_return_results
 
         vector_result = context.get("vector_result", [])
         vector_length = min(len(vector_result), vector_length)
```
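The renamed `topk_return_results` budget is split between the two result sources only when both searches are active. A small worked example of the arithmetic above:

```python
# Worked example of the budget split in MergeDedupRerank.run (values illustrative)
topk_return_results = 20
graph_ratio = 0.5

# Both graph and vector search enabled:
graph_length = int(topk_return_results * graph_ratio)  # 10 results from the graph side
vector_length = topk_return_results - graph_length     # 10 results from the vector side

# Only one source enabled: each side may use the full budget of 20;
# vector_length is then clamped to len(vector_result).
```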

hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py

Lines changed: 8 additions & 1 deletion
```diff
@@ -100,12 +100,14 @@ def keywords_to_vid(
         by: Literal["query", "keywords"] = "keywords",
         topk_per_keyword: int = huge_settings.topk_per_keyword,
         topk_per_query: int = 10,
+        vector_dis_threshold: float = huge_settings.vector_dis_threshold,
     ):
         """
         Add a semantic ID query operator to the pipeline.
         :param by: Match by query or keywords.
         :param topk_per_keyword: Top K results per keyword.
         :param topk_per_query: Top K results per query.
+        :param vector_dis_threshold: Vector distance threshold.
         :return: Self-instance for chaining.
         """
         self._operators.append(
@@ -114,6 +116,7 @@ def keywords_to_vid(
                 by=by,
                 topk_per_keyword=topk_per_keyword,
                 topk_per_query=topk_per_query,
+                vector_dis_threshold=vector_dis_threshold,
             )
         )
         return self
@@ -174,6 +177,7 @@ def merge_dedup_rerank(
         rerank_method: Literal["bleu", "reranker"] = "bleu",
         near_neighbor_first: bool = False,
         custom_related_information: str = "",
+        topk_return_results: int = huge_settings.topk_return_results,
     ):
         """
         Add a merge, deduplication, and rerank operator to the pipeline.
@@ -187,6 +191,7 @@ def merge_dedup_rerank(
                 method=rerank_method,
                 near_neighbor_first=near_neighbor_first,
                 custom_related_information=custom_related_information,
+                topk_return_results=topk_return_results
             )
         )
         return self
@@ -239,7 +244,9 @@ def run(self, **kwargs) -> Dict[str, Any]:
         :return: Final context after all operators have been executed.
         """
         if len(self._operators) == 0:
-            self.extract_keywords().query_graphdb().synthesize_answer()
+            self.extract_keywords().query_graphdb(
+                max_graph_items=kwargs.get('max_graph_items')
+            ).synthesize_answer()
 
         context = kwargs
```
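Taken together, the fluent pipeline now threads all four knobs through `keywords_to_vid`, `query_graphdb`, and `merge_dedup_rerank`. A minimal sketch mirroring the call chain in `text2gremlin_block.py` above (assuming `RAGPipeline` is the class defined in `graph_rag_task.py`; values are illustrative):

```python
from hugegraph_llm.config import huge_settings
from hugegraph_llm.operators.graph_rag_task import RAGPipeline  # assumed import path

rag = RAGPipeline()
rag.extract_keywords().keywords_to_vid(
    vector_dis_threshold=0.9,  # drop vid matches farther than this distance
    topk_per_keyword=1,        # keep only the closest vid per keyword
).import_schema(huge_settings.graph_name).query_graphdb(
    num_gremlin_generate_example=1,
    max_graph_items=30,        # cap items returned by the generated graph queries
).merge_dedup_rerank(
    rerank_method="bleu",
    topk_return_results=20,    # final number of sorted results
)
context = rag.run(verbose=True, query="Tell me about the graph.", graph_search=True)
```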

hugegraph-llm/src/hugegraph_llm/operators/index_op/semantic_id_query.py

Lines changed: 4 additions & 2 deletions
```diff
@@ -34,14 +34,16 @@ def __init__(
         embedding: BaseEmbedding,
         by: Literal["query", "keywords"] = "keywords",
         topk_per_query: int = 10,
-        topk_per_keyword: int = huge_settings.topk_per_keyword
+        topk_per_keyword: int = huge_settings.topk_per_keyword,
+        vector_dis_threshold: float = huge_settings.vector_dis_threshold,
     ):
         self.index_dir = str(os.path.join(resource_path, huge_settings.graph_name, "graph_vids"))
         self.vector_index = VectorIndex.from_index_file(self.index_dir)
         self.embedding = embedding
         self.by = by
         self.topk_per_query = topk_per_query
         self.topk_per_keyword = topk_per_keyword
+        self.vector_dis_threshold = vector_dis_threshold
         self._client = PyHugeClient(
             huge_settings.graph_ip,
             huge_settings.graph_port,
@@ -76,7 +78,7 @@ def _fuzzy_match_vids(self, keywords: List[str]) -> List[str]:
         for keyword in keywords:
             keyword_vector = self.embedding.get_text_embedding(keyword)
             results = self.vector_index.search(keyword_vector, top_k=self.topk_per_keyword,
-                                               dis_threshold=float(huge_settings.vector_dis_threshold))
+                                               dis_threshold=float(self.vector_dis_threshold))
             if results:
                 fuzzy_match_result.extend(results[:self.topk_per_keyword])
         return fuzzy_match_result
```
