Commits (showing changes from 22 of 65 commits)
74cf3c5
doc: update for inner repo (GraphPlatform-4190)
Dec 9, 2024
e9941ad
Merge branch 'master' of ssh://icode.baidu.com:8235/baidu/starhugegra…
imbajin Jan 3, 2025
d2e4e2e
Merge branch 'main' into master-icode
imbajin Jan 10, 2025
c5ec3ea
Merge branch 'main' into master-icode
HJ-Young Jan 16, 2025
12221c9
docs(client): update README.md
imbajin Jan 20, 2025
8343d3c
Merge remote-tracking branch 'github/main'
Feb 7, 2025
0036b2c
Merge branch 'main'
HJ-Young Feb 25, 2025
173b9b2
Merge branch 'main' into master-icode
imbajin Mar 3, 2025
c9ef9ed
fix(llm): enable concurrency config in rag answer
imbajin Mar 3, 2025
c933c58
Merge branch 'main' into master-icode
imbajin Mar 3, 2025
263758b
Merge branch 'main' into master-icode
imbajin Mar 6, 2025
8a0e6cd
Merge branch 'main' into master-icode
imbajin Mar 11, 2025
a60887d
GraphPlatform-4765 [Tech Task] vermeer-client framework development
Mar 10, 2025
43cdcae
update llm settings
Apr 27, 2025
b4f1a51
set num_gremlin_generate_example 0
Apr 28, 2025
142a44e
disable text2gql by default
Apr 28, 2025
9f245a0
remove log info
Apr 28, 2025
87cb1cf
temperature 0.01
Apr 29, 2025
961f582
merge community
Apr 29, 2025
4cbb47a
modify prompt to only output gql
Apr 29, 2025
36116e6
test_api_connection
May 7, 2025
5db4e0c
merge main to master
May 12, 2025
bf19a84
empty chunk
May 12, 2025
026c65c
build, get and remove property embeddings
May 19, 2025
f2ee374
limit the number of props to be updated
May 19, 2025
0e98879
disable pylint and modify limit logic
May 20, 2025
a3b864f
save after removing props
May 20, 2025
ac7e137
change key:value to value
May 20, 2025
8ace5a8
pv-embedding + set<(pk, pv)>
May 22, 2025
a9f92c4
vector/embedding api
May 22, 2025
18744d2
Merge branch 'main' into property_embedding
imbajin May 22, 2025
89b2d47
match keywords and props
May 22, 2025
f14087a
fix ollama batch embeddings
May 22, 2025
ef37855
fix ollama single embedding
May 23, 2025
c24d210
pylint
May 23, 2025
f1fdbdb
Merge branch 'main' into property_embedding
MrJs133 May 23, 2025
2953dbc
fix ollama
May 23, 2025
cf279f5
split run()
May 23, 2025
182ecba
using get_texts_embeddings instead of get_text_embeddings
May 23, 2025
b7e7425
match properties and change the structure of fuzzy_matched_props
May 23, 2025
52c40cb
property subgraph_query
May 23, 2025
10e76cd
pylint
May 26, 2025
923502a
pylint
May 26, 2025
739479a
limit 2 times one day
May 26, 2025
9acaa96
pylint
May 26, 2025
86e6098
inner
May 26, 2025
b27d925
Merge branch 'main' into master-icode
imbajin May 26, 2025
765e93f
Merge branch 'master-icode' into property_embedding
imbajin May 26, 2025
b5f31ff
format
May 26, 2025
50ec338
fix lint & comment
imbajin May 26, 2025
7d9d67c
text2gremlin api
May 26, 2025
c918e73
change params
May 26, 2025
7b7260a
change params
May 26, 2025
3754211
text to json
May 27, 2025
7a2cf2b
detail
May 27, 2025
7b3e5e2
add graph_space in text2gremlin api
May 27, 2025
cb31b35
add graph_space in text2gremlin api
May 27, 2025
9778c37
change default in text2gremlin api
May 27, 2025
cf6d2e4
split build_semantic_index.run()
May 28, 2025
eb5e9f1
Merge branch 'property_embedding' of https://icode.baidu.com/baidu/st…
May 28, 2025
e75f68f
conflict
May 28, 2025
4aa1a4d
change daily limit
May 29, 2025
7b7c6d2
create pyhugegraph client by token
May 29, 2025
31bf971
change param
May 29, 2025
a0e460c
name -> graph
May 30, 2025
45 changes: 45 additions & 0 deletions hugegraph-llm/src/hugegraph_llm/api/vector_api.py
@@ -0,0 +1,45 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.


from datetime import date
from fastapi import status, APIRouter, HTTPException
from hugegraph_llm.utils.log import log

API_CALL_TRACKER = {}

# pylint: disable=too-many-statements
def vector_http_api(
router: APIRouter,
update_embedding_func,
):
@router.post("/vector/embedding", status_code=status.HTTP_200_OK)
def update_embedding_api(daily_limit: int = 2):
today = date.today()
for call_date in list(API_CALL_TRACKER.keys()):
if call_date != today:
del API_CALL_TRACKER[call_date]
call_count = API_CALL_TRACKER.get(today, 0)
if call_count >= daily_limit:
log.error("Rate limit exceeded for update_vid_embedding. Maximum %d calls per day.", daily_limit)
raise HTTPException(
status_code=status.HTTP_429_TOO_MANY_REQUESTS,
detail=f"API call limit of {daily_limit} per day exceeded. Please try again tomorrow."
)
API_CALL_TRACKER[today] = call_count + 1
result = update_embedding_func()
return result
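For reviewers, a minimal client-side sketch of how the daily limit behaves, assuming the router is mounted on the FastAPI app started by the demo; the base URL, port, and absence of auth headers are assumptions for illustration and not part of this PR.

```python
# Hypothetical usage sketch: the third call on the same day should return HTTP 429.
# Base URL/port are assumptions; adjust to the actual deployment.
import requests

BASE_URL = "http://127.0.0.1:8001"

for attempt in range(3):
    resp = requests.post(f"{BASE_URL}/vector/embedding", params={"daily_limit": 2}, timeout=300)
    if resp.status_code == 429:
        print("Rate limited:", resp.json()["detail"])
    else:
        print("Embedding update result:", resp.json())
```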
5 changes: 5 additions & 0 deletions hugegraph-llm/src/hugegraph_llm/config/prompt_config.py
@@ -218,6 +218,8 @@ class PromptConfig(BasePromptConfig):
- You may use the vertex ID directly if it’s provided in the context.
- If the provided question contains entity names that are very similar to the Vertices IDs, then in the generated Gremlin statement, replace the approximate entities from the original question.
For example, if the question includes the name ABC, and the provided VerticesIDs do not contain ABC but only abC, then use abC instead of ABC from the original question when generating the gremlin.
- Similarly, if the user's query refers to specific property names or their values, and these are present or align with the 'Referenced Extracted Properties', actively utilize these properties in your Gremlin query.
For instance, you can use them for filtering vertices or edges (e.g., using `has('propertyName', 'propertyValue')`), or for projecting specific values.

The output format must be as follows:
```gremlin
@@ -231,6 +233,9 @@ class PromptConfig(BasePromptConfig):
Referenced Extracted Vertex IDs Related to the Query:
{vertices}

Referenced Extracted Properties Related to the Query (Format: [('property_name', 'property_value'), ...]):
{properties}

Generate Gremlin from the Following User Query:
{query}

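To make the new `{properties}` slot concrete, here is a small hedged sketch of how matched property pairs might be rendered into this part of the prompt; the trimmed template and sample values below are illustrative only, not code added by this PR.

```python
# Illustrative only: rendering matched (property_name, property_value) pairs into
# the new {properties} slot; the template is trimmed to the two relevant placeholders.
template = (
    "Referenced Extracted Properties Related to the Query "
    "(Format: [('property_name', 'property_value'), ...]):\n"
    "{properties}\n\n"
    "Generate Gremlin from the Following User Query:\n"
    "{query}\n"
)

matched_props = [("name", "Tom"), ("city", "Beijing")]
prompt = template.format(properties=str(matched_props), query="Which city does Tom live in?")
print(prompt)
# A property-aware completion would then look like: g.V().has('name', 'Tom').values('city')
```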
4 changes: 3 additions & 1 deletion hugegraph-llm/src/hugegraph_llm/demo/rag_demo/app.py
@@ -23,6 +23,7 @@

from hugegraph_llm.api.admin_api import admin_http_api
from hugegraph_llm.api.rag_api import rag_http_api
from hugegraph_llm.api.vector_api import vector_http_api
from hugegraph_llm.config import admin_settings, huge_settings, prompt
from hugegraph_llm.demo.rag_demo.admin_block import create_admin_block, log_stream
from hugegraph_llm.demo.rag_demo.configs_block import (
@@ -32,6 +33,7 @@
apply_reranker_config,
apply_graph_config,
)
from hugegraph_llm.utils.graph_index_utils import update_vid_embedding
from hugegraph_llm.demo.rag_demo.other_block import create_other_block
from hugegraph_llm.demo.rag_demo.other_block import lifespan
from hugegraph_llm.demo.rag_demo.rag_block import create_rag_block, rag_answer
@@ -173,7 +175,7 @@ def create_app():
apply_reranker_config,
)
admin_http_api(api_auth, log_stream)

vector_http_api(api_auth, update_vid_embedding)
app.include_router(api_auth)
# Mount Gradio inside FastAPI
# TODO: support multi-user login when need
@@ -59,9 +59,13 @@ def example_index_query(self, num_examples):
return self

def gremlin_generate_synthesize(
self, schema, gremlin_prompt: Optional[str] = None, vertices: Optional[List[str]] = None
self,
schema,
gremlin_prompt: Optional[str] = None,
vertices: Optional[List[str]] = None,
properties: Optional[List[tuple]] = None
):
self.operators.append(GremlinGenerateSynthesize(self.llm, schema, vertices, gremlin_prompt))
self.operators.append(GremlinGenerateSynthesize(self.llm, schema, vertices, gremlin_prompt, properties))
return self

def print_result(self):
@@ -25,6 +25,7 @@ class FetchGraphData:

def __init__(self, graph: PyHugeClient):
self.graph = graph
self.schema = self.graph.schema()

def run(self, graph_summary: Optional[Dict[str, Any]]) -> Dict[str, Any]:
if graph_summary is None:
@@ -49,4 +50,19 @@ def res = [:];

if isinstance(result, list) and len(result) > 0:
graph_summary.update({key: result[i].get(key) for i, key in enumerate(keys)})

index_labels = self.schema.getIndexLabels()
if index_labels:
graph_summary["index_labels"] = [
{
"id": label.id,
"base_type": label.baseType,
"base_value": label.baseValue,
"name": label.name,
"fields": label.fields,
"index_type": label.indexType
} for label in index_labels
]
else:
graph_summary["index_labels"] = []
return graph_summary
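For clarity, a hedged example of the extra key this adds to `graph_summary`; only the dictionary keys mirror the code above, while the attribute values (and any other summary keys) are hypothetical.

```python
# Illustrative only: shape of graph_summary["index_labels"] after this change.
# Attribute values are made up; real ones come from PyHugeClient's schema.getIndexLabels().
graph_summary = {
    "index_labels": [
        {
            "id": 1,
            "base_type": "VERTEX_LABEL",   # hypothetical value
            "base_value": "person",        # hypothetical value
            "name": "personByName",        # hypothetical value
            "fields": ["name"],
            "index_type": "SECONDARY",     # hypothetical value
        }
    ],
}
```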
@@ -53,7 +53,7 @@
"""

PROPERTY_QUERY_NEIGHBOR_TPL = """\
g.V().has('{prop}', within({keywords}))
g.V().has('{current_prop_name}', '{current_prop_value}')
.repeat(
bothE({edge_labels}).limit({edge_limit}).otherV().dedup()
).times({max_deep}).emit()
@@ -65,8 +65,8 @@
)
.by(project('label', 'inV', 'outV', 'props')
.by(label())
.by(inV().values('{prop}'))
.by(outV().values('{prop}'))
.by(inV().values('{current_prop_name}'))
.by(outV().values('{current_prop_name}'))
.by(valueMap().by(unfold()))
)
.limit({max_items})
@@ -129,12 +129,13 @@ def run(self, context: Dict[str, Any]) -> Dict[str, Any]:
def _gremlin_generate_query(self, context: Dict[str, Any]) -> Dict[str, Any]:
query = context["query"]
vertices = context.get("match_vids")
properties = context.get("match_props")
query_embedding = context.get("query_embedding")

self._gremlin_generator.clear()
self._gremlin_generator.example_index_query(num_examples=self._num_gremlin_generate_example)
gremlin_response = self._gremlin_generator.gremlin_generate_synthesize(
context["simple_schema"], vertices=vertices, gremlin_prompt=self._gremlin_prompt
context["simple_schema"], vertices=vertices, gremlin_prompt=self._gremlin_prompt, properties=properties
).run(query=query, query_embedding=query_embedding)
if self._num_gremlin_generate_example > 0:
gremlin = gremlin_response["result"]
@@ -160,12 +161,14 @@ def _gremlin_generate_query(self, context: Dict[str, Any]) -> Dict[str, Any]:
def _subgraph_query(self, context: Dict[str, Any]) -> Dict[str, Any]:
# 1. Extract params from context
matched_vids = context.get("match_vids")
matched_props = context.get("match_props")
if isinstance(context.get("max_deep"), int):
self._max_deep = context["max_deep"]
if isinstance(context.get("max_items"), int):
self._max_items = context["max_items"]
if isinstance(context.get("prop_to_match"), str):
self._prop_to_match = context["prop_to_match"]
if isinstance(context.get("match_props"), list):
self._prop_to_match = matched_props[0][0] if matched_props else None
log.debug("Prop to match: %s", self._prop_to_match)

# 2. Extract edge_labels from graph schema
_, edge_labels = self._extract_labels_from_schema()
@@ -207,31 +210,34 @@ def _subgraph_query(self, context: Dict[str, Any]) -> Dict[str, Any]:
vertex_degree_list[0].update(vertex_knowledge)
else:
vertex_degree_list.append(vertex_knowledge)
else:
elif matched_props:
# WARN: When will the query enter here?
keywords = context.get("keywords")
assert keywords, "No related property(keywords) for graph query."
keywords_str = ",".join("'" + kw + "'" for kw in keywords)
gremlin_query = PROPERTY_QUERY_NEIGHBOR_TPL.format(
prop=self._prop_to_match,
keywords=keywords_str,
edge_labels=edge_labels_str,
edge_limit=edge_limit_amount,
max_deep=self._max_deep,
max_items=self._max_items,
)
log.warning("Unable to find vid, downgraded to property query, please confirm if it meets expectation.")
graph_chain_knowledge = set()
for prop_name, prop_value in matched_props:
self._prop_to_match = prop_name
gremlin_query = PROPERTY_QUERY_NEIGHBOR_TPL.format(
current_prop_name=prop_name,
current_prop_value=prop_value,
edge_labels=edge_labels_str,
edge_limit=edge_limit_amount,
max_deep=self._max_deep,
max_items=self._max_items
)
log.warning("Unable to find vid, downgraded to property query, please confirm if it meets expectation.")
log.debug("property gremlin: %s", gremlin_query)

paths: List[Any] = self._client.gremlin().exec(gremlin=gremlin_query)["data"]
graph_chain_knowledge, vertex_degree_list, knowledge_with_degree = self._format_graph_query_result(
query_paths=paths
)
paths: List[Any] = self._client.gremlin().exec(gremlin=gremlin_query)["data"]
log.debug("paths: %s", paths)
temp_graph_chain_knowledge, vertex_degree_list, knowledge_with_degree = self._format_graph_query_result(
query_paths=paths
)
graph_chain_knowledge.update(temp_graph_chain_knowledge)

context["graph_result"] = list(graph_chain_knowledge)
if context["graph_result"]:
context["graph_result_flag"] = 0
context["vertex_degree_list"] = [list(vertex_degree) for vertex_degree in vertex_degree_list]
context["knowledge_with_degree"] = knowledge_with_degree
context["knowledge_with_degree"] = knowledge_with_degree # pylint: disable=possibly-used-before-assignment
context["graph_context_head"] = (
f"The following are graph knowledge in {self._max_deep} depth, e.g:\n"
"`vertexA--[links]-->vertexB<--[links]--vertexC ...`"
@@ -340,7 +346,7 @@ def _process_vertex(
node_str = matched_str
else:
v_cache.add(matched_str)
node_str = f"{item['id']}{{{props_str}}}"
node_str = f"{item['id']}{{{props_str}}}" if use_id_to_match else f"{item['props']}{{{props_str}}}"

flat_rel += node_str
nodes_with_degree.append(node_str)
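To summarize the new fallback path, a small self-contained sketch of how each matched (property_name, property_value) pair is substituted into the property query template; only the first line of PROPERTY_QUERY_NEIGHBOR_TPL is reproduced here, and the sample pairs are made up.

```python
# Illustrative only: per-property substitution performed by the new loop in _subgraph_query.
# The template is trimmed to its first line; sample property pairs are hypothetical.
template = "g.V().has('{current_prop_name}', '{current_prop_value}')"

matched_props = [("name", "Tom"), ("city", "Beijing")]
for prop_name, prop_value in matched_props:
    print(template.format(current_prop_name=prop_name, current_prop_value=prop_value))
# Output:
# g.V().has('name', 'Tom')
# g.V().has('city', 'Beijing')
```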