Changes from all commits
65 commits
74cf3c5
doc: update for inner repo (GraphPlatform-4190)
Dec 9, 2024
e9941ad
Merge branch 'master' of ssh://icode.baidu.com:8235/baidu/starhugegra…
imbajin Jan 3, 2025
d2e4e2e
Merge branch 'main' into master-icode
imbajin Jan 10, 2025
c5ec3ea
Merge branch 'main' into master-icode
HJ-Young Jan 16, 2025
12221c9
docs(client): update README.md
imbajin Jan 20, 2025
8343d3c
Merge remote-tracking branch 'github/main'
Feb 7, 2025
0036b2c
Merge branch 'main'
HJ-Young Feb 25, 2025
173b9b2
Merge branch 'main' into master-icode
imbajin Mar 3, 2025
c9ef9ed
fix(llm): enable concurrency config in rag answer
imbajin Mar 3, 2025
c933c58
Merge branch 'main' into master-icode
imbajin Mar 3, 2025
263758b
Merge branch 'main' into master-icode
imbajin Mar 6, 2025
8a0e6cd
Merge branch 'main' into master-icode
imbajin Mar 11, 2025
a60887d
GraphPlatform-4765 [Tech Task] vermeer-client framework development
Mar 10, 2025
43cdcae
update llm settings
Apr 27, 2025
b4f1a51
set num_gremlin_generate_example 0
Apr 28, 2025
142a44e
disable text2gql by default
Apr 28, 2025
9f245a0
remove log info
Apr 28, 2025
87cb1cf
temperature 0.01
Apr 29, 2025
961f582
merge community
Apr 29, 2025
4cbb47a
modify prompt to only output gql
Apr 29, 2025
36116e6
test_api_connection
May 7, 2025
5db4e0c
merge main to master
May 12, 2025
bf19a84
empty chunk
May 12, 2025
026c65c
build, get and remove property embeddings
May 19, 2025
f2ee374
limit the number of props to be updated
May 19, 2025
0e98879
disable pylint and modify limit logic
May 20, 2025
a3b864f
save after removing props
May 20, 2025
ac7e137
change key:value to value
May 20, 2025
8ace5a8
pv-embedding + set<(pk, pv)>
May 22, 2025
a9f92c4
vector/embedding api
May 22, 2025
18744d2
Merge branch 'main' into property_embedding
imbajin May 22, 2025
89b2d47
match keywords and props
May 22, 2025
f14087a
fix ollama batch embeddings
May 22, 2025
ef37855
fix ollama single embedding
May 23, 2025
c24d210
pylint
May 23, 2025
f1fdbdb
Merge branch 'main' into property_embedding
MrJs133 May 23, 2025
2953dbc
fix ollama
May 23, 2025
cf279f5
split run()
May 23, 2025
182ecba
using get_texts_embeddings instead of get_text_embeddings
May 23, 2025
b7e7425
match properties and change the structure of fuzzy_matched_props
May 23, 2025
52c40cb
property subgraph_query
May 23, 2025
10e76cd
pylint
May 26, 2025
923502a
pylint
May 26, 2025
739479a
limit 2 times one day
May 26, 2025
9acaa96
pylint
May 26, 2025
86e6098
inner
May 26, 2025
b27d925
Merge branch 'main' into master-icode
imbajin May 26, 2025
765e93f
Merge branch 'master-icode' into property_embedding
imbajin May 26, 2025
b5f31ff
format
May 26, 2025
50ec338
fix lint & comment
imbajin May 26, 2025
7d9d67c
text2gremlin api
May 26, 2025
c918e73
change params
May 26, 2025
7b7260a
change params
May 26, 2025
3754211
text to json
May 27, 2025
7a2cf2b
detail
May 27, 2025
7b3e5e2
add graph_space in text2gremlin api
May 27, 2025
cb31b35
add graph_space in text2gremlin api
May 27, 2025
9778c37
change default in text2gremlin api
May 27, 2025
cf6d2e4
split build_semantic_index.run()
May 28, 2025
eb5e9f1
Merge branch 'property_embedding' of https://icode.baidu.com/baidu/st…
May 28, 2025
e75f68f
conflict
May 28, 2025
4aa1a4d
change daily limit
May 29, 2025
7b7c6d2
create pyhugegraph client by token
May 29, 2025
31bf971
change param
May 29, 2025
a0e460c
name -> graph
May 30, 2025
29 changes: 4 additions & 25 deletions README.md
@@ -3,9 +3,8 @@
[![License](https://img.shields.io/badge/license-Apache%202-0E78BA.svg)](https://www.apache.org/licenses/LICENSE-2.0.html)
[![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/apache/incubator-hugegraph-ai)

`hugegraph-ai` aims to explore the integration of [HugeGraph](https://github.com/apache/hugegraph) with artificial
intelligence (AI) and provide comprehensive support for developers to leverage HugeGraph's AI capabilities
in their projects.
`hugegraph-ai` aims to explore the integration of HugeGraph with artificial intelligence (AI) and provide comprehensive support for
developers to leverage HugeGraph's AI capabilities in their projects.


## Modules
@@ -22,37 +21,17 @@ to seamlessly connect with third-party graph-related ML frameworks.
It is used to define graph structures and perform CRUD operations on graph data. Both the `hugegraph-llm` and
`hugegraph-ml` modules will depend on this foundational library.

## Learn More

The [project homepage](https://hugegraph.apache.org/docs/quickstart/hugegraph-ai/) contains more information about
hugegraph-ai.

And here are links of other repositories:
1. [hugegraph](https://github.com/apache/hugegraph) (graph's core component - Graph server + PD + Store)
2. [hugegraph-toolchain](https://github.com/apache/hugegraph-toolchain) (graph tools **[loader](https://github.com/apache/incubator-hugegraph-toolchain/tree/master/hugegraph-loader)/[dashboard](https://github.com/apache/incubator-hugegraph-toolchain/tree/master/hugegraph-hubble)/[tool](https://github.com/apache/incubator-hugegraph-toolchain/tree/master/hugegraph-tools)/[client](https://github.com/apache/incubator-hugegraph-toolchain/tree/master/hugegraph-client)**)
3. [hugegraph-computer](https://github.com/apache/hugegraph-computer) (integrated **graph computing** system)
4. [hugegraph-website](https://github.com/apache/hugegraph-doc) (**doc & website** code)


## Contributing

- Welcome to contribute to HugeGraph, please see [Guidelines](https://hugegraph.apache.org/docs/contribution-guidelines/) for more information.
- Note: It's recommended to use [GitHub Desktop](https://desktop.github.com/) to greatly simplify the PR and commit process.
- Code format: Please run [`./style/code_format_and_analysis.sh`](style/code_format_and_analysis.sh) to format your code before submitting a PR. (Use `pylint` to check code style)
- Thank you to all the people who already contributed to HugeGraph!

[![contributors graph](https://contrib.rocks/image?repo=apache/incubator-hugegraph-ai)](https://github.com/apache/incubator-hugegraph-ai/graphs/contributors)


## License

hugegraph-ai is licensed under [Apache 2.0 License](./LICENSE).


## Contact Us

- [GitHub Issues](https://github.com/apache/incubator-hugegraph-ai/issues): Feedback on usage issues and functional requirements (quick response)
- Feedback Email: [[email protected]](mailto:[email protected]) ([subscriber](https://hugegraph.apache.org/docs/contribution-guidelines/subscribe/) only)
- WeChat public account: Apache HugeGraph, welcome to scan this QR code to follow us.

<img src="https://raw.githubusercontent.com/apache/hugegraph-doc/master/assets/images/wechat.png" alt="QR png" width="350"/>
- 如流 HugeGraph Team/Group
- GraphRAG DevOps Team (🚧)
6 changes: 3 additions & 3 deletions hugegraph-llm/README.md
@@ -38,7 +38,7 @@ graph systems and large language models.

3. Clone this project
```bash
git clone https://github.com/apache/incubator-hugegraph-ai.git
git clone https://{username}@icode.baidu.com/baidu/starhugegraph/hugegraph-ai
```
4. Configuration dependency environment
```bash
@@ -85,8 +85,8 @@ graph systems and large language models.
- Docs:
- text: Build rag index from plain text
- file: Upload file(s) which should be <u>TXT</u> or <u>.docx</u> (Multiple files can be selected together)
- [Schema](https://hugegraph.apache.org/docs/clients/restful-api/schema/): (Except **2 types**)
- User-defined Schema (JSON format, follow the [template](https://github.com/apache/incubator-hugegraph-ai/blob/aff3bbe25fa91c3414947a196131be812c20ef11/hugegraph-llm/src/hugegraph_llm/config/config_data.py#L125)
- [Schema](https://starhugegraph.github.io/hugegraph-doc/clients/restful-api-v3/schema.html): (Accept **2 types**)
- User-defined Schema (JSON format, follow the [template](https://console.cloud.baidu-int.com/devops/icode/repos/baidu/starhugegraph/hugegraph-ai/blob/master/hugegraph-llm/src/hugegraph_llm/config/config_data.py#L173)
to modify it)
- Specify the name of the HugeGraph graph instance, it will automatically get the schema from it (like
**"hugegraph"**)
41 changes: 35 additions & 6 deletions hugegraph-llm/src/hugegraph_llm/api/models/rag_requests.py
@@ -15,20 +15,21 @@
# specific language governing permissions and limitations
# under the License.

from typing import Optional, Literal

from typing import Optional, Literal, List
from enum import Enum
from fastapi import Query
from pydantic import BaseModel

from hugegraph_llm.config import prompt

from hugegraph_llm.config import huge_settings

class GraphConfigRequest(BaseModel):
url: str = Query('127.0.0.1:8080', description="hugegraph client url.")
name: str = Query('hugegraph', description="hugegraph client name.")
user: str = Query('', description="hugegraph client user.")
pwd: str = Query('', description="hugegraph client pwd.")
graph: str = Query('hugegraph', description="hugegraph client name.")
user: Optional[str] = Query('', description="hugegraph client user.")
pwd: Optional[str] = Query('', description="hugegraph client pwd.")
gs: str = None
token: Optional[str] = Query('', description="hugegraph client token.")


class RAGRequest(BaseModel):
Expand Down Expand Up @@ -116,3 +117,31 @@ class RerankerConfigRequest(BaseModel):
class LogStreamRequest(BaseModel):
admin_token: Optional[str] = None
log_file: Optional[str] = "llm-server.log"

class GremlinOutputType(str, Enum):
MATCH_RESULT = "match_result"
TEMPLATE_GREMLIN = "template_gremlin"
RAW_GREMLIN = "raw_gremlin"
TEMPLATE_EXECUTION_RESULT = "template_execution_result"
RAW_EXECUTION_RESULT = "raw_execution_result"

class GremlinGenerateRequest(BaseModel):
query: str
example_num: Optional[int] = Query(
0,
description="Number of Gremlin templates to use.(0 means no templates)"
)
gremlin_prompt: Optional[str] = Query(
prompt.gremlin_generate_prompt,
description="Prompt for the Text2Gremlin query.",
)
client_config: Optional[GraphConfigRequest] = Query(None, description="hugegraph server config.")
output_types: Optional[List[GremlinOutputType]] = Query(
default=[GremlinOutputType.TEMPLATE_GREMLIN],
description="""
A list that can contain "match_result", "template_gremlin",
"raw_gremlin", "template_execution_result", "raw_execution_result".
Specify which types of result you need; empty means all types.
"""
)
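As a quick illustration of the new request model, the following is a minimal, self-contained sketch of how the `output_types` strings are coerced into the `GremlinOutputType` enum by Pydantic. The miniature classes below only mirror the fields added above and are not the project's actual module:

```python
# Self-contained sketch; names mirror the model in this diff, values are illustrative.
from enum import Enum
from typing import List, Optional
from pydantic import BaseModel

class GremlinOutputType(str, Enum):
    MATCH_RESULT = "match_result"
    TEMPLATE_GREMLIN = "template_gremlin"
    RAW_GREMLIN = "raw_gremlin"
    TEMPLATE_EXECUTION_RESULT = "template_execution_result"
    RAW_EXECUTION_RESULT = "raw_execution_result"

class GremlinGenerateRequest(BaseModel):
    query: str
    example_num: Optional[int] = 0
    output_types: Optional[List[GremlinOutputType]] = [GremlinOutputType.TEMPLATE_GREMLIN]

# Plain strings from a JSON body are coerced into enum members;
# unknown values raise a validation error instead of passing through.
req = GremlinGenerateRequest(query="Who are Alice's friends?",
                             output_types=["raw_gremlin", "match_result"])
print([ot.value for ot in req.output_types])  # ['raw_gremlin', 'match_result']
```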

33 changes: 31 additions & 2 deletions hugegraph-llm/src/hugegraph_llm/api/rag_api.py
@@ -26,6 +26,7 @@
LLMConfigRequest,
RerankerConfigRequest,
GraphRAGRequest,
GremlinGenerateRequest,
)
from hugegraph_llm.config import huge_settings
from hugegraph_llm.api.models.rag_response import RAGResponse
@@ -41,6 +42,7 @@ def rag_http_api(
apply_llm_conf,
apply_embedding_conf,
apply_reranker_conf,
gremlin_generate_selective_func,
):
@router.post("/rag", status_code=status.HTTP_200_OK)
def rag_answer_api(req: RAGRequest):
@@ -79,10 +81,11 @@ def rag_answer_api(req: RAGRequest):
def set_graph_config(req):
if req.client_config:
huge_settings.graph_url = req.client_config.url
huge_settings.graph_name = req.client_config.name
huge_settings.graph_name = req.client_config.graph
huge_settings.graph_user = req.client_config.user
huge_settings.graph_pwd = req.client_config.pwd
huge_settings.graph_space = req.client_config.gs
huge_settings.graph_token = req.client_config.token

@router.post("/rag/graph", status_code=status.HTTP_200_OK)
def graph_rag_recall_api(req: GraphRAGRequest):
@@ -139,7 +142,7 @@ def graph_rag_recall_api(req: GraphRAGRequest):
@router.post("/config/graph", status_code=status.HTTP_201_CREATED)
def graph_config_api(req: GraphConfigRequest):
# Accept status code
res = apply_graph_conf(req.url, req.name, req.user, req.pwd, req.gs, origin_call="http")
res = apply_graph_conf(req.url, req.graph, req.user, req.pwd, req.gs, origin_call="http")
return generate_response(RAGResponse(status_code=res, message="Missing Value"))

# TODO: restructure the implement of llm to three types, like "/config/chat_llm"
@@ -178,3 +181,29 @@ def rerank_config_api(req: RerankerConfigRequest):
else:
res = status.HTTP_501_NOT_IMPLEMENTED
return generate_response(RAGResponse(status_code=res, message="Missing Value"))

@router.post("/text2gremlin", status_code=status.HTTP_200_OK)
def text2gremlin_api(req: GremlinGenerateRequest):
try:
set_graph_config(req)

output_types_str_list = None
if req.output_types:
output_types_str_list = [ot.value for ot in req.output_types]

response_dict = gremlin_generate_selective_func(
inp=req.query,
example_num=req.example_num,
schema_input=huge_settings.graph_name,
gremlin_prompt_input=req.gremlin_prompt,
requested_outputs=output_types_str_list,
)
return response_dict
except HTTPException as e:
raise e
except Exception as e:
log.error(f"Error in text2gremlin_api: {e}")
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="An unexpected error occurred during Gremlin generation.",
) from e
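For reference, a client-side call to the new endpoint might look like the sketch below. The host, port, and route prefix are assumptions (the actual mount point depends on how the router is included in `app.py`), and the response structure is whatever `gremlin_generate_selective_func` returns:

```python
# Hedged sketch of invoking the new /text2gremlin endpoint with `requests`.
# The base URL is a placeholder; adjust it to the real deployment.
import requests

resp = requests.post(
    "http://127.0.0.1:8001/api/text2gremlin",  # hypothetical host/port/prefix
    json={
        "query": "List all persons older than 30",
        "example_num": 0,
        "output_types": ["raw_gremlin", "raw_execution_result"],
    },
    timeout=30,
)
resp.raise_for_status()
print(resp.json())  # shape is defined by gremlin_generate_selective_func
```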
65 changes: 65 additions & 0 deletions hugegraph-llm/src/hugegraph_llm/api/vector_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.


from datetime import date
from typing import Optional

from fastapi import status, APIRouter, HTTPException, Body

from hugegraph_llm.utils.log import log
from hugegraph_llm.api.models.rag_requests import GraphConfigRequest
from hugegraph_llm.config import huge_settings

API_CALL_TRACKER = {}


# pylint: disable=too-many-statements
def vector_http_api(router: APIRouter, update_embedding_func):
@router.post("/vector/embedding", status_code=status.HTTP_200_OK)
def update_embedding_api(
daily_limit: int = 50,
graph_config: Optional[GraphConfigRequest] = Body(None)
):
"""
Updates the vector embedding.
This endpoint is rate-limited to `daily_limit` calls per day (50 by default). (Note: not thread-safe!)
The rate limit is tracked per day and resets at midnight.
"""
today = date.today()
for call_date in list(API_CALL_TRACKER.keys()):
if call_date != today:
del API_CALL_TRACKER[call_date]
call_count = API_CALL_TRACKER.get(today, 0)
if call_count >= daily_limit:
log.error("Rate limit exceeded for update_vid_embedding. Maximum %d calls per day.", daily_limit)
raise HTTPException(
status_code=status.HTTP_429_TOO_MANY_REQUESTS,
detail=f"API call limit of {daily_limit} per day exceeded. Please try again tomorrow."
)
API_CALL_TRACKER[today] = call_count + 1
if graph_config:
huge_settings.graph_url = graph_config.url
huge_settings.graph_name = graph_config.graph
huge_settings.graph_user = graph_config.user
huge_settings.graph_pwd = graph_config.pwd
huge_settings.graph_space = graph_config.gs
huge_settings.graph_token = graph_config.token

result = update_embedding_func()
result = {"detail": result}
return result
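Since the module-level counter above is explicitly not thread-safe, one possible hardening (not part of this change) is to guard it with a lock. A minimal sketch under that assumption:

```python
# Sketch only: a lock-guarded daily counter equivalent to API_CALL_TRACKER.
import threading
from datetime import date

_lock = threading.Lock()
_tracker: dict = {}

def try_acquire_daily_slot(daily_limit: int = 50) -> bool:
    """Return True if another call is allowed today, False once the limit is hit."""
    today = date.today()
    with _lock:
        # Drop counters from previous days so the dict stays small.
        for day in list(_tracker):
            if day != today:
                del _tracker[day]
        if _tracker.get(today, 0) >= daily_limit:
            return False
        _tracker[today] = _tracker.get(today, 0) + 1
        return True
```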
1 change: 1 addition & 0 deletions hugegraph-llm/src/hugegraph_llm/config/hugegraph_config.py
@@ -27,6 +27,7 @@ class HugeGraphConfig(BaseConfig):
graph_user: Optional[str] = "admin"
graph_pwd: Optional[str] = "xxx"
graph_space: Optional[str] = None
graph_token: Optional[str] = None

# graph query config
limit_property: Optional[str] = "False"
@@ -22,9 +22,9 @@
from hugegraph_llm.utils.log import log

dir_name = os.path.dirname
package_path = dir_name(dir_name(dir_name(dir_name(dir_name(os.path.abspath(__file__))))))
F_NAME = "config_prompt.yaml"
yaml_file_path = os.path.join(os.getcwd(), "src/hugegraph_llm/resources/demo", F_NAME)

yaml_file_path = os.path.join(package_path, f"src/hugegraph_llm/resources/demo/{F_NAME}")

class BasePromptConfig:
graph_schema: str = ''
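The path change above makes the YAML location independent of the process's working directory; an equivalent, purely illustrative formulation with `pathlib` is:

```python
# Illustrative only: five dirname() calls correspond to parents[4].
from pathlib import Path

package_path = Path(__file__).resolve().parents[4]
yaml_file_path = package_path / "src/hugegraph_llm/resources/demo/config_prompt.yaml"
# os.getcwd() only resolves correctly when the server is launched from the
# project root; __file__-relative resolution works from any directory.
```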
5 changes: 5 additions & 0 deletions hugegraph-llm/src/hugegraph_llm/config/prompt_config.py
@@ -218,6 +218,8 @@ class PromptConfig(BasePromptConfig):
- You may use the vertex ID directly if it’s provided in the context.
- If the provided question contains entity names that are very similar to the Vertices IDs, then in the generated Gremlin statement, replace the approximate entities from the original question.
For example, if the question includes the name ABC, and the provided VerticesIDs do not contain ABC but only abC, then use abC instead of ABC from the original question when generating the gremlin.
- Similarly, if the user's query refers to specific property names or their values, and these are present or align with the 'Referenced Extracted Properties', actively utilize these properties in your Gremlin query.
For instance, you can use them for filtering vertices or edges (e.g., using `has('propertyName', 'propertyValue')`), or for projecting specific values.

The output format must be as follows:
```gremlin
@@ -231,6 +233,9 @@ class PromptConfig(BasePromptConfig):
Referenced Extracted Vertex IDs Related to the Query:
{vertices}

Referenced Extracted Properties Related to the Query (Format: [('property_name', 'property_value'), ...]):
{properties}

Generate Gremlin from the Following User Query:
{query}

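To make the new placeholder concrete, the `{properties}` slot receives data shaped like the `(property_name, property_value)` pairs described above. A hypothetical rendering (the values are illustrative, not project data):

```python
# Hypothetical example of what the {properties} placeholder may be filled with.
fuzzy_matched_props = [("name", "abC"), ("age", "30")]

prompt_fragment = (
    "Referenced Extracted Properties Related to the Query "
    "(Format: [('property_name', 'property_value'), ...]):\n"
    f"{fuzzy_matched_props}"
)
print(prompt_fragment)
# A Gremlin query built from these hints could filter as the prompt suggests:
# g.V().has('name', 'abC').has('age', '30')
```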
11 changes: 9 additions & 2 deletions hugegraph-llm/src/hugegraph_llm/demo/rag_demo/app.py
@@ -23,6 +23,7 @@

from hugegraph_llm.api.admin_api import admin_http_api
from hugegraph_llm.api.rag_api import rag_http_api
from hugegraph_llm.api.vector_api import vector_http_api
from hugegraph_llm.config import admin_settings, huge_settings, prompt
from hugegraph_llm.demo.rag_demo.admin_block import create_admin_block, log_stream
from hugegraph_llm.demo.rag_demo.configs_block import (
@@ -32,10 +33,15 @@
apply_reranker_config,
apply_graph_config,
)
from hugegraph_llm.utils.graph_index_utils import update_vid_embedding
from hugegraph_llm.demo.rag_demo.other_block import create_other_block
from hugegraph_llm.demo.rag_demo.other_block import lifespan
from hugegraph_llm.demo.rag_demo.rag_block import create_rag_block, rag_answer
from hugegraph_llm.demo.rag_demo.text2gremlin_block import create_text2gremlin_block, graph_rag_recall
from hugegraph_llm.demo.rag_demo.text2gremlin_block import (
create_text2gremlin_block,
graph_rag_recall,
gremlin_generate_selective,
)
from hugegraph_llm.demo.rag_demo.vector_graph_block import create_vector_graph_block
from hugegraph_llm.resources.demo.css import CSS
from hugegraph_llm.utils.log import log
@@ -171,9 +177,10 @@ def create_app():
apply_llm_config,
apply_embedding_config,
apply_reranker_config,
gremlin_generate_selective,
)
admin_http_api(api_auth, log_stream)

vector_http_api(api_auth, update_vid_embedding)
app.include_router(api_auth)
# Mount Gradio inside FastAPI
# TODO: support multi-user login when need
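The wiring above follows a simple dependency-injection pattern: each `*_http_api` helper receives the shared router plus the callables its routes close over, and the router is mounted once at the end. A toy sketch of the same pattern (names are illustrative, not project code):

```python
# Minimal sketch of the router-factory pattern used in create_app().
from fastapi import APIRouter, FastAPI

def toy_http_api(router: APIRouter, do_work):
    @router.post("/toy")
    def toy_endpoint():
        # The injected callable does the real work; the route stays thin.
        return {"detail": do_work()}

app = FastAPI()
api = APIRouter()
toy_http_api(api, lambda: "done")
app.include_router(api)
```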
@@ -219,7 +219,7 @@ def apply_llm_config(current_llm_config, arg1, arg2, arg3, arg4, origin_call=None
data = {
"model": arg3,
"temperature": 0.01,
"messages": [{"role": "user", "content": "test"}],
"messages": [{"role": "user", "content": "hello"}],
}
headers = {"Authorization": f"Bearer {arg1}"}
status_code = test_api_connection(test_url, method="POST", headers=headers, body=data, origin_call=origin_call)
Expand Down
Loading