Commits (showing changes from 22 of 65 commits)
74cf3c5
doc: update for inner repo (GraphPlatform-4190)
Dec 9, 2024
e9941ad
Merge branch 'master' of ssh://icode.baidu.com:8235/baidu/starhugegra…
imbajin Jan 3, 2025
d2e4e2e
Merge branch 'main' into master-icode
imbajin Jan 10, 2025
c5ec3ea
Merge branch 'main' into master-icode
HJ-Young Jan 16, 2025
12221c9
docs(client): update README.md
imbajin Jan 20, 2025
8343d3c
Merge remote-tracking branch 'github/main'
Feb 7, 2025
0036b2c
Merge branch 'main'
HJ-Young Feb 25, 2025
173b9b2
Merge branch 'main' into master-icode
imbajin Mar 3, 2025
c9ef9ed
fix(llm): enable concurrency config in rag answer
imbajin Mar 3, 2025
c933c58
Merge branch 'main' into master-icode
imbajin Mar 3, 2025
263758b
Merge branch 'main' into master-icode
imbajin Mar 6, 2025
8a0e6cd
Merge branch 'main' into master-icode
imbajin Mar 11, 2025
a60887d
GraphPlatform-4765 [Tech Task] vermeer-client framework development
Mar 10, 2025
43cdcae
update llm settings
Apr 27, 2025
b4f1a51
set num_gremlin_generate_example 0
Apr 28, 2025
142a44e
disable text2gql by default
Apr 28, 2025
9f245a0
remove log info
Apr 28, 2025
87cb1cf
temperature 0.01
Apr 29, 2025
961f582
merge community
Apr 29, 2025
4cbb47a
modify prompt to only output gql
Apr 29, 2025
36116e6
test_api_connection
May 7, 2025
5db4e0c
merge main to master
May 12, 2025
bf19a84
empty chunk
May 12, 2025
026c65c
build, get and remove property embeddings
May 19, 2025
f2ee374
limit the number of props to be updated
May 19, 2025
0e98879
disable pylint and modify limit logic
May 20, 2025
a3b864f
save after removing props
May 20, 2025
ac7e137
change key:value to value
May 20, 2025
8ace5a8
pv-embedding + set<(pk, pv)>
May 22, 2025
a9f92c4
vector/embedding api
May 22, 2025
18744d2
Merge branch 'main' into property_embedding
imbajin May 22, 2025
89b2d47
match keywords and props
May 22, 2025
f14087a
fix ollama batch embeddings
May 22, 2025
ef37855
fix ollama single embedding
May 23, 2025
c24d210
pylint
May 23, 2025
f1fdbdb
Merge branch 'main' into property_embedding
MrJs133 May 23, 2025
2953dbc
fix ollama
May 23, 2025
cf279f5
split run()
May 23, 2025
182ecba
using get_texts_embeddings instead of get_text_embeddings
May 23, 2025
b7e7425
match properties and change the structure of fuzzy_matched_props
May 23, 2025
52c40cb
property subgraph_query
May 23, 2025
10e76cd
pylint
May 26, 2025
923502a
pylint
May 26, 2025
739479a
limit 2 times one day
May 26, 2025
9acaa96
pylint
May 26, 2025
86e6098
inner
May 26, 2025
b27d925
Merge branch 'main' into master-icode
imbajin May 26, 2025
765e93f
Merge branch 'master-icode' into property_embedding
imbajin May 26, 2025
b5f31ff
format
May 26, 2025
50ec338
fix lint & comment
imbajin May 26, 2025
7d9d67c
text2gremlin api
May 26, 2025
c918e73
change params
May 26, 2025
7b7260a
change params
May 26, 2025
3754211
text to json
May 27, 2025
7a2cf2b
detail
May 27, 2025
7b3e5e2
add graph_space in text2gremlin api
May 27, 2025
cb31b35
add graph_space in text2gremlin api
May 27, 2025
9778c37
change default in text2gremlin api
May 27, 2025
cf6d2e4
split build_semantic_index.run()
May 28, 2025
eb5e9f1
Merge branch 'property_embedding' of https://icode.baidu.com/baidu/st…
May 28, 2025
e75f68f
conflict
May 28, 2025
4aa1a4d
change daily limit
May 29, 2025
7b7c6d2
create pyhugegraph client by token
May 29, 2025
31bf971
change param
May 29, 2025
a0e460c
name -> graph
May 30, 2025
45 changes: 45 additions & 0 deletions hugegraph-llm/src/hugegraph_llm/api/vector_api.py
@@ -0,0 +1,45 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.


from datetime import date
from fastapi import status, APIRouter, HTTPException
from hugegraph_llm.utils.log import log

API_CALL_TRACKER = {}

# pylint: disable=too-many-statements
def vector_http_api(
router: APIRouter,
update_embedding_func,
):
@router.post("/vector/embedding", status_code=status.HTTP_200_OK)
def update_embedding_api(daily_limit: int = 2):
today = date.today()
for call_date in list(API_CALL_TRACKER.keys()):
if call_date != today:
del API_CALL_TRACKER[call_date]
call_count = API_CALL_TRACKER.get(today, 0)
if call_count >= daily_limit:
log.error("Rate limit exceeded for update_vid_embedding. Maximum %d calls per day.", daily_limit)
raise HTTPException(
status_code=status.HTTP_429_TOO_MANY_REQUESTS,
detail=f"API call limit of {daily_limit} per day exceeded. Please try again tomorrow."
)
API_CALL_TRACKER[today] = call_count + 1
result = update_embedding_func()
return result
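For reviewers, a minimal client-side sketch of how the daily limit behaves, assuming the router is mounted on the FastAPI app started by the demo; the base URL, port, and absence of auth headers are assumptions for illustration and not part of this PR.

```python
# Hypothetical usage sketch: the third call on the same day should return HTTP 429.
# Base URL/port are assumptions; adjust to the actual deployment.
import requests

BASE_URL = "http://127.0.0.1:8001"

for attempt in range(3):
    resp = requests.post(f"{BASE_URL}/vector/embedding", params={"daily_limit": 2}, timeout=300)
    if resp.status_code == 429:
        print("Rate limited:", resp.json()["detail"])
    else:
        print("Embedding update result:", resp.json())
```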
5 changes: 5 additions & 0 deletions hugegraph-llm/src/hugegraph_llm/config/prompt_config.py
@@ -218,6 +218,8 @@ class PromptConfig(BasePromptConfig):
- You may use the vertex ID directly if it’s provided in the context.
- If the provided question contains entity names that are very similar to the Vertices IDs, then in the generated Gremlin statement, replace the approximate entities from the original question.
For example, if the question includes the name ABC, and the provided VerticesIDs do not contain ABC but only abC, then use abC instead of ABC from the original question when generating the gremlin.
- Similarly, if the user's query refers to specific property names or their values, and these are present or align with the 'Referenced Extracted Properties', actively utilize these properties in your Gremlin query.
For instance, you can use them for filtering vertices or edges (e.g., using `has('propertyName', 'propertyValue')`), or for projecting specific values.

The output format must be as follows:
```gremlin
@@ -231,6 +233,9 @@ class PromptConfig(BasePromptConfig):
Referenced Extracted Vertex IDs Related to the Query:
{vertices}

Referenced Extracted Properties Related to the Query (Format: [('property_name', 'property_value'), ...]):
{properties}

Generate Gremlin from the Following User Query:
{query}

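To make the new `{properties}` slot concrete, here is a small hedged sketch of how matched property pairs might be rendered into this part of the prompt; the trimmed template and sample values below are illustrative only, not code added by this PR.

```python
# Illustrative only: rendering matched (property_name, property_value) pairs into
# the new {properties} slot; the template is trimmed to the two relevant placeholders.
template = (
    "Referenced Extracted Properties Related to the Query "
    "(Format: [('property_name', 'property_value'), ...]):\n"
    "{properties}\n\n"
    "Generate Gremlin from the Following User Query:\n"
    "{query}\n"
)

matched_props = [("name", "Tom"), ("city", "Beijing")]
prompt = template.format(properties=str(matched_props), query="Which city does Tom live in?")
print(prompt)
# A property-aware completion would then look like: g.V().has('name', 'Tom').values('city')
```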
4 changes: 3 additions & 1 deletion hugegraph-llm/src/hugegraph_llm/demo/rag_demo/app.py
@@ -23,6 +23,7 @@

from hugegraph_llm.api.admin_api import admin_http_api
from hugegraph_llm.api.rag_api import rag_http_api
from hugegraph_llm.api.vector_api import vector_http_api
from hugegraph_llm.config import admin_settings, huge_settings, prompt
from hugegraph_llm.demo.rag_demo.admin_block import create_admin_block, log_stream
from hugegraph_llm.demo.rag_demo.configs_block import (
@@ -32,6 +33,7 @@
apply_reranker_config,
apply_graph_config,
)
from hugegraph_llm.utils.graph_index_utils import update_vid_embedding
from hugegraph_llm.demo.rag_demo.other_block import create_other_block
from hugegraph_llm.demo.rag_demo.other_block import lifespan
from hugegraph_llm.demo.rag_demo.rag_block import create_rag_block, rag_answer
@@ -173,7 +175,7 @@ def create_app():
apply_reranker_config,
)
admin_http_api(api_auth, log_stream)

vector_http_api(api_auth, update_vid_embedding)
app.include_router(api_auth)
# Mount Gradio inside FastAPI
# TODO: support multi-user login when need
@@ -59,9 +59,13 @@ def example_index_query(self, num_examples):
return self

def gremlin_generate_synthesize(
self, schema, gremlin_prompt: Optional[str] = None, vertices: Optional[List[str]] = None
self,
schema,
gremlin_prompt: Optional[str] = None,
vertices: Optional[List[str]] = None,
properties: Optional[List[tuple]] = None
):
self.operators.append(GremlinGenerateSynthesize(self.llm, schema, vertices, gremlin_prompt))
self.operators.append(GremlinGenerateSynthesize(self.llm, schema, vertices, gremlin_prompt, properties))
return self

def print_result(self):
@@ -25,6 +25,7 @@ class FetchGraphData:

def __init__(self, graph: PyHugeClient):
self.graph = graph
self.schema = self.graph.schema()

def run(self, graph_summary: Optional[Dict[str, Any]]) -> Dict[str, Any]:
if graph_summary is None:
@@ -49,4 +50,19 @@ def res = [:];

if isinstance(result, list) and len(result) > 0:
graph_summary.update({key: result[i].get(key) for i, key in enumerate(keys)})

index_labels = self.schema.getIndexLabels()
if index_labels:
graph_summary["index_labels"] = [
{
"id": label.id,
"base_type": label.baseType,
"base_value": label.baseValue,
"name": label.name,
"fields": label.fields,
"index_type": label.indexType
} for label in index_labels
]
else:
graph_summary["index_labels"] = []
return graph_summary
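For clarity, a hedged example of the extra key this adds to `graph_summary`; only the dictionary keys mirror the code above, while the attribute values (and any other summary keys) are hypothetical.

```python
# Illustrative only: shape of graph_summary["index_labels"] after this change.
# Attribute values are made up; real ones come from PyHugeClient's schema.getIndexLabels().
graph_summary = {
    "index_labels": [
        {
            "id": 1,
            "base_type": "VERTEX_LABEL",   # hypothetical value
            "base_value": "person",        # hypothetical value
            "name": "personByName",        # hypothetical value
            "fields": ["name"],
            "index_type": "SECONDARY",     # hypothetical value
        }
    ],
}
```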
@@ -53,7 +53,7 @@
"""

PROPERTY_QUERY_NEIGHBOR_TPL = """\
g.V().has('{prop}', within({keywords}))
g.V().has('{current_prop_name}', '{current_prop_value}')
.repeat(
bothE({edge_labels}).limit({edge_limit}).otherV().dedup()
).times({max_deep}).emit()
@@ -65,8 +65,8 @@
)
.by(project('label', 'inV', 'outV', 'props')
.by(label())
.by(inV().values('{prop}'))
.by(outV().values('{prop}'))
.by(inV().values('{current_prop_name}'))
.by(outV().values('{current_prop_name}'))
.by(valueMap().by(unfold()))
)
.limit({max_items})
@@ -129,12 +129,13 @@ def run(self, context: Dict[str, Any]) -> Dict[str, Any]:
def _gremlin_generate_query(self, context: Dict[str, Any]) -> Dict[str, Any]:
query = context["query"]
vertices = context.get("match_vids")
properties = context.get("match_props")
query_embedding = context.get("query_embedding")

self._gremlin_generator.clear()
self._gremlin_generator.example_index_query(num_examples=self._num_gremlin_generate_example)
gremlin_response = self._gremlin_generator.gremlin_generate_synthesize(
context["simple_schema"], vertices=vertices, gremlin_prompt=self._gremlin_prompt
context["simple_schema"], vertices=vertices, gremlin_prompt=self._gremlin_prompt, properties=properties
).run(query=query, query_embedding=query_embedding)
if self._num_gremlin_generate_example > 0:
gremlin = gremlin_response["result"]
@@ -160,12 +161,14 @@ def _gremlin_generate_query(self, context: Dict[str, Any]) -> Dict[str, Any]:
def _subgraph_query(self, context: Dict[str, Any]) -> Dict[str, Any]:
# 1. Extract params from context
matched_vids = context.get("match_vids")
matched_props = context.get("match_props")
if isinstance(context.get("max_deep"), int):
self._max_deep = context["max_deep"]
if isinstance(context.get("max_items"), int):
self._max_items = context["max_items"]
if isinstance(context.get("prop_to_match"), str):
self._prop_to_match = context["prop_to_match"]
if isinstance(context.get("match_props"), list):
self._prop_to_match = matched_props[0][0] if matched_props else None
log.debug("Prop to match: %s", self._prop_to_match)

# 2. Extract edge_labels from graph schema
_, edge_labels = self._extract_labels_from_schema()
@@ -207,31 +210,34 @@ def _subgraph_query(self, context: Dict[str, Any]) -> Dict[str, Any]:
vertex_degree_list[0].update(vertex_knowledge)
else:
vertex_degree_list.append(vertex_knowledge)
else:
elif matched_props:
# WARN: When will the query enter here?
keywords = context.get("keywords")
assert keywords, "No related property(keywords) for graph query."
keywords_str = ",".join("'" + kw + "'" for kw in keywords)
gremlin_query = PROPERTY_QUERY_NEIGHBOR_TPL.format(
prop=self._prop_to_match,
keywords=keywords_str,
edge_labels=edge_labels_str,
edge_limit=edge_limit_amount,
max_deep=self._max_deep,
max_items=self._max_items,
)
log.warning("Unable to find vid, downgraded to property query, please confirm if it meets expectation.")
graph_chain_knowledge = set()
for prop_name, prop_value in matched_props:
self._prop_to_match = prop_name
gremlin_query = PROPERTY_QUERY_NEIGHBOR_TPL.format(
current_prop_name=prop_name,
current_prop_value=prop_value,
edge_labels=edge_labels_str,
edge_limit=edge_limit_amount,
max_deep=self._max_deep,
max_items=self._max_items
)
log.warning("Unable to find vid, downgraded to property query, please confirm if it meets expectation.")
log.debug("property gremlin: %s", gremlin_query)

paths: List[Any] = self._client.gremlin().exec(gremlin=gremlin_query)["data"]
graph_chain_knowledge, vertex_degree_list, knowledge_with_degree = self._format_graph_query_result(
query_paths=paths
)
paths: List[Any] = self._client.gremlin().exec(gremlin=gremlin_query)["data"]
log.debug("paths: %s", paths)
temp_graph_chain_knowledge, vertex_degree_list, knowledge_with_degree = self._format_graph_query_result(
query_paths=paths
)
graph_chain_knowledge.update(temp_graph_chain_knowledge)

context["graph_result"] = list(graph_chain_knowledge)
if context["graph_result"]:
context["graph_result_flag"] = 0
context["vertex_degree_list"] = [list(vertex_degree) for vertex_degree in vertex_degree_list]
context["knowledge_with_degree"] = knowledge_with_degree
context["knowledge_with_degree"] = knowledge_with_degree # pylint: disable=possibly-used-before-assignment
context["graph_context_head"] = (
f"The following are graph knowledge in {self._max_deep} depth, e.g:\n"
"`vertexA--[links]-->vertexB<--[links]--vertexC ...`"
@@ -340,7 +346,7 @@ def _process_vertex(
node_str = matched_str
else:
v_cache.add(matched_str)
node_str = f"{item['id']}{{{props_str}}}"
node_str = f"{item['id']}{{{props_str}}}" if use_id_to_match else f"{item['props']}{{{props_str}}}"

flat_rel += node_str
nodes_with_degree.append(node_str)
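To summarize the new fallback path, a small self-contained sketch of how each matched (property_name, property_value) pair is substituted into the property query template; only the first line of PROPERTY_QUERY_NEIGHBOR_TPL is reproduced here, and the sample pairs are made up.

```python
# Illustrative only: per-property substitution performed by the new loop in _subgraph_query.
# The template is trimmed to its first line; sample property pairs are hypothetical.
template = "g.V().has('{current_prop_name}', '{current_prop_value}')"

matched_props = [("name", "Tom"), ("city", "Beijing")]
for prop_name, prop_value in matched_props:
    print(template.format(current_prop_name=prop_name, current_prop_value=prop_value))
# Output:
# g.V().has('name', 'Tom')
# g.V().has('city', 'Beijing')
```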