Skip to content

Commit a0cc3f9

Browse files
MrJs133 and imbajin authored
refactor(llm): return schema.groovy first when backup graph data (#161)
Note: in non-groovy mode, the schema is returned in JSON format.

Co-authored-by: imbajin <[email protected]>
1 parent 1c9bb5e commit a0cc3f9

File tree

1 file changed

+26
-4
lines changed

1 file changed

+26
-4
lines changed

hugegraph-llm/src/hugegraph_llm/utils/hugegraph_utils.py

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -111,13 +111,14 @@ def backup_data():
111111
"vertices.json": f"g.V().limit({MAX_VERTICES})"
112112
f".aggregate('vertices').count().as('count').select('count','vertices')",
113113
"edges.json": f"g.E().limit({MAX_EDGES}).aggregate('edges').count().as('count').select('count','edges')",
114-
"schema.json": client.schema().getSchema()
114+
"schema.json": client.schema().getSchema(_format="groovy")
115115
}
116116

117+
vertexlabels = client.schema().getSchema()["vertexlabels"]
118+
all_pk_flag = all(data.get('id_strategy') == 'PRIMARY_KEY' for data in vertexlabels)
119+
117120
for filename, query in files.items():
118-
with open(os.path.join(backup_subdir, filename), "w", encoding="utf-8") as f:
119-
data = client.gremlin().exec(query)["data"] if "schema" not in filename else query
120-
json.dump(data, f, ensure_ascii=False)
121+
write_backup_file(client, backup_subdir, filename, query, all_pk_flag)
121122

122123
log.info("Backup successfully in %s.", backup_subdir)
123124
relative_backup_subdir = os.path.relpath(backup_subdir, start=resource_path)
@@ -128,6 +129,27 @@ def backup_data():
128129
raise Exception("Failed to execute backup") from e
129130

130131

132+
def write_backup_file(client, backup_subdir, filename, query, all_pk_flag):
    """Write a single backup artifact into ``backup_subdir``.

    The payload is resolved *before* any file is opened, so a failing Gremlin
    query or the groovy-schema path never leaves an empty placeholder file
    behind (the previous implementation opened ``filename`` for writing
    up front and could leave a zero-byte, invalid ``schema.json``).

    Args:
        client: Graph client; ``client.gremlin().exec(query)`` must return a
            mapping whose ``"data"`` entry is a list with the aggregated
            result in its first element. Not used for ``schema.json``.
        backup_subdir: Directory the backup file is written into.
        filename: One of ``"edges.json"``, ``"vertices.json"`` or
            ``"schema.json"``; any other name is ignored and no file is
            created.
        query: Gremlin query string for edges/vertices. For ``schema.json``
            this is the already-fetched schema object itself.
        all_pk_flag: When true, the ``"id"`` key is dropped from every
            exported vertex. The caller sets it when all vertex labels use
            the ``PRIMARY_KEY`` id strategy.
            NOTE(review): presumably ids are then regenerated from the
            primary keys on import -- confirm against the restore path.

    Raises:
        KeyError, IndexError: If the Gremlin response does not have the
            expected ``data[0][<key>]`` shape.
        OSError: If the target file cannot be written.
    """
    if filename == "edges.json":
        payload = client.gremlin().exec(query)["data"][0]["edges"]
    elif filename == "vertices.json":
        vertices = client.gremlin().exec(query)["data"][0]["vertices"]
        if all_pk_flag:
            payload = [
                {key: value for key, value in vertex.items() if key != "id"}
                for vertex in vertices
            ]
        else:
            payload = vertices
    elif filename == "schema.json":
        if isinstance(query, dict) and "schema" in query:
            # Groovy mode: the schema arrives as a script under "schema" and
            # is stored verbatim as schema.groovy; no schema.json is written.
            groovy_filename = filename.replace(".json", ".groovy")
            groovy_path = os.path.join(backup_subdir, groovy_filename)
            with open(groovy_path, "w", encoding="utf-8") as groovy_file:
                groovy_file.write(str(query["schema"]))
            return
        # Non-groovy mode: the schema object is dumped as JSON unchanged.
        payload = query
    else:
        # Unknown artifact name: nothing to export, so do not create a file.
        return

    with open(os.path.join(backup_subdir, filename), "w", encoding="utf-8") as f:
        json.dump(payload, f, ensure_ascii=False)
151+
152+
131153
def manage_backup_retention():
132154
try:
133155
backup_dirs = [

0 commit comments

Comments
 (0)