Skip to content

Commit d5e2232

Browse files
committed
[bugfix][update_graph: data decode error][vector_search: index_name is empty]
1 parent b0845f7 commit d5e2232

File tree

1 file changed

+36
-35
lines changed

1 file changed

+36
-35
lines changed

muagent/service/ekg_construct/ekg_construct_base.py

Lines changed: 36 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -181,28 +181,29 @@ def update_graph(
181181
origin_nodes: List[GNode], origin_edges: List[GEdge],
182182
new_nodes: List[GNode], new_edges: List[GEdge], teamid: str
183183
):
184-
#
185-
origin_nodeids = set([node["id"] for node in origin_nodes])
186-
origin_edgeids = set([f"{edge['start_id']}__{edge['end_id']}" for edge in origin_edges])
187-
nodeids = set([node["id"] for node in new_nodes])
188-
edgeids = set([f"{edge['start_id']}__{edge['end_id']}" for edge in new_edges])
184+
185+
origin_nodeids = set([node.id for node in origin_nodes])
186+
origin_edgeids = set([f"{edge.start_id}__{edge.end_id}" for edge in origin_edges])
187+
nodeids = set([node.id for node in new_nodes])
188+
edgeids = set([f"{edge.start_id}__{edge.end_id}" for edge in new_edges])
189+
189190
unique_nodeids = origin_nodeids&nodeids
190191
unique_edgeids = origin_edgeids&edgeids
191192
nodeid2nodes_dict = {}
192193
for node in origin_nodes + new_nodes:
193-
nodeid2nodes_dict.setdefault(node["id"], []).append(node)
194+
nodeid2nodes_dict.setdefault(node.id, []).append(node)
194195

195196
edgeid2edges_dict = {}
196197
for edge in origin_edges + new_edges:
197-
edgeid2edges_dict.setdefault(f"{edge['start_id']}__{edge['end_id']}", []).append(edge)
198+
edgeid2edges_dict.setdefault(f"{edge.start_id}__{edge.end_id}", []).append(edge)
198199

199200
# get add nodes & edges
200-
add_nodes = [node for node in new_nodes if node["id"] not in origin_nodeids]
201-
add_edges = [edge for edge in new_edges if f"{edge['start_id']}__{edge['end_id']}" not in origin_edgeids]
201+
add_nodes = [node for node in new_nodes if node.id not in origin_nodeids]
202+
add_edges = [edge for edge in new_edges if f"{edge.start_id}__{edge.end_id}" not in origin_edgeids]
202203

203204
# get delete nodes & edges
204-
delete_nodes = [node for node in origin_nodes if node["id"] not in nodeids]
205-
delete_edges = [edge for edge in origin_edges if f"{edge['start_id']}__{edge['end_id']}" not in edgeids]
205+
delete_nodes = [node for node in origin_nodes if node.id not in nodeids]
206+
delete_edges = [edge for edge in origin_edges if f"{edge.start_id}__{edge.end_id}" not in edgeids]
206207

207208
# get update nodes & edges
208209
update_nodes = [
@@ -215,16 +216,16 @@ def update_graph(
215216
for edgeid in unique_edgeids
216217
if edgeid2edges_dict[edgeid][0]!=edgeid2edges_dict[edgeid][1]
217218
]
218-
219+
219220
#
220-
add_node_result = self.add_nodes([GNode(**n) for n in add_nodes], teamid)
221-
add_edge_result = self.add_edges([GEdge(**e) for e in add_edges], teamid)
221+
add_node_result = self.add_nodes(add_nodes, teamid)
222+
add_edge_result = self.add_edges(add_edges, teamid)
222223

223-
delete_edge_result = self.delete_nodes([GNode(**n) for n in delete_nodes], teamid)
224-
delete_node_result = self.delete_edges([GEdge(**e) for e in delete_edges], teamid)
224+
delete_edge_result = self.delete_nodes(delete_nodes, teamid)
225+
delete_node_result = self.delete_edges(delete_edges, teamid)
225226

226-
update_node_result = self.update_nodes([GNode(**n) for n in update_nodes], teamid)
227-
update_edge_result = self.update_edges([GEdge(**e) for e in update_edges], teamid)
227+
update_node_result = self.update_nodes(update_nodes, teamid)
228+
update_edge_result = self.update_edges(update_edges, teamid)
228229

229230
return [add_node_result, add_edge_result, delete_edge_result, delete_node_result, update_node_result, update_edge_result]
230231

@@ -275,15 +276,11 @@ def add_edges(self, edges: List[GEdge], teamid: str):
275276

276277
def delete_nodes(self, nodes: List[GNode], teamid: str=''):
277278
# delete tbase nodes
278-
# r = self.tb.search(f"@node_str: 'graph_id={teamid}'", index_name=self.node_indexname)
279279
r = self.tb.search(f"@node_str: *{teamid}*", index_name=self.node_indexname, limit=len(nodes))
280280

281281
tbase_nodeids = [data['node_id'] for data in r.docs] # 附带了definition信息
282-
# tbase_nodeids_dict = {data["node_id"]:data['id'] for data in r.docs} # 附带了definition信息
283282
delete_nodeids = [node.id for node in nodes]
284283
tbase_missing_nodeids = [nodeid for nodeid in delete_nodeids if nodeid not in tbase_nodeids]
285-
# delete_tbase_nodeids = [nodeid for nodeid in delete_nodeids if nodeid in tbase_nodeids]
286-
# delete_tbase_nodeids = [tbase_nodeids_dict[nodeid] for nodeid in delete_nodeids if nodeid in tbase_nodeids]
287284

288285
if len(tbase_missing_nodeids) > 0:
289286
logger.error(f"there must something wrong! ID not match, such as {tbase_missing_nodeids}")
@@ -313,15 +310,11 @@ def delete_nodes(self, nodes: List[GNode], teamid: str=''):
313310

314311
def delete_edges(self, edges: List[GEdge], teamid: str):
315312
# delete tbase nodes
316-
# r = self.tb.search(f"@edge_str: 'graph_id={teamid}'", index_name=self.edge_indexname)
317313
r = self.tb.search(f"@edge_str: *{teamid}*", index_name=self.edge_indexname, limit=len(edges))
318314

319315
tbase_edgeids = [data['edge_id'] for data in r.docs]
320-
# tbase_edgeids_dict = {data["edge_id"]:data['id'] for data in r.docs} # id附带了definition信息
321316
delete_edgeids = [f"{edge.start_id}__{edge.end_id}" for edge in edges]
322317
tbase_missing_edgeids = [edgeid for edgeid in delete_edgeids if edgeid not in tbase_edgeids]
323-
# delete_tbase_edgeids = [edgeid for edgeid in delete_edgeids if edgeid in tbase_edgeids]
324-
# delete_tbase_edgeids = [tbase_edgeids_dict[edgeid] for edgeid in delete_edgeids if edgeid in tbase_edgeids]
325318

326319
if len(tbase_missing_edgeids) > 0:
327320
logger.error(f"there must something wrong! ID not match, such as {tbase_missing_edgeids}")
@@ -343,14 +336,12 @@ def delete_edges(self, edges: List[GEdge], teamid: str):
343336

344337
def update_nodes(self, nodes: List[GNode], teamid: str):
345338
# delete tbase nodes
346-
# r = self.tb.search(f"@node_str: 'graph_id={teamid}'", index_name=self.node_indexname)
347339
r = self.tb.search(f"@node_str: *{teamid}*", index_name=self.node_indexname, limit=len(nodes))
348340
teamids_by_nodeid = {data['node_id']: data["node_str"] for data in r.docs}
349341

350342
tbase_nodeids = [data['node_id'] for data in r.docs] # 附带了definition信息
351343
update_nodeids = [node.id for node in nodes]
352344
tbase_missing_nodeids = [nodeid for nodeid in update_nodeids if nodeid not in tbase_nodeids]
353-
# update_tbase_nodeids = [nodeid for nodeid in update_nodeids if nodeid in tbase_nodeids]
354345

355346
if len(tbase_missing_nodeids) > 0:
356347
logger.error(f"there must something wrong! ID not match, such as {tbase_missing_nodeids}")
@@ -464,10 +455,12 @@ def search_nodes_by_text(self, text: str, node_type: str = None, teamid: str = N
464455
# base_query = f'(@node_str: graph_id={teamid})=>[KNN {top_k} @{key} $vector AS distance]'
465456
base_query = f'(*)=>[KNN {top_k} @{key} $vector AS distance]'
466457
query_params = {"vector": query_embedding}
467-
r = self.tb.vector_search(base_query, query_params=query_params)
458+
r = self.tb.vector_search(base_query, index_name=self.node_indexname, query_params=query_params)
468459

469460
for i in r.docs:
470-
nodeid_with_dist.append((i["ID"], float(i["distance"])))
461+
data_dict = i.__dict__
462+
if "ID" not in data_dict: continue # filter data
463+
nodeid_with_dist.append((data_dict["ID"], float(data_dict["distance"])))
471464

472465
nodeid_with_dist = sorted(nodeid_with_dist, key=lambda x:x[1], reverse=False)
473466
for nodeid, dis in nodeid_with_dist:
@@ -523,6 +516,7 @@ def create_ekg(
523516
teamid: str,
524517
service_name: str,
525518
rootid: str,
519+
graphid: str = "",
526520
intent_text: str = None,
527521
intent_nodes: List[str] = [],
528522
all_intent_list: List=[],
@@ -543,7 +537,7 @@ def create_ekg(
543537
result = self.text2graph(text, ancestor_list, all_intent_list, teamid)
544538

545539
# do write
546-
graph = self.write2kg(result["sls_graph"], result["tbase_graph"], teamid, do_save=do_save)
540+
graph = self.write2kg(result["sls_graph"], teamid, graphid, do_save=do_save)
547541
result["graph"] = graph
548542
return result
549543

@@ -559,13 +553,20 @@ def text2graph(self, text: str, intents: List[str], all_intent_list: List[str],
559553
dsl_graph = self.transform2dsl(sls_graph, intents, all_intent_list, teamid=teamid)
560554
return {"tbase_graph": tbase_graph, "sls_graph": sls_graph, "dsl_graph": dsl_graph}
561555

562-
def write2kg(self, ekg_sls_data: EKGSlsData, ekg_tbase_data: EKGTbaseData, teamid, do_save: bool=False) -> Graph:
556+
def write2kg(self, ekg_sls_data: EKGSlsData, teamid: str, graphid: str="", do_save: bool=False) -> Graph:
557+
'''
558+
:param graphid: str, use for record the new path
559+
'''
563560
# everytimes, it will add new nodes and edges
564561

565-
gbase_nodes = [TYPE2SCHEMA.get(node.type,)(**node.dict()) for node in ekg_sls_data.nodes]
566-
gbase_nodes = [GNode(id=node.id, type=node.type, attributes=node.attributes()) for node in gbase_nodes]
562+
gbase_nodes: List[EKGNodeSchema] = [TYPE2SCHEMA.get(node.type,)(**node.dict()) for node in ekg_sls_data.nodes]
563+
gbase_nodes: List[GNode] = [
564+
GNode(
565+
id=node.id, type=node.type,
566+
attributes=node.attributes() if graphid else {**node.attributes(), **{"graphid": f"{graphid}"}}
567+
) for node in gbase_nodes]
567568

568-
gbase_edges = [TYPE2SCHEMA.get("edge",)(**edge.dict()) for edge in ekg_sls_data.edges]
569+
gbase_edges: List[EKGEdgeSchema] = [TYPE2SCHEMA.get("edge",)(**edge.dict()) for edge in ekg_sls_data.edges]
569570
gbase_edges = [
570571
GEdge(start_id=edge.original_src_id1__, end_id=edge.original_dst_id2__,
571572
type="opsgptkg_"+edge.type.split("_")[2] + "_route_" + "opsgptkg_"+edge.type.split("_")[3],

0 commit comments

Comments
 (0)