Skip to content

Commit 534b198

Browse files
committed
feat: fix neo4j id
1 parent c4b699c commit 534b198

File tree

4 files changed

+83
-42
lines changed

4 files changed

+83
-42
lines changed

aperag/graph/lightrag/kg/neo4j_sync_impl.py

Lines changed: 43 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -460,6 +460,8 @@ def _sync_get_knowledge_graph():
460460
result = KnowledgeGraph()
461461
seen_nodes = set()
462462
seen_edges = set()
463+
# Map Neo4j internal id -> entity_id for edges (unified semantics: source/target = entity_id)
464+
internal_id_to_entity_id: dict[int, str] = {}
463465

464466
with Neo4jSyncConnectionManager.get_session(database=self._DATABASE) as session:
465467
if node_label == "*":
@@ -476,18 +478,22 @@ def _sync_get_knowledge_graph():
476478

477479
for record in node_results:
478480
node = record["n"]
479-
node_id = node.id
480-
if node_id not in seen_nodes:
481+
internal_id = node.id
482+
if internal_id not in seen_nodes:
483+
entity_id = node.get("entity_id")
484+
node_id = str(entity_id) if entity_id is not None else f"{internal_id}"
485+
internal_id_to_entity_id[internal_id] = node_id
486+
entity_type = node.get("entity_type")
481487
result.nodes.append(
482488
KnowledgeGraphNode(
483-
id=f"{node_id}",
484-
labels=[node.get("entity_id")],
489+
id=node_id,
490+
labels=[entity_type] if entity_type else [node_id],
485491
properties=dict(node),
486492
)
487493
)
488-
seen_nodes.add(node_id)
494+
seen_nodes.add(internal_id)
489495

490-
# Get edges between these nodes
496+
# Get edges between these nodes; source/target must be entity_id
491497
edge_query = """
492498
MATCH (a)-[r]-(b)
493499
WHERE id(a) IN $node_ids AND id(b) IN $node_ids
@@ -499,15 +505,18 @@ def _sync_get_knowledge_graph():
499505
rel = record["r"]
500506
edge_id = rel.id
501507
if edge_id not in seen_edges:
502-
result.edges.append(
503-
KnowledgeGraphEdge(
504-
id=f"{edge_id}",
505-
type=rel.type,
506-
source=f"{record['a'].id}",
507-
target=f"{record['b'].id}",
508-
properties=dict(rel),
508+
src_entity = internal_id_to_entity_id.get(record["a"].id)
509+
tgt_entity = internal_id_to_entity_id.get(record["b"].id)
510+
if src_entity is not None and tgt_entity is not None:
511+
result.edges.append(
512+
KnowledgeGraphEdge(
513+
id=f"{edge_id}",
514+
type=rel.type,
515+
source=src_entity,
516+
target=tgt_entity,
517+
properties=dict(rel),
518+
)
509519
)
510-
)
511520
seen_edges.add(edge_id)
512521
else:
513522
# BFS from specific node
@@ -527,30 +536,37 @@ def _sync_get_knowledge_graph():
527536
for record in results:
528537
if record["nodes"]:
529538
for node in record["nodes"]:
530-
node_id = node.id
531-
if node_id not in seen_nodes:
539+
internal_id = node.id
540+
if internal_id not in seen_nodes:
541+
entity_id = node.get("entity_id")
542+
node_id = str(entity_id) if entity_id is not None else f"{internal_id}"
543+
internal_id_to_entity_id[internal_id] = node_id
544+
entity_type = node.get("entity_type")
532545
result.nodes.append(
533546
KnowledgeGraphNode(
534-
id=f"{node_id}",
535-
labels=[node.get("entity_id")],
547+
id=node_id,
548+
labels=[entity_type] if entity_type else [node_id],
536549
properties=dict(node),
537550
)
538551
)
539-
seen_nodes.add(node_id)
552+
seen_nodes.add(internal_id)
540553

541554
if record["rels"]:
542555
for rel in record["rels"]:
543556
edge_id = rel.id
544557
if edge_id not in seen_edges:
545-
result.edges.append(
546-
KnowledgeGraphEdge(
547-
id=f"{edge_id}",
548-
type=rel.type,
549-
source=f"{rel.start_node.id}",
550-
target=f"{rel.end_node.id}",
551-
properties=dict(rel),
558+
src_entity = internal_id_to_entity_id.get(rel.start_node.id)
559+
tgt_entity = internal_id_to_entity_id.get(rel.end_node.id)
560+
if src_entity is not None and tgt_entity is not None:
561+
result.edges.append(
562+
KnowledgeGraphEdge(
563+
id=f"{edge_id}",
564+
type=rel.type,
565+
source=src_entity,
566+
target=tgt_entity,
567+
properties=dict(rel),
568+
)
552569
)
553-
)
554570
seen_edges.add(edge_id)
555571

556572
logger.info(f"Retrieved subgraph with {len(result.nodes)} nodes and {len(result.edges)} edges")

aperag/graph/lightrag/kg/pg_ops_sync_graph_storage.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -316,7 +316,7 @@ def _sync_get_knowledge_graph():
316316
nodes_data = db_ops.get_graph_nodes_batch(self.workspace, matching_labels)
317317

318318
for entity_id, node_data in nodes_data.items():
319-
# Assemble properties from individual fields
319+
# Unified semantics: id=entity_id, labels=[entity_type] or [entity_id], edges by entity_id
320320
properties = {
321321
"entity_id": node_data["entity_id"],
322322
"entity_type": node_data.get("entity_type"),
@@ -331,10 +331,11 @@ def _sync_get_knowledge_graph():
331331
# Remove None values for cleaner output
332332
properties = {k: v for k, v in properties.items() if v is not None}
333333

334+
entity_type = node_data.get("entity_type")
334335
result.nodes.append(
335336
KnowledgeGraphNode(
336337
id=entity_id,
337-
labels=[node_data.get("entity_type", entity_id)],
338+
labels=[entity_type] if entity_type else [entity_id],
338339
properties=properties,
339340
)
340341
)

aperag/graph/lightrag/types.py

Lines changed: 14 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -44,16 +44,28 @@ class GPTKeywordExtractionFormat(BaseModel):
4444

4545

4646
class KnowledgeGraphNode(BaseModel):
47+
"""
48+
Unified semantics for graph storage and API:
49+
- id: entity_id (business identifier). Used for display, node identity, and edge source/target.
50+
Must be stable and unique per entity; frontend uses it as node label and for merge/APIs.
51+
- labels: optional list of semantic labels, e.g. [entity_type] for categorization/filtering.
52+
- properties: entity_id, entity_type, description, source_id, file_path, entity_name, etc.
53+
"""
54+
4755
id: str
4856
labels: list[str]
4957
properties: dict[str, Any] # anything else goes here
5058

5159

5260
class KnowledgeGraphEdge(BaseModel):
61+
"""
62+
source/target must be the same as KnowledgeGraphNode.id (i.e. entity_id) for correct linking.
63+
"""
64+
5365
id: str
5466
type: Optional[str]
55-
source: str # id of source node
56-
target: str # id of target node
67+
source: str # entity_id of source node
68+
target: str # entity_id of target node
5769
properties: dict[str, Any] # anything else goes here
5870

5971

aperag/service/graph_service.py

Lines changed: 23 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -118,24 +118,36 @@ async def get_knowledge_graph(
118118
await rag.finalize_storages()
119119

120120
def _convert_graph_to_dict(self, nodes, edges, is_truncated=False) -> Dict[str, Any]:
121-
"""Convert LightRAG graph objects to dictionary format"""
121+
"""
122+
Convert KnowledgeGraph to API dict. Semantics (see KnowledgeGraphNode):
123+
- id: node identity and display key (storage must use entity_id).
124+
- labels: pass-through from storage (e.g. [entity_type]); fallback to entity_id for display if empty.
125+
- properties: entity_id, entity_type, description, source_id, file_path, entity_name.
126+
"""
122127

123128
def extract_properties(obj, default_fields):
124129
if hasattr(obj, "properties") and obj.properties:
125130
return obj.properties
126131
return {field: getattr(obj, field, None) for field in default_fields if hasattr(obj, field)}
127132

133+
default_node_fields = ["entity_id", "entity_name", "entity_type", "description", "source_id", "file_path"]
134+
135+
def node_to_item(node):
136+
props = extract_properties(node, default_node_fields)
137+
# Use storage labels when present; else fallback so display is never numeric id
138+
if getattr(node, "labels", None) and node.labels:
139+
labels = node.labels
140+
else:
141+
display = props.get("entity_id") or props.get("entity_name")
142+
labels = [display] if display is not None else ([node.id] if hasattr(node, "id") else [])
143+
return {
144+
"id": node.id,
145+
"labels": labels,
146+
"properties": props,
147+
}
148+
128149
return {
129-
"nodes": [
130-
{
131-
"id": node.id,
132-
"labels": [node.id] if hasattr(node, "id") else [],
133-
"properties": extract_properties(
134-
node, ["entity_id", "entity_type", "description", "source_id", "file_path"]
135-
),
136-
}
137-
for node in nodes
138-
],
150+
"nodes": [node_to_item(node) for node in nodes],
139151
"edges": [
140152
{
141153
"id": edge.id,

0 commit comments

Comments
 (0)