Skip to content

Commit f5ea3d5

Browse files
authored
When saving the KG, only save the node ID in the relationships (#1926)
When saving the knowledge graph, each relationship now stores only the IDs of its source and target nodes instead of all of their fields. This can save a very large amount of disk space.
1 parent 88e5fd3 commit f5ea3d5

File tree

1 file changed

+18
-2
lines changed

1 file changed

+18
-2
lines changed

src/ragas/testset/graph.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from enum import Enum
77
from pathlib import Path
88

9-
from pydantic import BaseModel, Field
9+
from pydantic import BaseModel, Field, field_serializer
1010

1111

1212
class UUIDEncoder(json.JSONEncoder):
@@ -132,6 +132,10 @@ def __eq__(self, other: object) -> bool:
132132
if isinstance(other, Relationship):
133133
return self.id == other.id
134134
return False
135+
136+
@field_serializer("source", "target")
137+
def serialize_node(self, node: Node):
138+
return node.id
135139

136140

137141
@dataclass
@@ -221,7 +225,19 @@ def load(cls, path: t.Union[str, Path]) -> "KnowledgeGraph":
221225
data = json.load(f)
222226

223227
nodes = [Node(**node_data) for node_data in data["nodes"]]
224-
relationships = [Relationship(**rel_data) for rel_data in data["relationships"]]
228+
229+
nodes_map = {str(node.id): node for node in nodes}
230+
relationships = [
231+
Relationship(
232+
id=rel_data["id"],
233+
type=rel_data["type"],
234+
source=nodes_map[rel_data["source"]],
235+
target=nodes_map[rel_data["target"]],
236+
bidirectional=rel_data["bidirectional"],
237+
properties=rel_data["properties"],
238+
)
239+
for rel_data in data["relationships"]
240+
]
225241

226242
kg = cls()
227243
kg.nodes.extend(nodes)

0 commit comments

Comments
 (0)