Skip to content

Commit bfded78

Browse files
committed
Add sort_nodes_for_upload() method to Collection class
1 parent 776dec3 commit bfded78

File tree

1 file changed

+27
-0
lines changed

1 file changed

+27
-0
lines changed

pipeline/src/collection.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,3 +220,30 @@ def validate(self, ignore=None):
220220
def is_valid(self):
    """Return True if ``self.validate()`` reports no failures, else False."""
    return len(self.validate()) == 0
223+
224+
def sort_nodes_for_upload(self):
    """
    Return a list of nodes, sorted so that they can be uploaded to a graph database safely,
    i.e., child nodes will be saved before their parents.

    A node's children are the objects in its ``links``; each child is matched
    to an entry of ``self.nodes`` through its ``id`` attribute.

    The upload code is assumed to generate @ids and update the Python instances accordingly.

    Returns:
        list: the values of ``self.nodes``, every node placed after all of
        the nodes it links to.

    Raises:
        ValueError: if some nodes can never be placed because their links
            form a cycle or refer to an id that is not a key of
            ``self.nodes``.
    """
    pending = set(self.nodes)
    # initial step: nodes with no children (downstream links) can be uploaded first
    ordered_ids = [
        node_id for node_id in pending if len(self.nodes[node_id].links) == 0
    ]
    # `placed` mirrors `ordered_ids` as a set so membership tests stay cheap
    placed = set(ordered_ids)
    pending.difference_update(placed)
    # now iteratively place nodes whose children have all been placed already
    while pending:
        newly_placed = []
        for node_id in pending:
            if all(child.id in placed for child in self.nodes[node_id].links):
                # record immediately so parents visited later in this same
                # pass can already see this node as placed
                ordered_ids.append(node_id)
                placed.add(node_id)
                newly_placed.append(node_id)
        if not newly_placed:
            # a full pass made no progress, so it never will: stop instead
            # of looping forever
            raise ValueError(
                "Cannot sort nodes for upload: circular or unresolved links "
                "among nodes {!r}".format(sorted(pending, key=str))
            )
        pending.difference_update(newly_placed)
    return [self.nodes[node_id] for node_id in ordered_ids]

0 commit comments

Comments
 (0)