Skip to content

Commit c3ae702

Browse files
Merge pull request #61 from openMetadataInitiative/sort-for-upload
Add `sort_nodes_for_upload()` method to Collection class
2 parents 776dec3 + 86914f8 commit c3ae702

File tree

1 file changed

+26
-0
lines changed

1 file changed

+26
-0
lines changed

pipeline/src/collection.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,3 +220,29 @@ def validate(self, ignore=None):
220220
def is_valid(self):
    """Return True when `self.validate()` reports no failures, False otherwise."""
    # An empty failures container (length zero) means the collection is valid.
    return not len(self.validate())
223+
224+
def sort_nodes_for_upload(self):
    """
    Return a list of nodes, sorted so that they can be uploaded to a graph database safely,
    i.e., child nodes will be saved before their parents.

    The upload code is assumed to generate @ids and update the Python instances accordingly.

    Nodes are placed in repeated passes over `self.nodes`: a node is emitted as soon as
    every node it links to has already been emitted (nodes without links are therefore
    emitted first). Ties are resolved by the iteration order of `self.nodes`, so the
    result is deterministic for a given collection.

    Returns:
        list: the node objects from `self.nodes`, children before parents.

    Raises:
        ValueError: if a full pass places no node, which happens when links form a
            cycle (including a node linking to itself) or when a node links to an
            id that is not a key of `self.nodes`.
    """
    pending = list(self.nodes)
    placed = set()  # ids already emitted; a set keeps the subset test cheap
    ordered_ids = []
    while pending:
        still_pending = []
        for node_id in pending:
            child_ids = {child.id for child in self.nodes[node_id].links}
            # `placed` is updated inside the pass, so a parent can be emitted in
            # the same pass as its children if they come earlier in `pending`.
            if child_ids <= placed:
                placed.add(node_id)
                ordered_ids.append(node_id)
            else:
                still_pending.append(node_id)
        if len(still_pending) == len(pending):
            # No progress: without this guard the loop would spin forever.
            raise ValueError(
                "Cannot sort nodes for upload: the following nodes have links that "
                "form a cycle or point outside the collection: "
                f"{still_pending!r}"
            )
        pending = still_pending
    return [self.nodes[node_id] for node_id in ordered_ids]
248+

0 commit comments

Comments
 (0)