@@ -479,58 +479,47 @@ def child_index(self):
479
479
480
480
def parent_index(self):
    """
    Create a :class:`ParentIndex` for finding parent edges of nodes.

    The index is built with a two-pass counting sort keyed on the child
    node of each edge. The first pass counts the edges per child, and the
    second pass scatters edge IDs into one contiguous slot range per
    child. This takes O(N + E) time and O(N) auxiliary space, where N is
    the number of nodes and E the number of edges.

    For node ``u``, the edges whose child is ``u`` are
    ``edge_index[index_range[u, 0]:index_range[u, 1]]``. A node that is
    not the child of any edge gets an empty range (start equals end).
    Within a group, edge IDs appear in the order they occur in the edge
    table; no sorting by left coordinate is performed, so callers should
    not rely on any coordinate ordering inside a group.

    :return: A new parent index container that can be used to
        efficiently find all edges where a given node is the child.
    :rtype: ParentIndex
    """
    num_nodes = self.num_nodes
    num_edges = self.num_edges
    edges_child = self.edges_child

    edge_index = np.zeros(num_edges, dtype=np.int32)
    index_range = np.zeros((num_nodes, 2), dtype=np.int32)

    # Nothing to index: every node keeps the empty range [0, 0).
    if num_edges == 0:
        return ParentIndex(edge_index, index_range)

    # Pass 1: count how many edges have each node as their child.
    child_counts = np.zeros(num_nodes, dtype=np.int32)
    for child_node in edges_child:
        child_counts[child_node] += 1

    # Turn the counts into slot ranges. Both the start and the end column
    # are initialised to the group's start offset: the end column doubles
    # as the insertion cursor in pass 2, and once every edge has been
    # placed it has advanced to the true (exclusive) end of the group.
    current_start = 0
    for node in range(num_nodes):
        index_range[node, 0] = current_start
        index_range[node, 1] = current_start
        current_start += child_counts[node]

    # Pass 2: write each edge ID at its child's cursor, then advance the
    # cursor. Visiting edges in ID order keeps each group in table order.
    for edge_id in range(num_edges):
        child = edges_child[edge_id]
        pos = index_range[child, 1]
        edge_index[pos] = edge_id
        index_range[child, 1] = pos + 1

    return ParentIndex(edge_index, index_range)
536
525
0 commit comments