@@ -419,6 +419,8 @@ def nodes_df(self):
419419 "time" : ts .nodes_time ,
420420 "num_mutations" : self .nodes_num_mutations ,
421421 "ancestors_span" : child_right - child_left ,
422+ "child_left" : child_left , # FIXME add test for this
423+ "child_right" : child_right , # FIXME add test for this
422424 "is_sample" : is_sample ,
423425 }
424426 )
@@ -427,6 +429,8 @@ def nodes_df(self):
427429 "time" : "float64" ,
428430 "num_mutations" : "int" ,
429431 "ancestors_span" : "float64" ,
432+ "child_left" : "float64" ,
433+ "child_right" : "float64" ,
430434 "is_sample" : "bool" ,
431435 }
432436 )
@@ -551,3 +555,53 @@ def calc_mutations_per_tree(self):
551555 mutations_per_tree = np .zeros (self .ts .num_trees , dtype = np .int64 )
552556 mutations_per_tree [unique_values ] = counts
553557 return mutations_per_tree
558+
def compute_ancestor_spans_heatmap_data(self, win_x_size=1_000_000, win_y_size=500):
    """
    Calculates the average ancestor span in a genomic-time window.

    The genome is cut into windows of ``win_x_size`` starting at position 0
    and time into windows of ``win_y_size`` starting at time 0. Every node
    in ``self.nodes_df`` whose ``ancestors_span`` is not ``-inf`` is counted
    in each genomic window that ``[child_left, child_right)`` overlaps, in
    the time window containing the node's ``time``.

    :param win_x_size: Width of a genomic window.
    :param win_y_size: Height of a time window.
    :return: A DataFrame with one row per (genomic window, time window)
        cell and columns ``genomic_position`` (left edge of the window),
        ``time`` (lower edge of the window) and ``average_ancestor_span``
        (NaN for cells that no node falls into). The frame is empty when
        no node has a finite span.
    """
    dtypes = {
        "genomic_position": "int",
        "time": "int",
        "average_ancestor_span": "float64",
    }

    nodes_df = self.nodes_df
    nodes_df = nodes_df[nodes_df.ancestors_span != -np.inf]
    if nodes_df.empty:
        # Nothing to bin; max() of an empty column is NaN and cannot be
        # turned into a window count.
        return pd.DataFrame({name: np.zeros(0) for name in dtypes}).astype(dtypes)

    # Plain arrays avoid label-based Series lookups inside the loop.
    nodes_left = nodes_df.child_left.to_numpy()
    nodes_right = nodes_df.child_right.to_numpy()
    nodes_time = nodes_df.time.to_numpy()
    ancestors_span = nodes_df.ancestors_span.to_numpy()

    # Windows are anchored at 0 on both axes so that the bin indices below
    # and the coordinates emitted at the end refer to the same grid.
    num_x_wins = max(1, int(np.ceil(nodes_right.max() / win_x_size)))
    num_y_wins = max(1, int(np.ceil(nodes_time.max() / win_y_size)))
    heatmap_sums = np.zeros((num_x_wins, num_y_wins))
    heatmap_counts = np.zeros((num_x_wins, num_y_wins))

    # First overlapped window (inclusive) and last overlapped window
    # (exclusive): ceil keeps the partially covered final window while not
    # spilling over when child_right sits exactly on a window boundary.
    x_starts = np.floor(nodes_left / win_x_size).astype(int)
    x_ends = np.ceil(nodes_right / win_x_size).astype(int)
    # A time equal to an exact multiple of win_y_size at the top of the
    # range would index one past the last window, so clamp into range.
    y_bins = np.clip(
        np.floor(nodes_time / win_y_size).astype(int), 0, num_y_wins - 1
    )
    # NOTE: each overlapped window receives the node's span capped at the
    # window width; this only differs from the window width for nodes
    # shorter than one window.
    capped_spans = np.minimum(ancestors_span, win_x_size)

    for x_start, x_end, y, span in zip(x_starts, x_ends, y_bins, capped_spans):
        heatmap_sums[x_start:x_end, y] += span
        heatmap_counts[x_start:x_end, y] += 1

    # Cells without any node stay NaN instead of triggering 0/0 warnings.
    avg_spans = np.divide(
        heatmap_sums,
        heatmap_counts,
        out=np.full(heatmap_sums.shape, np.nan),
        where=heatmap_counts > 0,
    )

    # Row-major (x-major) coordinates matching avg_spans.flatten().
    x_coords = np.repeat(np.arange(num_x_wins) * win_x_size, num_y_wins)
    y_coords = np.tile(np.arange(num_y_wins) * win_y_size, num_x_wins)

    df = pd.DataFrame(
        {
            "genomic_position": x_coords,
            "time": y_coords,
            "average_ancestor_span": avg_spans.flatten(),
        }
    )
    return df.astype(dtypes)
0 commit comments