@@ -451,11 +451,12 @@ def __init__(
451451 quick = False ,
452452 show_progress = True ,
453453 pango_source = "Viridian_pangolin" ,
454+ scorpio_source = "Viridian_scorpio" ,
454455 sample_group_id_prefix_len = 10 ,
455456 ):
456457 self .ts = ts
457458 self .pango_source = pango_source
458- self .scorpio_source = "Viridian_scorpio"
459+ self .scorpio_source = scorpio_source
459460 self .strain_map = {}
460461 self .recombinants = np .where (ts .nodes_flags == core .NODE_IS_RECOMBINANT )[0 ]
461462
@@ -970,6 +971,19 @@ def recombinants_summary(
970971 ):
971972 if parent_pango_source is None :
972973 parent_pango_source = self .pango_source
974+
def node_info(node, label):
    """Return a flat dict of summary columns describing ``node``.

    The returned keys, in order, are ``label`` (the node ID itself) and
    ``{label}_pango``, ``{label}_scorpio``, ``{label}_time`` and
    ``{label}_date``. Lineage fields missing from the node's metadata
    are reported as ``"Unknown"``.

    ``self`` and ``parent_pango_source`` are taken from the enclosing
    ``recombinants_summary`` scope.
    """
    metadata = self.nodes_metadata[node]
    # Parent-type nodes ("parent_left", "parent_right", "parent_mrca") must
    # be looked up under the caller-selected ``parent_pango_source`` key, as
    # the pre-refactor code did for the left/right parents; all other
    # nodes use the default pango source.
    if label.startswith("parent_"):
        pango_key = parent_pango_source
    else:
        pango_key = self.pango_source
    return {
        label: node,
        f"{label}_pango": metadata.get(pango_key, "Unknown"),
        f"{label}_scorpio": metadata.get(self.scorpio_source, "Unknown"),
        f"{label}_time": self.ts.nodes_time[node],
        f"{label}_date": self.nodes_date[node],
    }
986+
973987 data = []
974988 for u in self .recombinants :
975989 md = dict (self .nodes_metadata [u ]["sc2ts" ])
@@ -1011,30 +1025,33 @@ def recombinants_summary(
10111025 interval = breakpoint_intervals [0 ]
10121026 parent_left = hmm_match ["path" ][0 ]["parent" ]
10131027 parent_right = hmm_match ["path" ][1 ]["parent" ]
1014- data .append (
1015- {
1016- "recombinant" : u ,
1017- "descendants" : self .nodes_max_descendant_samples [u ],
1018- "sample" : v ,
1019- "sample_pango" : causal_lineages [v ],
1020- "num_samples" : len (samples ),
1021- "distinct_sample_pango" : len (set (causal_lineages .values ())),
1022- "interval_left" : interval [0 ][0 ],
1023- "interval_right" : interval [0 ][1 ],
1024- "parent_left" : parent_left ,
1025- "parent_right" : parent_right ,
1026- "parent_left_pango" : self .nodes_metadata [parent_left ].get (
1027- parent_pango_source ,
1028- "Unknown" ,
1029- ),
1030- "parent_right_pango" : self .nodes_metadata [parent_right ].get (
1031- parent_pango_source ,
1032- "Unknown" ,
1033- ),
1034- "num_mutations" : len (hmm_match ["mutations" ]),
1035- ** md ,
1036- }
1037- )
1028+
1029+ datum = {
1030+ "num_descendant_samples" : self .nodes_max_descendant_samples [u ],
1031+ "num_samples" : len (samples ),
1032+ "distinct_sample_pango" : len (set (causal_lineages .values ())),
1033+ "interval_left" : interval [0 ][0 ],
1034+ "interval_right" : interval [0 ][1 ],
1035+ "num_mutations" : len (hmm_match ["mutations" ]),
1036+ "Viridian_amplicon_scheme" : self .nodes_metadata [v ].get (
1037+ "Viridian_amplicon_scheme" , "Unknown"
1038+ ),
1039+ "Artic_primer_version" : self .nodes_metadata [v ].get (
1040+ "Artic_primer_version" , "Unknown"
1041+ ),
1042+ ** md ,
1043+ }
1044+
1045+ for node , label in [
1046+ (u , "recombinant" ),
1047+ (v , "sample" ),
1048+ (parent_left , "parent_left" ),
1049+ (parent_right , "parent_right" ),
1050+ ]:
1051+ datum = {** datum , ** node_info (node , label )}
1052+
1053+ data .append (datum )
1054+
10381055 # Compute the MRCAs by iterating along trees in order of
10391056 # breakpoint. We use the right interval
10401057 df = pd .DataFrame (data ).sort_values ("interval_right" )
@@ -1051,10 +1068,11 @@ def recombinants_summary(
10511068 left_path = jit .get_root_path (tree , row .parent_left )
10521069 assert tree .parent (row .recombinant ) == row .parent_left
10531070 mrca = jit .get_path_mrca (left_path , right_path , self .ts .nodes_time )
1054- mrca_data .append (mrca )
1055- mrca_data = np .array (mrca_data )
1056- df ["mrca" ] = mrca_data
1057- df ["t_mrca" ] = self .ts .nodes_time [mrca_data ]
1071+ mrca_data .append (node_info (mrca , "parent_mrca" ))
1072+
1073+ mrca_df = pd .DataFrame (mrca_data )
1074+ for col in mrca_df :
1075+ df [col ] = mrca_df [col ]
10581076
10591077 if characterise_copying :
10601078 # Slow - don't do this unless we really want to.
0 commit comments