@@ -24,8 +24,13 @@ def parse() -> Namespace:
2424 parser .add_argument ("time_uncalled4" , type = str , help = "Path to the tools time file" )
2525 parser .add_argument ("time_f5c_eventalign" , type = str , help = "Path to the tools time file" )
2626 parser .add_argument ("time_f5c_resquiggle" , type = str , help = "Path to the tools time file" )
27+ parser .add_argument ("subtools_dynamont" , type = str , help = "Path to the downstream tool metrics file" )
28+ parser .add_argument ("subtools_uncalled4" , type = str , help = "Path to the downstream tool metrics file" )
29+ parser .add_argument ("subtools_f5c_eventalign" , type = str , help = "Path to the downstream tool metrics file" )
30+ parser .add_argument ("subtools_f5c_resquiggle" , type = str , help = "Path to the downstream tool metrics file" )
2731 parser .add_argument ("--tombo" , type = str , default = None , help = "Path to the reads metrics json file" )
28- parser .add_argument ("--time_tombo" , type = str , default = None , help = "Path to the tolls time file" )
32+ parser .add_argument ("--time_tombo" , type = str , default = None , help = "Path to the tools time file" )
33+ parser .add_argument ("--subtools_tombo" , type = str , default = None , help = "Path to the downstream tool metrics time file" )
2934 return parser .parse_args ()
3035
3136def main () -> None :
@@ -47,13 +52,23 @@ def main() -> None:
4752 "f5c_resquiggle" : args .time_f5c_resquiggle
4853 }
4954
55+ downstream_tools = {
56+ "dynamont" : args .subtools_dynamont ,
57+ "uncalled4" : args .subtools_uncalled4 ,
58+ "f5c_eventalign" : args .subtools_f5c_eventalign ,
59+ "f5c_resquiggle" : args .subtools_f5c_resquiggle
60+ }
61+
5062 # NA rna004
5163 if args .tombo and args .tombo != '' :
5264 jsons ["tombo" ] = args .tombo
5365
5466 if args .time_tombo and args .time_tombo != '' :
5567 times ["tombo" ] = args .time_tombo
5668
69+ if args .subtools_tombo and args .subtools_tombo != '' :
70+ downstream_tools ["tombo" ] = args .subtools_tombo
71+
5772 for name , json_path in jsons .items ():
5873 with open (json_path , "r" ) as json_file :
5974 json_data = json .load (json_file )
@@ -79,14 +94,33 @@ def main() -> None:
7994 })
8095 scores = pd .concat ([scores , new_entry ], ignore_index = True )
8196
82- control = pd .read_csv (args .control , sep = "\t " )
83- for _ , row in control .iterrows ():
84- new_entry = pd .DataFrame ({
85- "Tool" : ["Control Random" , "Control Uniform" ],
86- "Value" : [row ["Value" ], row ["Value" ]],
87- "Metric" : [row ["Metric" ].lower () + '_length' , row ["Metric" ].lower () + '_length' ]
88- })
89- scores = pd .concat ([scores , new_entry ], ignore_index = True )
97+ for name , downstream_path in downstream_tools .items ():
98+ with open (downstream_path , "r" ) as downstream_file :
99+ total_assembly_length = int (downstream_file .readline ().strip ().split (': ' )[1 ])
100+ n50 = int (downstream_file .readline ().strip ().split (': ' )[1 ])
101+ mean_cov = float (downstream_file .readline ().strip ().split (': ' )[1 ])
102+ struct_vars = int (downstream_file .readline ().strip ().split (': ' )[1 ])
103+
104+ new_entry = pd .DataFrame ({
105+ "Tool" : [name , name , name , name ],
106+ "Value" : [total_assembly_length , n50 , mean_cov , struct_vars ],
107+ "Metric" : ["flye total length" , "flye n50" , "flye mean coverage" , "SVIM structural variants" ]
108+ })
109+ scores = pd .concat ([scores , new_entry ], ignore_index = True )
110+
111+ # control = pd.read_csv(args.control, sep="\t")
112+ # for _, row in control.iterrows():
113+ # new_entry = pd.DataFrame({
114+ # "Tool": ["Control Random", "Control Uniform"],
115+ # "Value": [row["Value"], row["Value"]],
116+ # "Metric": [row["Metric"].lower() + '_length', row["Metric"].lower() + '_length']
117+ # })
118+ # scores = pd.concat([scores, new_entry], ignore_index=True)
119+
120+ #! Drop the "Control Random", "Control Uniform" and "Dorado" rows from scores
121+ scores = scores [scores ["Tool" ] != "Control Random" ]
122+ scores = scores [scores ["Tool" ] != "Control Uniform" ]
123+ scores = scores [scores ["Tool" ] != "Dorado" ]
90124
91125 # fix names
92126 scores ["Tool" ] = scores ["Tool" ].replace (
@@ -100,11 +134,12 @@ def main() -> None:
100134 }
101135 )
102136
103- # Ensure Value column is numeric where needed
104- # scores["Value"] = pd.to_numeric(scores["Value"], errors="coerce")
137+ # Remove unwanted metrics
138+ removed_metrics = ["missing reads" , "identical reads" ]
139+ scores = scores [~ scores ["Metric" ].isin (removed_metrics )]
105140
106141 # Exclude specific metrics (e.g., "Time in hh:mm:ss") from the Metric Score calculation
107- excluded_metrics = ["Time in hh:mm:ss" ]
142+ excluded_metrics = ["Time in hh:mm:ss" , "Memory in MB" ]
108143 numeric_scores = scores [~ scores ["Metric" ].isin (excluded_metrics )]
109144 numeric_scores ["Value" ] = pd .to_numeric (numeric_scores ["Value" ], errors = "coerce" )
110145
0 commit comments