Skip to content

Commit 8ae5a93

Browse files
committed
update & add scripts used in manuscript
1 parent 9e5321b commit 8ae5a93

File tree

7 files changed

+563
-134
lines changed

7 files changed

+563
-134
lines changed

src/python/misc/collectMetrics.py

Lines changed: 47 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -24,8 +24,13 @@ def parse() -> Namespace:
2424
parser.add_argument("time_uncalled4", type=str, help="Path to the tools time file")
2525
parser.add_argument("time_f5c_eventalign", type=str, help="Path to the tools time file")
2626
parser.add_argument("time_f5c_resquiggle", type=str, help="Path to the tools time file")
27+
parser.add_argument("subtools_dynamont", type=str, help="Path to the downstream tool metrics file")
28+
parser.add_argument("subtools_uncalled4", type=str, help="Path to the downstream tool metrics file")
29+
parser.add_argument("subtools_f5c_eventalign", type=str, help="Path to the downstream tool metrics file")
30+
parser.add_argument("subtools_f5c_resquiggle", type=str, help="Path to the downstream tool metrics file")
2731
parser.add_argument("--tombo", type=str, default=None, help="Path to the reads metrics json file")
28-
parser.add_argument("--time_tombo", type=str, default=None, help="Path to the tolls time file")
32+
parser.add_argument("--time_tombo", type=str, default=None, help="Path to the tools time file")
33+
parser.add_argument("--subtools_tombo", type=str, default=None, help="Path to the downstream tool metrics time file")
2934
return parser.parse_args()
3035

3136
def main() -> None:
@@ -47,13 +52,23 @@ def main() -> None:
4752
"f5c_resquiggle": args.time_f5c_resquiggle
4853
}
4954

55+
downstream_tools = {
56+
"dynamont": args.subtools_dynamont,
57+
"uncalled4": args.subtools_uncalled4,
58+
"f5c_eventalign": args.subtools_f5c_eventalign,
59+
"f5c_resquiggle": args.subtools_f5c_resquiggle
60+
}
61+
5062
# NA rna004
5163
if args.tombo and args.tombo != '':
5264
jsons["tombo"] = args.tombo
5365

5466
if args.time_tombo and args.time_tombo != '':
5567
times["tombo"] = args.time_tombo
5668

69+
if args.subtools_tombo and args.subtools_tombo != '':
70+
downstream_tools["tombo"] = args.subtools_tombo
71+
5772
for name, json_path in jsons.items():
5873
with open(json_path, "r") as json_file:
5974
json_data = json.load(json_file)
@@ -79,14 +94,33 @@ def main() -> None:
7994
})
8095
scores = pd.concat([scores, new_entry], ignore_index=True)
8196

82-
control = pd.read_csv(args.control, sep="\t")
83-
for _, row in control.iterrows():
84-
new_entry = pd.DataFrame({
85-
"Tool": ["Control Random", "Control Uniform"],
86-
"Value": [row["Value"], row["Value"]],
87-
"Metric": [row["Metric"].lower() + '_length', row["Metric"].lower() + '_length']
88-
})
89-
scores = pd.concat([scores, new_entry], ignore_index=True)
97+
for name, downstream_path in downstream_tools.items():
98+
with open(downstream_path, "r") as downstream_file:
99+
total_assembly_length = int(downstream_file.readline().strip().split(': ')[1])
100+
n50 = int(downstream_file.readline().strip().split(': ')[1])
101+
mean_cov = float(downstream_file.readline().strip().split(': ')[1])
102+
struct_vars = int(downstream_file.readline().strip().split(': ')[1])
103+
104+
new_entry = pd.DataFrame({
105+
"Tool": [name, name, name, name],
106+
"Value": [total_assembly_length, n50, mean_cov, struct_vars],
107+
"Metric": ["flye total length", "flye n50", "flye mean coverage", "SVIM structural variants"]
108+
})
109+
scores = pd.concat([scores, new_entry], ignore_index=True)
110+
111+
# control = pd.read_csv(args.control, sep="\t")
112+
# for _, row in control.iterrows():
113+
# new_entry = pd.DataFrame({
114+
# "Tool": ["Control Random", "Control Uniform"],
115+
# "Value": [row["Value"], row["Value"]],
116+
# "Metric": [row["Metric"].lower() + '_length', row["Metric"].lower() + '_length']
117+
# })
118+
# scores = pd.concat([scores, new_entry], ignore_index=True)
119+
120+
#! remove controls and dorado
121+
scores = scores[scores["Tool"] != "Control Random"]
122+
scores = scores[scores["Tool"] != "Control Uniform"]
123+
scores = scores[scores["Tool"] != "Dorado"]
90124

91125
# fix names
92126
scores["Tool"] = scores["Tool"].replace(
@@ -100,11 +134,12 @@ def main() -> None:
100134
}
101135
)
102136

103-
# Ensure Value column is numeric where needed
104-
# scores["Value"] = pd.to_numeric(scores["Value"], errors="coerce")
137+
# Remove unwanted metrics
138+
removed_metrics = ["missing reads", "identical reads"]
139+
scores = scores[~scores["Metric"].isin(removed_metrics)]
105140

106141
# Exclude specific metrics (e.g., "Time in hh:mm:ss") from the Metric Score calculation
107-
excluded_metrics = ["Time in hh:mm:ss"]
142+
excluded_metrics = ["Time in hh:mm:ss", "Memory in MB"]
108143
numeric_scores = scores[~scores["Metric"].isin(excluded_metrics)]
109144
numeric_scores["Value"] = pd.to_numeric(numeric_scores["Value"], errors="coerce")
110145

src/python/misc/compareDatasets.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,20 @@
7171
"Read Lengths" : "/data/fass5/projects/js_dynamont/benchmark/comparison/rna004/ivt/comparison_w0_readLengths.csv",
7272
"Read Quality" : "/data/fass5/projects/js_dynamont/benchmark/comparison/rna004/ivt/comparison_w0_readQuality.csv",
7373
},
74+
},
75+
"DNA R10.4.1 5kHz" : {
76+
"H. Sapiens" : {
77+
"Score" : "/data/fass5/projects/js_dynamont/benchmark/comparison/dna_r10.4.1_5kHz/h_sapiens/comparison_w0_score.txt",
78+
"Reads Segmentation Ratio" : "/data/fass5/projects/js_dynamont/benchmark/comparison/dna_r10.4.1_5kHz/h_sapiens/comparison_w0_segmentedReadsRatio.csv",
79+
"Read Lengths" : "/data/fass5/projects/js_dynamont/benchmark/comparison/dna_r10.4.1_5kHz/h_sapiens/comparison_w0_readLengths.csv",
80+
"Read Quality" : "/data/fass5/projects/js_dynamont/benchmark/comparison/dna_r10.4.1_5kHz/h_sapiens/comparison_w0_readQuality.csv",
81+
},
82+
"Zymo hmw" : {
83+
"Score" : "/data/fass5/projects/js_dynamont/benchmark/comparison/dna_r10.4.1_5kHz/zymo_hmw/comparison_w0_score.txt",
84+
"Reads Segmentation Ratio" : "/data/fass5/projects/js_dynamont/benchmark/comparison/dna_r10.4.1_5kHz/zymo_hmw/comparison_w0_segmentedReadsRatio.csv",
85+
"Read Lengths" : "/data/fass5/projects/js_dynamont/benchmark/comparison/dna_r10.4.1_5kHz/zymo_hmw/comparison_w0_readLengths.csv",
86+
"Read Quality" : "/data/fass5/projects/js_dynamont/benchmark/comparison/dna_r10.4.1_5kHz/zymo_hmw/comparison_w0_readQuality.csv",
87+
}
7488
}
7589
}
7690

0 commit comments

Comments
 (0)