Skip to content

Commit 8488eb8

Browse files
committed
new samples.tsv parsing
1 parent a655e5e commit 8488eb8

File tree

1 file changed

+45
-5
lines changed

1 file changed

+45
-5
lines changed

workflow/rules/common.smk

Lines changed: 45 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,18 +14,58 @@ def parse_samples(fl):
1414
line = l.rstrip()
1515
if not line or line.startswith("#"):
1616
continue
17-
try:
18-
sample, path = line.split()
19-
except:
17+
fields = line.split()
18+
19+
if len(fields) == 2:
20+
# if samples.tsv has the old format, assume aa-tRNA-seq input
21+
try:
22+
sample, path = fields
23+
24+
except ValueError:
2025
print(
21-
"samples file must have 2 columns, sample_id and data_path, separated by whitespace",
26+
"samples file must have 2 columns (sample_id and data_path, in which case "
27+
"aa-tRNA-seq input will be assumed), or 5 columns ((sample_id, data_path, "
28+
"sequencing_input, organism, chemistry)) separated by whitespace",
2229
file=sys.stderr,
2330
)
2431
sys.exit(f"found {line}")
32+
sequencing_input = "aa-tRNA"
33+
organisms = "scerevisiae"
34+
chemistry = "RNA004"
35+
basecall_model = "sup"
36+
37+
elif len(fields) == 5:
38+
# new format, use provided values
39+
try:
40+
sample, path, sequencing_input, organism, chemistry = fields
41+
except ValueError:
42+
print(
43+
"sample file must have either 2 or 5 columns, separated by whitespace."
44+
file=sys.stderr
45+
)
46+
sys.exit(f"found {line}")
2547
if sample in samples:
2648
samples[sample]["path"].add(path)
2749
else:
28-
samples[sample] = {"path": {path}}
50+
print(
51+
"Error: samples file must have either 2 or 5 columns:\n"
52+
"2-column format: sample_id, data_path (defaults to scerevisiae RNA004 aa-tRNA)\n"
53+
"5-column format: sample_id, data_path, sequencing_input, organism, chemistry",
54+
file=sys.stderr,
55+
)
56+
sys.exit(f"found {line}")
57+
58+
if sample in samples:
59+
print(f"Duplicate sample found: {sample}, file=sys.stderr)
60+
sys.exit(1)
61+
else:
62+
samples[sample] = {
63+
"path": path,
64+
"sequencing_input": sequencing_input,
65+
"organism": organism, # defaults to scerevisiae if 2 cols
66+
"chemistry": chemistry,
67+
"basecall_model": basecall_model
68+
}
2969
return samples
3070

3171

0 commit comments

Comments
 (0)