@@ -14,18 +14,58 @@ def parse_samples(fl):
1414 line = l .rstrip ()
1515 if not line or line .startswith ("#" ):
1616 continue
17- try :
18- sample , path = line .split ()
19- except :
17+ fields = line .split ()
18+
19+ if len (fields ) == 2 :
20+ # if samples.tsv has the old format, assume aa-tRNA-seq input
21+ try :
22+ sample , path = fields
23+
24+ except ValueError :
2025 print (
21- "samples file must have 2 columns, sample_id and data_path, separated by whitespace" ,
26+ "samples file must have 2 columns (sample_id and data_path, in which case "
27+ "aa-tRNA-seq input will be assumed), or 5 columns ((sample_id, data_path, "
28+ "sequencing_input, organism, chemistry)) separated by whitespace" ,
2229 file = sys .stderr ,
2330 )
2431 sys .exit (f"found { line } " )
32+ sequencing_input = "aa-tRNA"
33+ organisms = "scerevisiae"
34+ chemistry = "RNA004"
35+ basecall_model = "sup"
36+
37+ elif len (fields ) == 5 :
38+ # new format, use provided values
39+ try :
40+ sample , path , sequencing_input , organism , chemistry = fields
41+ except ValueError :
42+ print (
43+ "sample file must have either 2 or 5 columns, separated by whitespace."
44+ file = sys .stderr
45+ )
46+ sys .exit (f"found { line } " )
2547 if sample in samples :
2648 samples [sample ]["path" ].add (path )
2749 else :
28- samples [sample ] = {"path" : {path }}
50+ print (
51+ "Error: samples file must have either 2 or 5 columns:\n "
52+ "2-column format: sample_id, data_path (defaults to scerevisiae RNA004 aa-tRNA)\n "
53+ "5-column format: sample_id, data_path, sequencing_input, organism, chemistry" ,
54+ file = sys .stderr ,
55+ )
56+ sys .exit (f"found { line } " )
57+
58+ if sample in samples :
59+ print (f"Duplicate sample found: { sample } , file = sys .stderr )
60+ sys .exit (1 )
61+ else :
62+ samples [sample ] = {
63+ "path" : path ,
64+ "sequencing_input" : sequencing_input ,
65+ "organism" : organism , # defaults to scerevisiae if 2 cols
66+ "chemistry" : chemistry ,
67+ "basecall_model" : basecall_model
68+ }
2969 return samples
3070
3171
0 commit comments