@@ -233,8 +233,12 @@ def get_filename(path_string: str, orientation: str) -> Tuple[str, str]:
233233 """
234234
235235 if ";" in path_string :
236- fwd = Path (path_string .split (";" )[0 ]).name
237- rev = Path (path_string .split (";" )[1 ]).name
236+ if path_string .count (";" ) == 1 :
237+ fwd = Path (path_string .split (";" )[0 ]).name
238+ rev = Path (path_string .split (";" )[1 ]).name
239+ else : # three files per sample
240+ fwd = Path (path_string .split (";" )[1 ]).name
241+ rev = Path (path_string .split (";" )[2 ]).name
238242 else :
239243 fwd = Path (path_string ).name
240244 rev = "NA"
@@ -252,13 +256,17 @@ def parse_to_mag(libraries):
252256 get_filename , orientation = "rev"
253257 )
254258 libraries ["short_reads_2" ] = libraries ["short_reads_2" ].replace ("NA" , "" )
259+ libraries ["short_reads_platform" ] = libraries ["instrument_model" ].apply (get_sequencing_platform )
255260 libraries ["longs_reads" ] = ""
261+ libraries ["long_reads_platform" ] = ""
256262 col2keep = [
257263 "archive_data_accession" ,
258264 "archive_sample_accession" ,
259265 "short_reads_1" ,
260266 "short_reads_2" ,
261267 "longs_reads" ,
268+ "short_reads_platform" ,
269+ "long_reads_platform" ,
262270 ]
263271 libraries = libraries [col2keep ].rename (
264272 columns = {
@@ -624,3 +632,30 @@ def is_merge_size_zero(
624632 if samples .shape [0 ] != 0 and library_selected .shape [0 ] == 0 :
625633 return True
626634 return False
635+
636+
def get_sequencing_platform(instrument_model: str) -> str:
    """
    Infer sequencing platform from the instrument model.

    The model string is matched against a fixed set of known prefixes
    (case-sensitive); "Complete Genomics" must match exactly.

    Args:
        instrument_model (str): instrument model reported on ENA
    Returns:
        str: sequencing platform, one of "ILLUMINA", "454", "AB", "BGISEQ",
            "Complete Genomics", "Helicos", or "Unknown" when the model is
            not recognised or is not a string (e.g. a missing value).
    """
    # Guard against missing values (None / NaN) so that mapping this function
    # over a column does not fail on `.startswith`.
    if not isinstance(instrument_model, str):
        return "Unknown"

    # Each entry maps a tuple of accepted prefixes to the platform label.
    prefix_to_platform = (
        (("Illumina", "HiSeq", "NextSeq"), "ILLUMINA"),
        (("454",), "454"),
        (("AB",), "AB"),
        (("BGISEQ",), "BGISEQ"),
        (("Helicos",), "Helicos"),
    )
    for prefixes, platform in prefix_to_platform:
        if instrument_model.startswith(prefixes):
            return platform

    # This model is only accepted as an exact match, not as a prefix.
    if instrument_model == "Complete Genomics":
        return instrument_model

    return "Unknown"
0 commit comments