@@ -541,7 +541,45 @@ def check_input_params(args):
541541 return True
542542
543543
def check_bam_file(bam_path: str, check_index: bool = True) -> None:
    """Check that a BAM file exists and, optionally, that it is indexed.

    On failure a critical message is logged and the process is terminated
    through ``sys.exit``; on success the function simply returns.

    Args:
        bam_path: Path to the BAM file to validate.
        check_index: When true, the file is also opened with pysam and must
            report an index.

    Raises:
        SystemExit: With ``IsoQuantExitCode.INPUT_FILE_NOT_FOUND`` when
            ``bam_path`` is not an existing regular file, or with
            ``IsoQuantExitCode.BAM_NOT_INDEXED`` when ``check_index`` is set
            and the file has no index.
    """
    if not os.path.isfile(bam_path):
        logger.critical("BAM file " + bam_path + " does not exist")
        sys.exit(IsoQuantExitCode.INPUT_FILE_NOT_FOUND)

    if not check_index:
        return

    # Open through a context manager so the handle is released on every path,
    # including the SystemExit raised below (which callers may catch).
    with pysam.AlignmentFile(bam_path, "rb") as bamfile_in:
        indexed = bamfile_in.has_index()

    if not indexed:
        logger.critical("BAM file " + bam_path + " is not indexed, run samtools sort and samtools index")
        sys.exit(IsoQuantExitCode.BAM_NOT_INDEXED)
555+
556+
def check_file_exists(file_path: str, description: str):
    """Ensure ``file_path`` points to an existing regular file.

    Returns normally when the file is present. Otherwise logs a critical
    message of the form "<description> <file_path> does not exist" and
    terminates via ``sys.exit`` with ``IsoQuantExitCode.INPUT_FILE_NOT_FOUND``.

    Args:
        file_path: Path to verify.
        description: Human-readable label placed in front of the path in the
            error message.
    """
    # Guard clause: nothing to report when the file is there.
    if os.path.isfile(file_path):
        return

    logger.critical(f"{description} {file_path} does not exist")
    sys.exit(IsoQuantExitCode.INPUT_FILE_NOT_FOUND)
562+
563+
def extract_read_group_file_path(spec: str):
    """
    Pull the file path out of a file-based read_group spec.

    A spec is file-based when its first colon-separated field is exactly
    'file' and at least one more field follows. In that case the second
    field is returned (it may be an empty string, e.g. for 'file:').
    Every other spec yields None.
    """
    kind, colon, remainder = spec.partition(":")
    # No colon means there is no second field; a non-'file' prefix means
    # the spec does not refer to a file at all.
    if kind != 'file' or not colon:
        return None
    # Only the field immediately after 'file:' is the path.
    return remainder.split(":", 1)[0]
574+
575+
544576def check_input_files (args ):
577+ # Check reference genome
578+ if args .reference and not os .path .isfile (args .reference ):
579+ logger .critical ("Reference genome " + args .reference + " does not exist" )
580+ sys .exit (IsoQuantExitCode .INPUT_FILE_NOT_FOUND )
581+
582+ # Check input reads (BAM/FASTQ/save files)
545583 for sample in args .input_data .samples :
546584 for lib in sample .file_list :
547585 for in_file in lib :
@@ -554,33 +592,60 @@ def check_input_files(args):
554592 logger .critical ("Input file " + in_file + " does not exist" )
555593 sys .exit (IsoQuantExitCode .INPUT_FILE_NOT_FOUND )
556594 if args .input_data .input_type == InputDataType .bam :
557- bamfile_in = pysam .AlignmentFile (in_file , "rb" )
558- if not bamfile_in .has_index ():
559- logger .critical ("BAM file " + in_file + " is not indexed, run samtools sort and samtools index" )
560- sys .exit (IsoQuantExitCode .BAM_NOT_INDEXED )
561- bamfile_in .close ()
595+ check_bam_file (in_file , check_index = True )
596+
597+ # Check Illumina BAM files
562598 if sample .illumina_bam is not None :
563599 for illumina in sample .illumina_bam :
564- bamfile_in = pysam .AlignmentFile (illumina , "rb" )
565- if not bamfile_in .has_index ():
566- logger .critical ("BAM file " + illumina + " is not indexed, run samtools sort and samtools index" )
567- sys .exit (IsoQuantExitCode .BAM_NOT_INDEXED )
568- bamfile_in .close ()
600+ check_bam_file (illumina , check_index = True )
601+
602+ # Check barcoded reads files (from args, not sample - sample.barcoded_reads is set later)
603+ if hasattr (args , 'barcoded_reads' ) and args .barcoded_reads :
604+ if isinstance (args .barcoded_reads , list ):
605+ for bc_file in args .barcoded_reads :
606+ check_file_exists (bc_file , "Barcoded reads file" )
607+ else :
608+ check_file_exists (args .barcoded_reads , "Barcoded reads file" )
609+
610+ # Check barcode whitelist files
611+ if hasattr (args , 'barcode_whitelist' ) and args .barcode_whitelist :
612+ for wl_file in args .barcode_whitelist :
613+ check_file_exists (wl_file , "Barcode whitelist file" )
614+
615+ # Check barcode2spot file (parse spec to extract filename)
616+ if hasattr (args , 'barcode2spot' ) and args .barcode2spot :
617+ from src .read_groups import parse_barcode2spot_spec
618+ bc2spot_file , _ , _ = parse_barcode2spot_spec (args .barcode2spot )
619+ check_file_exists (bc2spot_file , "Barcode to spot mapping file" )
620+
621+ # Check read_group file specs
622+ if hasattr (args , 'read_group' ) and args .read_group :
623+ for spec in args .read_group :
624+ file_path = extract_read_group_file_path (spec )
625+ if file_path :
626+ check_file_exists (file_path , "Read group file" )
627+
628+ # Check junction BED file
629+ if hasattr (args , 'junc_bed_file' ) and args .junc_bed_file :
630+ check_file_exists (args .junc_bed_file , "Junction BED file" )
569631
632+ # Check CAGE file (currently not supported)
570633 if args .cage is not None :
571634 logger .critical ("CAGE data is not supported yet" )
572635 sys .exit (IsoQuantExitCode .INVALID_PARAMETER )
573636 if not os .path .isfile (args .cage ):
574637 logger .critical ("Bed file with CAGE peaks " + args .cage + " does not exist" )
575638 sys .exit (IsoQuantExitCode .INPUT_FILE_NOT_FOUND )
576639
640+ # Check gene database
577641 if args .genedb is not None :
578642 if not os .path .isfile (args .genedb ):
579643 logger .critical ("Gene database " + args .genedb + " does not exist" )
580644 sys .exit (IsoQuantExitCode .GENE_DB_NOT_FOUND )
581645 else :
582646 args .no_junc_bed = True
583647
648+ # Check read assignments
584649 if args .read_assignments is not None :
585650 for r in args .read_assignments :
586651 if not glob .glob (r + "*" ):
0 commit comments