Skip to content

Commit ce61048

Browse files
committed
more checks for input files
1 parent dd94a00 commit ce61048

File tree

1 file changed

+75
-10
lines changed

1 file changed

+75
-10
lines changed

isoquant.py

Lines changed: 75 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -541,7 +541,45 @@ def check_input_params(args):
541541
return True
542542

543543

544+
def check_bam_file(bam_path: str, check_index: bool = True):
    """Check that a BAM file exists and, optionally, that it is indexed.

    Logs a critical message and terminates the process via ``sys.exit`` with
    ``IsoQuantExitCode.INPUT_FILE_NOT_FOUND`` when ``bam_path`` is not a
    regular file, or with ``IsoQuantExitCode.BAM_NOT_INDEXED`` when
    ``check_index`` is true and the opened file reports no index.

    :param bam_path: path to the BAM file to validate
    :param check_index: when true, also open the file and require an index
    """
    if not os.path.isfile(bam_path):
        logger.critical("BAM file " + bam_path + " does not exist")
        sys.exit(IsoQuantExitCode.INPUT_FILE_NOT_FOUND)

    if not check_index:
        return

    # The context manager closes the handle on every path, including the
    # SystemExit raised below and any error raised while probing the index.
    with pysam.AlignmentFile(bam_path, "rb") as bamfile_in:
        if not bamfile_in.has_index():
            logger.critical("BAM file " + bam_path + " is not indexed, run samtools sort and samtools index")
            sys.exit(IsoQuantExitCode.BAM_NOT_INDEXED)
555+
556+
557+
def check_file_exists(file_path: str, description: str):
    """Ensure ``file_path`` is an existing regular file.

    When it is not, a critical message built from ``description`` and the
    path is logged and the process exits with
    ``IsoQuantExitCode.INPUT_FILE_NOT_FOUND``.

    :param file_path: path to verify
    :param description: human-readable label used as the message prefix
    """
    if os.path.isfile(file_path):
        # Nothing to report for a present file.
        return

    logger.critical(f"{description} {file_path} does not exist")
    sys.exit(IsoQuantExitCode.INPUT_FILE_NOT_FOUND)
562+
563+
564+
def extract_read_group_file_path(spec: str):
    """
    Pull the file path out of a file-based read_group spec.

    A spec is treated as file-based when its first colon-separated field is
    exactly 'file' and at least one more field follows; the second field is
    returned as the path. Any other spec yields None.
    """
    prefix, colon, remainder = spec.partition(":")
    if not colon or prefix != 'file':
        # No second field, or the spec is not of the 'file:...' kind.
        return None
    # Keep only the field right after 'file'; later fields are not part of the path.
    return remainder.split(":", 1)[0]
574+
575+
544576
def check_input_files(args):
577+
# Check reference genome
578+
if args.reference and not os.path.isfile(args.reference):
579+
logger.critical("Reference genome " + args.reference + " does not exist")
580+
sys.exit(IsoQuantExitCode.INPUT_FILE_NOT_FOUND)
581+
582+
# Check input reads (BAM/FASTQ/save files)
545583
for sample in args.input_data.samples:
546584
for lib in sample.file_list:
547585
for in_file in lib:
@@ -554,33 +592,60 @@ def check_input_files(args):
554592
logger.critical("Input file " + in_file + " does not exist")
555593
sys.exit(IsoQuantExitCode.INPUT_FILE_NOT_FOUND)
556594
if args.input_data.input_type == InputDataType.bam:
557-
bamfile_in = pysam.AlignmentFile(in_file, "rb")
558-
if not bamfile_in.has_index():
559-
logger.critical("BAM file " + in_file + " is not indexed, run samtools sort and samtools index")
560-
sys.exit(IsoQuantExitCode.BAM_NOT_INDEXED)
561-
bamfile_in.close()
595+
check_bam_file(in_file, check_index=True)
596+
597+
# Check Illumina BAM files
562598
if sample.illumina_bam is not None:
563599
for illumina in sample.illumina_bam:
564-
bamfile_in = pysam.AlignmentFile(illumina, "rb")
565-
if not bamfile_in.has_index():
566-
logger.critical("BAM file " + illumina + " is not indexed, run samtools sort and samtools index")
567-
sys.exit(IsoQuantExitCode.BAM_NOT_INDEXED)
568-
bamfile_in.close()
600+
check_bam_file(illumina, check_index=True)
601+
602+
# Check barcoded reads files (from args, not sample - sample.barcoded_reads is set later)
603+
if hasattr(args, 'barcoded_reads') and args.barcoded_reads:
604+
if isinstance(args.barcoded_reads, list):
605+
for bc_file in args.barcoded_reads:
606+
check_file_exists(bc_file, "Barcoded reads file")
607+
else:
608+
check_file_exists(args.barcoded_reads, "Barcoded reads file")
609+
610+
# Check barcode whitelist files
611+
if hasattr(args, 'barcode_whitelist') and args.barcode_whitelist:
612+
for wl_file in args.barcode_whitelist:
613+
check_file_exists(wl_file, "Barcode whitelist file")
614+
615+
# Check barcode2spot file (parse spec to extract filename)
616+
if hasattr(args, 'barcode2spot') and args.barcode2spot:
617+
from src.read_groups import parse_barcode2spot_spec
618+
bc2spot_file, _, _ = parse_barcode2spot_spec(args.barcode2spot)
619+
check_file_exists(bc2spot_file, "Barcode to spot mapping file")
620+
621+
# Check read_group file specs
622+
if hasattr(args, 'read_group') and args.read_group:
623+
for spec in args.read_group:
624+
file_path = extract_read_group_file_path(spec)
625+
if file_path:
626+
check_file_exists(file_path, "Read group file")
627+
628+
# Check junction BED file
629+
if hasattr(args, 'junc_bed_file') and args.junc_bed_file:
630+
check_file_exists(args.junc_bed_file, "Junction BED file")
569631

632+
# Check CAGE file (currently not supported)
570633
if args.cage is not None:
571634
logger.critical("CAGE data is not supported yet")
572635
sys.exit(IsoQuantExitCode.INVALID_PARAMETER)
573636
if not os.path.isfile(args.cage):
574637
logger.critical("Bed file with CAGE peaks " + args.cage + " does not exist")
575638
sys.exit(IsoQuantExitCode.INPUT_FILE_NOT_FOUND)
576639

640+
# Check gene database
577641
if args.genedb is not None:
578642
if not os.path.isfile(args.genedb):
579643
logger.critical("Gene database " + args.genedb + " does not exist")
580644
sys.exit(IsoQuantExitCode.GENE_DB_NOT_FOUND)
581645
else:
582646
args.no_junc_bed = True
583647

648+
# Check read assignments
584649
if args.read_assignments is not None:
585650
for r in args.read_assignments:
586651
if not glob.glob(r + "*"):

0 commit comments

Comments
 (0)