@@ -385,6 +385,8 @@ def process_sample(self, sample):
385385 logger .info ("Experiment has " + proper_plural_form ("BAM file" , len (sample .file_list )) + ": " + ", " .join (
386386 map (lambda x : x [0 ], sample .file_list )))
387387 self .chr_ids = self .get_chromosome_ids (sample )
388+ logger .info ("Total number of chromosomes to be processed %d: %s " %
389+ (len (self .chr_ids ), ", " .join (map (lambda x : str (x ), sorted (self .chr_ids )))))
388390 self .args .use_technical_replicas = self .args .read_group == "file_name" and len (sample .file_list ) > 1
389391
390392 self .all_read_groups = set ()
@@ -438,17 +440,23 @@ def process_sample(self, sample):
438440 os .remove (f )
439441 logger .info ("Processed experiment " + sample .prefix )
440442
443+ def keep_only_defined_chromosomes (self , chr_set : set ):
444+ if self .args .process_only_chr :
445+ chr_set .intersection_update (self .args .process_only_chr )
446+ elif self .args .discard_chr :
447+ chr_set .difference_update (self .args .discard_chr )
448+
449+ return chr_set
450+
441451 def get_chromosome_ids (self , sample ):
442452 genome_chromosomes = set (self .reference_record_dict .keys ())
443- for chr_id in self .args .discard_chr :
444- genome_chromosomes .discard (chr_id )
453+ genome_chromosomes = self .keep_only_defined_chromosomes (genome_chromosomes )
445454
446455 bam_chromosomes = set ()
447456 for bam_file in list (map (lambda x : x [0 ], sample .file_list )):
448457 bam = pysam .AlignmentFile (bam_file , "rb" , require_index = True )
449458 bam_chromosomes .update (bam .references )
450- for chr_id in self .args .discard_chr :
451- bam_chromosomes .discard (chr_id )
459+ bam_chromosomes = self .keep_only_defined_chromosomes (bam_chromosomes )
452460
453461 bam_genome_overlap = genome_chromosomes .intersection (bam_chromosomes )
454462 if len (bam_genome_overlap ) != len (genome_chromosomes ) or len (bam_genome_overlap ) != len (bam_chromosomes ):
@@ -472,8 +480,7 @@ def get_chromosome_ids(self, sample):
472480 gffutils_db = gffutils .FeatureDB (self .args .genedb )
473481 for feature in gffutils_db .all_features ():
474482 gene_annotation_chromosomes .add (feature .seqid )
475- for chr_id in self .args .discard_chr :
476- gene_annotation_chromosomes .discard (chr_id )
483+ gene_annotation_chromosomes = self .keep_only_defined_chromosomes (gene_annotation_chromosomes )
477484
478485 common_overlap = gene_annotation_chromosomes .intersection (bam_genome_overlap )
479486 if len (common_overlap ) != len (gene_annotation_chromosomes ):
0 commit comments