Skip to content

Commit 1e40ab0

Browse files
committed
add an option to tell IsoQuant polyA tails were trimmed
1 parent 3f77830 commit 1e40ab0

File tree

3 files changed

+43
-14
lines changed

3 files changed

+43
-14
lines changed

isoquant.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
DataSetReadMapper
3636
)
3737
from src.dataset_processor import DatasetProcessor, PolyAUsageStrategies
38+
from src.alignment_processor import PolyATrimmed
3839
from src.graph_based_model_construction import StrandnessReportingLevel
3940
from src.long_read_assigner import AmbiguityResolvingMethod
4041
from src.long_read_counter import COUNTING_STRATEGIES, CountingStrategy, GroupedOutputFormat, NormalizationMethod
@@ -138,6 +139,9 @@ def add_hidden_option(*args, **kwargs): # show command only with --full-help
138139
help="type of data to process, supported types are: " + ", ".join(DATA_TYPE_ALIASES.keys()))
139140
input_args_group.add_argument('--stranded', type=str, help="reads strandness type, supported values are: " +
140141
", ".join(SUPPORTED_STRANDEDNESS), default="none")
142+
input_args_group.add_argument('--polya_trimmed', default=None, type=str,
143+
choices=[e.name for e in PolyATrimmed],
144+
help="define reads which had polyA tail trimmed")
141145
input_args_group.add_argument('--fl_data', action='store_true', default=False,
142146
help="reads represent FL transcripts; both ends of the read are considered to be reliable")
143147

src/alignment_info.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import logging
99

1010
from .common import get_read_blocks
11+
from .polya_finder import PolyAInfo
1112
from .polya_verification import shift_polya, shift_polyt
1213

1314
logger = logging.getLogger('IsoQuant')
@@ -28,7 +29,7 @@ def __init__(self, alignment):
2829
return
2930
self.read_start = self.read_exons[0][0]
3031
self.read_end = self.read_exons[-1][1]
31-
self.polya_info = None
32+
self.polya_info = PolyAInfo(-1, -1, -1, -1)
3233
self.exons_changed = False
3334
self.cage_hits = []
3435
self.combined_profile = None

src/alignment_processor.py

Lines changed: 37 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,14 @@
3333
logger = logging.getLogger('IsoQuant')
3434

3535

36+
@unique
37+
class PolyATrimmed(Enum):
38+
none = 1
39+
all = 2
40+
stranded = 3
41+
# file = 3
42+
43+
3644
@unique
3745
class AlignmentType(Enum):
3846
primary = 1
@@ -360,32 +368,34 @@ def process_intergenic(self, alignment_storage, region, skip_read_fraction=1):
360368
if skip_read_fraction > 1 and counter % skip_read_fraction != 0:
361369
continue
362370

363-
alignment_info.add_polya_info(self.polya_finder, self.polya_fixer)
364-
# if self.params.cage:
365-
# alignment_info.add_cage_info(self.cage_finder)
371+
if self.params.polya_trimmed == PolyATrimmed.none:
372+
alignment_info.add_polya_info(self.polya_finder, self.polya_fixer)
366373

367374
read_assignment = ReadAssignment(read_id, ReadAssignmentType.intergenic,
368375
IsoformMatch(MatchClassification.intergenic))
369376

370377
if alignment_info.exons_changed:
371378
read_assignment.add_match_attribute(MatchEvent(MatchEventSubtype.aligned_polya_tail))
372-
read_assignment.polyA_found = (alignment_info.polya_info.external_polya_pos != -1 or
373-
alignment_info.polya_info.external_polyt_pos != -1 or
374-
alignment_info.polya_info.internal_polya_pos != -1 or
375-
alignment_info.polya_info.internal_polyt_pos != -1)
379+
376380
read_assignment.polya_info = alignment_info.polya_info
377381
read_assignment.cage_found = len(alignment_info.cage_hits) > 0
378382
read_assignment.genomic_region = region
379383
read_assignment.exons = alignment_info.read_exons
380384
read_assignment.corrected_exons = corrector.correct_read(alignment_info)
381385
read_assignment.corrected_introns = junctions_from_blocks(read_assignment.corrected_exons)
382-
383386
read_assignment.read_group = self.read_groupper.get_group_id(alignment, self.bam_merger.bam_pairs[bam_index][1])
384387
read_assignment.mapped_strand = "-" if alignment.is_reverse else "+"
385388
read_assignment.strand = self.get_assignment_strand(read_assignment)
386389
read_assignment.chr_id = self.chr_id
387390
read_assignment.multimapper = alignment.is_secondary
388391
read_assignment.mapping_quality = alignment.mapping_quality
392+
393+
self.add_artificial_polya(read_assignment)
394+
read_assignment.polyA_found = (alignment_info.polya_info.external_polya_pos != -1 or
395+
alignment_info.polya_info.external_polyt_pos != -1 or
396+
alignment_info.polya_info.internal_polya_pos != -1 or
397+
alignment_info.polya_info.internal_polyt_pos != -1)
398+
389399
AlignmentCollector.import_bam_tags(alignment, read_assignment, self.params.bam_tags)
390400
yield read_assignment
391401

@@ -429,10 +439,7 @@ def process_genic(self, alignment_storage, gene_info, region, skip_read_fraction
429439

430440
if alignment_info.exons_changed:
431441
read_assignment.add_match_attribute(MatchEvent(MatchEventSubtype.aligned_polya_tail))
432-
read_assignment.polyA_found = (alignment_info.polya_info.external_polya_pos != -1 or
433-
alignment_info.polya_info.external_polyt_pos != -1 or
434-
alignment_info.polya_info.internal_polya_pos != -1 or
435-
alignment_info.polya_info.internal_polyt_pos != -1)
442+
436443
read_assignment.polya_info = alignment_info.polya_info
437444
read_assignment.cage_found = len(alignment_info.cage_hits) > 0
438445
read_assignment.genomic_region = region
@@ -446,11 +453,16 @@ def process_genic(self, alignment_storage, gene_info, region, skip_read_fraction
446453
read_assignment.strand = self.get_assignment_strand(read_assignment)
447454
AlignmentCollector.check_antisense(read_assignment)
448455
AlignmentCollector.import_bam_tags(alignment, read_assignment, self.params.bam_tags)
449-
450456
read_assignment.chr_id = gene_info.chr_id
451457
read_assignment.multimapper = alignment.is_secondary
452458
read_assignment.mapping_quality = alignment.mapping_quality
453459

460+
self.add_artificial_polya(read_assignment)
461+
read_assignment.polyA_found = (alignment_info.polya_info.external_polya_pos != -1 or
462+
alignment_info.polya_info.external_polyt_pos != -1 or
463+
alignment_info.polya_info.internal_polya_pos != -1 or
464+
alignment_info.polya_info.internal_polyt_pos != -1)
465+
454466
if self.params.count_exons:
455467
read_assignment.exon_gene_profile = alignment_info.combined_profile.read_exon_profile.gene_profile
456468
read_assignment.intron_gene_profile = alignment_info.combined_profile.read_intron_profile.gene_profile
@@ -519,6 +531,18 @@ def get_gene_info_for_region(self, current_region):
519531
gene_info.set_reference_sequence(current_region[0], current_region[1], self.chr_record)
520532
return gene_info
521533

534+
def add_artificial_polya(self, read_assignment):
535+
if self.params.polya_trimmed == PolyATrimmed.stranded:
536+
if read_assignment.strand == '+':
537+
read_assignment.polya_info.external_polya_pos = read_assignment.corrected_exons[-1][1] + 1
538+
elif read_assignment.strand == '-':
539+
read_assignment.polya_info.external_polyt_pos = read_assignment.corrected_exons[0][0] - 1
540+
elif self.params.polya_trimmed == PolyATrimmed.all:
541+
if read_assignment.mapped_strand == '+':
542+
read_assignment.polya_info.external_polya_pos = read_assignment.corrected_exons[-1][1] + 1
543+
elif read_assignment.mapped_strand == '-':
544+
read_assignment.polya_info.external_polyt_pos = read_assignment.corrected_exons[0][0] - 1
545+
522546
@staticmethod
523547
def split_coverage_regions(genomic_region, alignment_storage):
524548
if interval_len(genomic_region) < AlignmentCollector.MIN_REGION_LEN or \

0 commit comments

Comments
 (0)