1919import hgvs .utils .altseqbuilder as altseqbuilder
2020import hgvs .validator
2121from hgvs .decorators .lru_cache import lru_cache
22- from hgvs .enums import PrevalidationLevel
23- from hgvs .exceptions import HGVSInvalidVariantError , HGVSUnsupportedOperationError
22+ from hgvs .enums import PrevalidationLevel , ShiftOverBoundaryPreference
23+ from hgvs .exceptions import HGVSInvalidVariantError , HGVSUnsupportedOperationError , HGVSInvalidIntervalError
2424from hgvs .utils .reftranscriptdata import RefTranscriptData
2525
2626_logger = logging .getLogger (__name__ )
@@ -71,6 +71,8 @@ def __init__(
7171 replace_reference = hgvs .global_config .mapping .replace_reference ,
7272 prevalidation_level = hgvs .global_config .mapping .prevalidation_level ,
7373 add_gene_symbol = hgvs .global_config .mapping .add_gene_symbol ,
74+ shift_over_boundary = hgvs .global_config .mapping .shift_over_boundary ,
75+ shift_over_boundary_preference = hgvs .global_config .mapping .shift_over_boundary_preference ,
7476 ):
7577 """
7678 :param bool replace_reference: replace reference (entails additional network access)
@@ -93,6 +95,11 @@ def __init__(
9395 self .left_normalizer = hgvs .normalizer .Normalizer (
9496 hdp , shuffle_direction = 5 , variantmapper = self
9597 )
98+ self .shift_over_boundary = shift_over_boundary
99+ if shift_over_boundary_preference is None :
100+ self .shift_over_boundary_preference = ShiftOverBoundaryPreference .DEFAULT
101+ else :
102+ self .shift_over_boundary_preference = ShiftOverBoundaryPreference [shift_over_boundary_preference .upper ()]
96103
97104 # ############################################################################
98105 # g⟷t
@@ -436,21 +443,52 @@ def c_to_p(self, var_c, pro_ac=None, alt_ac=None, alt_aln_method=hgvs.global_con
436443 reference_data = RefTranscriptData (self .hdp , var_c .ac , pro_ac , translation_table = translation_table )
437444 builder = altseqbuilder .AltSeqBuilder (var_c , reference_data , translation_table = translation_table )
438445
446+ # attempt to shift ins/dup variants from the intron into the exon or vice versa
447+ if self .shift_over_boundary :
448+ shifts_into_exon_and_intron = False
449+ is_shifted = False
450+ original_region = builder .get_variant_region ()
451+ if (
452+ var_c .posedit .edit .type in ['ins' , 'dup' ]
453+ and original_region in [builder .INTRON , builder .EXON ]
454+ ):
455+ if alt_ac is None :
456+ raise HGVSUnsupportedOperationError (f'mapping specific variant { var_c } requires alt_ac' )
457+ for shifted_var_c in self ._var_c_shifts (var_c , alt_ac , alt_aln_method ):
458+ shifted_reference_data = RefTranscriptData (self .hdp , shifted_var_c .ac , pro_ac )
459+ shifted_builder = altseqbuilder .AltSeqBuilder (shifted_var_c , shifted_reference_data )
460+ shifted_region = shifted_builder .get_variant_region ()
461+ if shifted_region not in [shifted_builder .INTRON , shifted_builder .EXON ]:
462+ continue
463+ if original_region != shifted_region :
464+ # a shift is possible
465+ shifts_into_exon_and_intron = True
466+ if self .shift_over_boundary_preference .name .lower () == shifted_region :
467+ # and that shift is preferred
468+ is_shifted = True
469+ reference_data = shifted_reference_data
470+ builder = shifted_builder
471+ break
472+
439473 # TODO: handle case where you get 2+ alt sequences back;
440474 # currently get list of 1 element loop structure implemented
441475 # to handle this, but doesn't really do anything currently.
442476 all_alt_data = builder .build_altseq ()
443477
444478 var_ps = []
445479 for alt_data in all_alt_data :
446- builder = altseq_to_hgvsp .AltSeqToHgvsp (reference_data , alt_data )
447- var_p = builder .build_hgvsp ()
480+ hgvsp_builder = altseq_to_hgvsp .AltSeqToHgvsp (reference_data , alt_data )
481+ var_p = hgvsp_builder .build_hgvsp ()
448482 var_ps .append (var_p )
449483
450484 var_p = var_ps [0 ]
451485
452486 if self .add_gene_symbol :
453487 self ._update_gene_symbol (var_p , var_c .gene )
488+ var_p .at_boundary = builder .at_boundary
489+ if self .shift_over_boundary :
490+ var_p .shifts_into_exon_and_intron = shifts_into_exon_and_intron
491+ var_p .is_shifted = is_shifted
454492
455493 return var_p
456494
@@ -625,6 +663,59 @@ def _update_gene_symbol(self, var, symbol):
625663 var .gene = symbol
626664 return var
627665
def _var_c_shifts(self, var_c, alt_ac, alt_aln_method):
    """Yield alternative c. representations of ``var_c`` found by shifting it on the genome.

    The variant is projected to g. coordinates on ``alt_ac``, each shifted
    g. candidate produced by ``_var_g_shifts`` is projected back onto the
    transcript ``var_c.ac``, and every candidate that maps back
    successfully is yielded.

    :param var_c: c. variant to shift
    :param alt_ac: accession of the alternate (genomic) sequence used for the round trip
    :param alt_aln_method: alignment method used for the c<->g projections
    :returns: generator of shifted c. variants (possibly empty)

    Errors from the initial c-to-g projection are not caught here and
    propagate to the caller on first iteration.
    """
    strand = self._fetch_AlignmentMapper(
        tx_ac=var_c.ac, alt_ac=alt_ac, alt_aln_method=alt_aln_method
    ).strand
    # The VariantMapper implementations are called explicitly instead of via
    # ``self`` -- presumably to bypass overrides in subclasses; TODO confirm.
    var_g = VariantMapper.c_to_g(self, var_c, alt_ac=alt_ac, alt_aln_method=alt_aln_method)
    for shifted_var_g in self._var_g_shifts(var_g, strand=strand, alt_aln_method=alt_aln_method):
        # Only the projection is guarded: the yield stays outside the try so
        # the handler can never swallow an exception thrown into the
        # suspended generator.
        try:
            shifted_var_c = VariantMapper.g_to_c(
                self, shifted_var_g, tx_ac=var_c.ac, alt_aln_method=alt_aln_method
            )
        except (HGVSInvalidVariantError, HGVSInvalidIntervalError, HGVSUnsupportedOperationError) as exc:
            # Best effort: a candidate that cannot be mapped back is skipped.
            _logger.debug("skipping shifted variant %s: %s", shifted_var_g, exc)
            continue
        yield shifted_var_c
676+
def _var_g_shifts(self, var_g, strand, alt_aln_method):
    """Yield distinct shifted representations of a g. variant.

    ``_var_g_shift_with_rewrite`` is tried with shuffle direction 3 and
    then 5. A result is yielded only if its string form differs from the
    input variant and from every result already yielded.

    :param var_g: g. variant to shift
    :param strand: strand value forwarded to ``_var_g_shift_with_rewrite``
    :param alt_aln_method: alignment method forwarded to ``_var_g_shift_with_rewrite``
    :returns: generator of shifted g. variants (possibly empty)
    """
    # String forms already seen; seeded with the input so an unchanged
    # variant is never reported as a shift.
    seen_var_g_strs = {str(var_g)}
    for shuffle_direction in (3, 5):
        # Only the shift and its stringification are guarded: the yield stays
        # outside the try so the handler cannot swallow an exception thrown
        # into the suspended generator.
        try:
            shifted_var_g = self._var_g_shift_with_rewrite(
                var_g, shuffle_direction, strand, alt_aln_method
            )
            shifted_str = str(shifted_var_g)
        except (HGVSInvalidVariantError, HGVSInvalidIntervalError, HGVSUnsupportedOperationError) as exc:
            # Best effort: a direction that cannot be shifted is skipped.
            _logger.debug(
                "could not shift %s in direction %s: %s", var_g, shuffle_direction, exc
            )
            continue
        if shifted_str in seen_var_g_strs:
            continue
        seen_var_g_strs.add(shifted_str)
        yield shifted_var_g
689+
def _var_g_shift_with_rewrite(self, var_g, shuffle_direction, strand, alt_aln_method):
    """Normalize a copy of ``var_g`` in one direction, rewriting a dup as an insertion.

    The input variant is deep-copied and normalized (validation disabled)
    with the requested shuffle direction. If the normalized edit is a
    duplication it is re-expressed as an insertion between two adjacent
    positions, so that the variant can be shifted farther than HGVS
    notation would normally allow for a dup.

    :param var_g: g. variant to shift; the caller's object is not modified
    :param shuffle_direction: shuffle direction passed to the normalizer (3 or 5)
    :param strand: transcript strand (1 or -1), used to pick the insertion site
    :param alt_aln_method: alignment method passed to the normalizer
    :returns: the normalized (and possibly rewritten) copy of ``var_g``
    """
    shifted = copy.deepcopy(var_g)
    shifter = hgvs.normalizer.Normalizer(
        self.hdp,
        alt_aln_method=alt_aln_method,
        validate=False,
        shuffle_direction=shuffle_direction,
    )
    shifted = shifter.normalize(shifted)

    # Anything other than a duplication is returned as normalized.
    if shifted.posedit.edit.type != 'dup':
        return shifted

    # The dup's ``ref`` becomes the inserted sequence below; presumably this
    # call fills in that reference sequence -- TODO confirm.
    self._replace_reference(shifted)

    dup_interval = shifted.posedit.pos
    if (strand, shuffle_direction) in ((1, 3), (-1, 5)):
        # Insert between the base before the dup and the dup's first base.
        left_base = dup_interval.start.base - 1
    else:
        # Insert between the dup's last base and the base after it.
        left_base = dup_interval.end.base

    shifted.posedit = hgvs.posedit.PosEdit(
        pos=hgvs.location.Interval(
            start=hgvs.location.SimplePosition(base=left_base),
            end=hgvs.location.SimplePosition(base=left_base + 1),
        ),
        edit=hgvs.edit.NARefAlt(ref=None, alt=shifted.posedit.edit.ref),
    )
    return shifted
718+
628719
629720# <LICENSE>
630721# Copyright 2018 HGVS Contributors (https://github.com/biocommons/hgvs)
0 commit comments