1616import hgvs .sequencevariant
1717import hgvs .validator
1818from hgvs .decorators .lru_cache import lru_cache
19- from hgvs .enums import PrevalidationLevel
20- from hgvs .exceptions import HGVSInvalidVariantError , HGVSUnsupportedOperationError
19+ from hgvs .enums import PrevalidationLevel , ShiftOverBoundaryPreference
20+ from hgvs .exceptions import (
21+ HGVSInvalidIntervalError ,
22+ HGVSInvalidVariantError ,
23+ HGVSUnsupportedOperationError ,
24+ )
2125from hgvs .utils import altseq_to_hgvsp , altseqbuilder
2226from hgvs .utils .position import get_start_end , get_start_end_interbase
2327from hgvs .utils .reftranscriptdata import RefTranscriptData
2428
2529_logger = logging .getLogger (__name__ )
2630
31+ _SHUFFLE_3PRIME = 3
32+ _SHUFFLE_5PRIME = 5
33+
2734
2835class VariantMapper :
2936 r"""Maps SequenceVariant objects between g., n., r., c., and p. representations.
@@ -70,6 +77,8 @@ def __init__(
7077 replace_reference = hgvs .global_config .mapping .replace_reference ,
7178 prevalidation_level = hgvs .global_config .mapping .prevalidation_level ,
7279 add_gene_symbol = hgvs .global_config .mapping .add_gene_symbol ,
80+ shift_over_boundary = hgvs .global_config .mapping .shift_over_boundary ,
81+ shift_over_boundary_preference = hgvs .global_config .mapping .shift_over_boundary_preference ,
7382 ):
7483 """
7584 :param bool replace_reference: replace reference (entails additional network access)
@@ -92,6 +101,13 @@ def __init__(
92101 self .left_normalizer = hgvs .normalizer .Normalizer (
93102 hdp , shuffle_direction = 5 , variantmapper = self
94103 )
104+ self .shift_over_boundary = shift_over_boundary
105+ if shift_over_boundary_preference is None :
106+ self .shift_over_boundary_preference = ShiftOverBoundaryPreference .DEFAULT
107+ else :
108+ self .shift_over_boundary_preference = ShiftOverBoundaryPreference [
109+ shift_over_boundary_preference .upper ()
110+ ]
95111
96112 # ############################################################################
97113 # g⟷t
@@ -456,15 +472,43 @@ def c_to_p(
456472 var_c , reference_data , translation_table = reference_data .translation_table
457473 )
458474
475+ # attempt to shift ins/dup variants from the intron into the exon or vice versa
476+ if self .shift_over_boundary :
477+ original_region = builder .get_variant_region ()
478+ if var_c .posedit .edit .type in ["ins" , "dup" ] and original_region in [
479+ builder .INTRON ,
480+ builder .EXON ,
481+ ]:
482+ if alt_ac is None :
483+ msg = f"mapping specific variant { var_c } requires alt_ac"
484+ raise HGVSUnsupportedOperationError (msg )
485+ for shifted_var_c in VariantMapper ._var_c_shifts (
486+ self , var_c , alt_ac , alt_aln_method
487+ ):
488+ shifted_reference_data = RefTranscriptData (self .hdp , shifted_var_c .ac , pro_ac )
489+ shifted_builder = altseqbuilder .AltSeqBuilder (
490+ shifted_var_c , shifted_reference_data
491+ )
492+ shifted_region = shifted_builder .get_variant_region ()
493+ if shifted_region not in [shifted_builder .INTRON , shifted_builder .EXON ]:
494+ continue
495+ if original_region != shifted_region :
496+ # a shift is possible
497+ if self .shift_over_boundary_preference .name .lower () == shifted_region :
498+ # and that shift is preferred
499+ reference_data = shifted_reference_data
500+ builder = shifted_builder
501+ break
502+
459503 # TODO: handle case where you get 2+ alt sequences back;
460504 # currently get list of 1 element loop structure implemented
461505 # to handle this, but doesn't really do anything currently.
462506 all_alt_data = builder .build_altseq ()
463507
464508 var_ps = []
465509 for alt_data in all_alt_data :
466- builder = altseq_to_hgvsp .AltSeqToHgvsp (reference_data , alt_data )
467- var_p = builder .build_hgvsp ()
510+ hgvsp_builder = altseq_to_hgvsp .AltSeqToHgvsp (reference_data , alt_data )
511+ var_p = hgvsp_builder .build_hgvsp ()
468512 var_ps .append (var_p )
469513
470514 var_p = var_ps [0 ]
@@ -645,6 +689,82 @@ def _update_gene_symbol(self, var, symbol):
645689 var .gene = symbol
646690 return var
647691
def _var_c_shifts(self, var_c, alt_ac, alt_aln_method):
    """Yield alternative c. representations of an ins/dup variant.

    The variant is projected to g. with ``c_to_g``, every shifted g. form
    produced by ``_var_g_shifts`` is projected back onto ``var_c.ac`` with
    ``g_to_c``, and each successful projection is yielded.

    :param var_c: c. variant to shift; a variant without a posedit, or whose
        edit type is neither "ins" nor "dup", yields nothing
    :param alt_ac: passed as ``alt_ac`` to the alignment-mapper lookup and
        to ``c_to_g``
    :param alt_aln_method: alignment method forwarded to every mapping call
    :returns: generator of shifted c. variants; a shifted g. variant whose
        back-projection raises HGVSInvalidVariantError,
        HGVSInvalidIntervalError, or HGVSUnsupportedOperationError is
        silently skipped
    """
    if not var_c.posedit or var_c.posedit.edit.type not in ("ins", "dup"):
        return

    aln_mapper = self._fetch_AlignmentMapper(
        tx_ac=var_c.ac, alt_ac=alt_ac, alt_aln_method=alt_aln_method
    )
    tx_strand = aln_mapper.strand

    # The mapping methods are invoked through VariantMapper explicitly
    # rather than via self.
    # NOTE(review): presumably this bypasses subclass overrides -- confirm.
    genomic_var = VariantMapper.c_to_g(
        self, var_c, alt_ac=alt_ac, alt_aln_method=alt_aln_method
    )

    g_candidates = self._var_g_shifts(
        genomic_var, strand=tx_strand, alt_aln_method=alt_aln_method
    )
    for g_candidate in g_candidates:
        try:
            c_candidate = VariantMapper.g_to_c(
                self, g_candidate, tx_ac=var_c.ac, alt_aln_method=alt_aln_method
            )
            yield c_candidate
        except (
            HGVSInvalidVariantError,
            HGVSInvalidIntervalError,
            HGVSUnsupportedOperationError,
        ):
            # This candidate cannot be expressed on the transcript; try the next.
            pass
714+
def _var_g_shifts(self, var_g, strand, alt_aln_method):
    """Yield distinct shifted forms of a g. variant.

    The variant is shifted once per shuffle direction, 3' first and then 5',
    using ``_var_g_shift_with_rewrite``. A result is yielded only when its
    string form differs from the input variant and from every result
    already yielded.

    :param var_g: g. variant to shift
    :param strand: strand value forwarded to ``_var_g_shift_with_rewrite``
    :param alt_aln_method: alignment method forwarded to the shift helper
    :returns: generator of shifted g. variants; a direction that raises
        HGVSInvalidVariantError, HGVSInvalidIntervalError, or
        HGVSUnsupportedOperationError is silently skipped
    """
    # String forms already seen, seeded with the unshifted variant so that a
    # shift which changes nothing is never reported.
    seen_strs = [str(var_g)]
    for direction in (_SHUFFLE_3PRIME, _SHUFFLE_5PRIME):
        try:
            candidate = self._var_g_shift_with_rewrite(
                var_g, direction, strand, alt_aln_method
            )
            candidate_str = str(candidate)
            if candidate_str not in seen_strs:
                seen_strs.append(candidate_str)
                yield candidate
        except (
            HGVSInvalidVariantError,
            HGVSInvalidIntervalError,
            HGVSUnsupportedOperationError,
        ):
            # Shifting in this direction failed; move on to the next one.
            pass
733+
def _var_g_shift_with_rewrite(self, var_g, shuffle_direction, strand, alt_aln_method):
    """Shift a g. variant fully in one direction, rewriting dups as insertions.

    A deep copy of ``var_g`` is normalized with a Normalizer configured for
    ``shuffle_direction`` (validation disabled). If the normalized edit is a
    duplication, it is rewritten as an insertion so that the variant is
    shifted farther than would normally be possible using the HGVS notation.

    The insertion is placed on one flank of the duplicated interval:

    * strand 1 with a 3' shuffle, or strand -1 with a 5' shuffle: between
      ``start - 1`` and ``start``;
    * any other combination: between ``end`` and ``end + 1``.

    :param var_g: g. variant to shift; the caller's object is not modified
    :param shuffle_direction: ``_SHUFFLE_3PRIME`` or ``_SHUFFLE_5PRIME``
    :param strand: strand value compared against 1 and -1 to pick the flank
    :param alt_aln_method: alignment method handed to the Normalizer
    :returns: the normalized (and possibly rewritten) copy of ``var_g``
    """
    working = copy.deepcopy(var_g)
    shifter = hgvs.normalizer.Normalizer(
        self.hdp,
        alt_aln_method=alt_aln_method,
        validate=False,
        shuffle_direction=shuffle_direction,
    )
    working = shifter.normalize(working)

    if working.posedit.edit.type != "dup":
        return working

    # NOTE(review): presumably this populates edit.ref, which is read below
    # as the inserted sequence -- confirm against _replace_reference.
    self._replace_reference(working)

    use_start_flank = (strand == 1 and shuffle_direction == _SHUFFLE_3PRIME) or (
        strand == -1 and shuffle_direction == _SHUFFLE_5PRIME
    )
    dup_interval = working.posedit.pos
    if use_start_flank:
        left_base = dup_interval.start.base - 1
    else:
        left_base = dup_interval.end.base

    # The insertion always sits between two adjacent bases.
    working.posedit = hgvs.posedit.PosEdit(
        pos=hgvs.location.Interval(
            start=hgvs.location.SimplePosition(base=left_base),
            end=hgvs.location.SimplePosition(base=left_base + 1),
        ),
        edit=hgvs.edit.NARefAlt(ref=None, alt=working.posedit.edit.ref),
    )
    return working
767+
648768
649769# <LICENSE>
650770# Copyright 2018 HGVS Contributors (https://github.com/biocommons/hgvs)
0 commit comments