Skip to content

Commit 9a7275b

Browse files
committed
Fix ins/dups where splice region is preserved
1 parent e6dbd1e commit 9a7275b

File tree

5 files changed

+141
-2
lines changed

5 files changed

+141
-2
lines changed

src/hgvs/assemblymapper.py

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,12 @@
99
HGVSError,
1010
HGVSInvalidVariantError,
1111
HGVSUnsupportedOperationError,
12+
HGVSInvalidIntervalError,
1213
)
1314
from hgvs.variantmapper import VariantMapper
15+
from hgvs.posedit import PosEdit
16+
from hgvs.edit import NARefAlt
17+
from hgvs.location import SimplePosition, Interval
1418

1519
_logger = logging.getLogger(__name__)
1620

@@ -172,7 +176,31 @@ def n_to_c(self, var_n):
172176
return self._maybe_normalize(var_out)
173177

174178
def c_to_p(self, var_c):
    """Project a c. variant onto its protein, rescuing shiftable ins/dup variants.

    The plain projection is computed first with ``_c_to_p``.  When the
    variant is an insertion or duplication whose start or end carries a
    non-zero intronic offset and the plain projection produced no
    ``posedit``, the variant is mapped to the genome, shifted as far as
    possible in the 3' and then the 5' direction (see ``_far_shift``),
    mapped back to the transcript and projected again.  The first shifted
    form that yields a ``posedit`` is used.

    :param var_c: the c. variant to project
    :returns: the protein variant, passed through ``_maybe_normalize``
    """
    var_out = super(AssemblyMapper, self)._c_to_p(var_c)

    if (
        var_c.posedit.edit.type in ['ins', 'dup']
        and var_c.type in "cnr"
        and var_c.posedit.pos is not None
        and (var_c.posedit.pos.start.offset != 0 or var_c.posedit.pos.end.offset != 0)
        and var_out.posedit is None
    ):
        try:
            var_g = self.c_to_g(var_c)
            strand = self._fetch_AlignmentMapper(tx_ac=var_c.ac).strand

            for shuffle_direction in (3, 5):
                shifted_var_g = self._far_shift(var_g, shuffle_direction, strand)
                shifted_var_c = super(AssemblyMapper, self).g_to_c(
                    shifted_var_g, var_c.ac, alt_aln_method=self.alt_aln_method
                )
                shifted_var_p = super(AssemblyMapper, self)._c_to_p(shifted_var_c)

                # Only adopt a shifted projection that actually produced a
                # protein change; otherwise keep the result computed for the
                # variant the caller passed in.
                if shifted_var_p.posedit is not None:
                    var_out = shifted_var_p
                    break
        except (HGVSInvalidVariantError, HGVSInvalidIntervalError, HGVSUnsupportedOperationError) as exc:
            # Best effort: the rescue is optional, so a failure falls back to
            # the current var_out, but leave a trace for debugging.
            _logger.debug("c_to_p: could not re-express %s by shifting: %s", var_c, exc)

    return self._maybe_normalize(var_out)
177205

178206
def relevant_transcripts(self, var_g):
@@ -268,6 +296,34 @@ def _maybe_normalize(self, var):
268296
# fall through to return unnormalized variant
269297
return var
270298

299+
def _far_shift(self, var_g, shuffle_direction, strand):
    """Normalize ``var_g`` with the given shuffle direction and return it.

    A fresh ``Normalizer`` (validation off) is built for the requested
    ``shuffle_direction``.  If the normalized variant is a duplication, its
    reference sequence is filled in and the variant is rewritten as an
    insertion of that sequence: immediately before the duplicated interval
    when ``(strand, shuffle_direction)`` is ``(1, 3)`` or ``(-1, 5)``, and
    immediately after it otherwise.

    :param var_g: genomic variant to shift
    :param shuffle_direction: 3 or 5, forwarded to the normalizer
    :param strand: transcript strand (1 or -1)
    :returns: the normalized (and possibly rewritten) genomic variant
    """
    # NOTE(review): relies on self._norm being set; presumably this fails if
    # the mapper was built without a normalizer -- confirm against __init__.
    shifter = hgvs.normalizer.Normalizer(
        self._norm.hdp,
        alt_aln_method=self.alt_aln_method,
        validate=False,
        shuffle_direction=shuffle_direction,
    )
    shifted_var_g = shifter.normalize(var_g)

    if shifted_var_g.posedit.edit.type != 'dup':
        return shifted_var_g

    self._replace_reference(shifted_var_g)
    dup_pos = shifted_var_g.posedit.pos
    dup_seq = shifted_var_g.posedit.edit.ref

    insert_before = (strand == 1 and shuffle_direction == 3) or (
        strand == -1 and shuffle_direction == 5
    )
    # The insertion is anchored between two adjacent bases.
    left_base = dup_pos.start.base - 1 if insert_before else dup_pos.end.base

    shifted_var_g.posedit = PosEdit(
        pos=Interval(
            start=SimplePosition(base=left_base),
            end=SimplePosition(base=left_base + 1),
        ),
        edit=NARefAlt(ref=None, alt=dup_seq),
    )
    return shifted_var_g
326+
271327

272328
# <LICENSE>
273329
# Copyright 2018 HGVS Contributors (https://github.com/biocommons/hgvs)

src/hgvs/variantmapper.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -427,6 +427,21 @@ def c_to_p(self, var_c, pro_ac=None):
427427
428428
"""
429429

430+
var_p = self._c_to_p(var_c, pro_ac=None)
431+
432+
if (
433+
var_c.posedit.edit.type in ['ins', 'dup']
434+
and var_c.type in "cnr"
435+
and var_c.posedit.pos is not None
436+
and (var_c.posedit.pos.start.offset != 0 or var_c.posedit.pos.end.offset != 0)
437+
and var_p.posedit is None
438+
):
439+
raise HGVSUnsupportedOperationError('c_to_p not supported on VariantMapper for this var_c, try AssemblyMapper')
440+
441+
return var_p
442+
443+
444+
def _c_to_p(self, var_c, pro_ac=None):
430445
if not (var_c.type == "c"):
431446
raise HGVSInvalidVariantError("Expected a cDNA (c.) variant; got " + str(var_c))
432447
if self._validator:

tests/data/gcp/real.tsv

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,5 @@ ID00056 NC_000010.10:g.89693009delG NM_000314.4:c.492+1delG NP_000305.3:p.?
5858
ID00057 NC_000010.10:g.89711873A>C NM_000314.4:c.493-2A>C NP_000305.3:p.?
5959
ID00058 NC_000010.10:g.89717676G>A NM_000314.4:c.701G>A NP_000305.3:p.(Arg234Gln)
6060
ID00059 NC_000010.10:g.89717777G>A NM_000314.4:c.801+1G>A NP_000305.3:p.?
61-
ID00060 NC_000010.10:g.89720648dupT NM_000314.4:c.802-3dupT NP_000305.3:p.?
6261
ID00061 NC_000005.9:g.131705667G>T NM_003060.3:c.3G>T NP_003051.1:p.Met1?
6362
ID00062 NC_000005.9:g.131706014G>A NM_003060.3:c.350G>A NP_003051.1:p.(Trp117*)

tests/issues/test_714.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
import os
2+
3+
import hgvs
4+
import pytest
5+
from support import CACHE
6+
7+
# Each row is (name, c. variant, expected p. string); expanded below into
# the dicts consumed by the parametrized test.
_CASE_ROWS = (
    (
        "ins with splice region preserved",
        "NM_004119.2:c.1837+21_1837+22insCGAGAGAATATGAATATGATCTCAAATGGGAGTTTCCAAGAGAAAATTTAGAGTTTGGTAAGAATGGAATGTGCCAAA",
        "NP_004110.2:p.(Lys614_Val615insAsnGlyMetCysGlnThrArgGluTyrGluTyrAspLeuLysTrpGluPheProArgGluAsnLeuGluPheGlyLys)",
    ),
    (
        "dup with splice region preserved",
        "NM_004119.2:c.1835_1837+3dup",
        "NP_004110.2:p.(Gly613_Lys614insIleGly)",
    ),
    (
        "dup with splice region preserved",
        "NM_005228.4:c.2284-5_2290dup",
        "NP_005219.2:p.(Ala763_Tyr764insPheGlnGluAla)",
    ),
    (
        "dup with splice region preserved",
        "NM_004456.4:c.2196-1_2196dup",
        "NP_004447.2:p.(Tyr733AspfsTer8)",
    ),
    (
        "dup with splice region preserved",
        "NM_016222.3:c.27+2_27+5dup",
        "NP_057306.2:p.(Arg10ValfsTer20)",
    ),
    (
        "dup with splice region preserved",
        "NM_182758.2:c.2953-31_2953-26dup",
        "NP_877435.2:p.?",
    ),
    (
        "dup with broken cigar mapping",
        "NM_000267.3:c.8315-290_8457dup",
        "NP_000258.1:p.?",
    ),
)

cases = [
    {"name": row_name, "var_c": row_c, "var_p": row_p}
    for row_name, row_c, row_p in _CASE_ROWS
]
44+
45+
46+
@pytest.fixture(scope="module")
def hp():
    """Module-scoped HGVS parser shared by the tests in this file."""
    parser = hgvs.parser.Parser()
    return parser
49+
50+
51+
@pytest.fixture(scope="module")
def hdp():
    """Module-scoped UTA data provider.

    The cache mode is read from the HGVS_CACHE_MODE environment variable
    and defaults to "run"; the cache location comes from ``CACHE``.
    """
    cache_mode = os.environ.get("HGVS_CACHE_MODE", "run")
    return hgvs.dataproviders.uta.connect(mode=cache_mode, cache=CACHE)
56+
57+
58+
@pytest.fixture(scope="module")
def am37(hdp):
    """Module-scoped AssemblyMapper for GRCh37 built on the ``hdp`` fixture."""
    mapper = hgvs.assemblymapper.AssemblyMapper(hdp, assembly_name="GRCh37")
    return mapper
61+
62+
63+
@pytest.mark.parametrize("case", cases)
def test_real_c_to_p(case, hp, am37):
    """Each case's c. variant must project to the expected p. string."""
    parsed_c = hp.parse(case["var_c"])
    projected_p = am37.c_to_p(parsed_c)
    assert str(projected_p) == case["var_p"]

tests/support/mock_input_source.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,9 @@ def get_tx_seq(self, ac):
5353
def get_seq(self, ac, start_i=None, end_i=None):
    """Return the ``[start_i:end_i]`` slice of the sequence from ``get_tx_seq(ac)``."""
    full_seq = self.get_tx_seq(ac)
    return full_seq[start_i:end_i]
5555

56+
def get_pro_ac_for_tx_ac(self, ac):
    """Return the fixed placeholder protein accession, ignoring ``ac``."""
    mock_pro_ac = 'MOCK'
    return mock_pro_ac
58+
5659
#
5760
# internal methods
5861
#

0 commit comments

Comments
 (0)