Skip to content

Commit aaf8be8

Browse files
committed
Add type hints to assemblymapper.py
1 parent 5e1f29c commit aaf8be8

File tree

1 file changed

+63
-27
lines changed

1 file changed

+63
-27
lines changed

src/hgvs/assemblymapper.py

Lines changed: 63 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from bioutils.sequences import TranslationTable
66

77
import hgvs
8+
from hgvs.alignmentmapper import AlignmentMapper
89
import hgvs.normalizer
910
from hgvs.exceptions import (
1011
HGVSDataNotAvailableError,
@@ -13,6 +14,8 @@
1314
HGVSUnsupportedOperationError,
1415
)
1516
from hgvs.variantmapper import VariantMapper
17+
from hgvs.sequencevariant import SequenceVariant
18+
from typing import Optional
1619

1720
_logger = logging.getLogger(__name__)
1821

@@ -50,7 +53,7 @@ class AssemblyMapper(VariantMapper):
5053

5154
def __init__(
5255
self,
53-
hdp,
56+
hdp: hgvs.dataproviders.interface.Interface,
5457
assembly_name=hgvs.global_config.mapping.assembly,
5558
alt_aln_method=hgvs.global_config.mapping.alt_aln_method,
5659
normalize=hgvs.global_config.mapping.normalize,
@@ -60,7 +63,7 @@ def __init__(
6063
add_gene_symbol=hgvs.global_config.mapping.add_gene_symbol,
6164
*args,
6265
**kwargs,
63-
):
66+
) -> None:
6467
"""
6568
:param object hdp: instance of hgvs.dataprovider subclass
6669
:param bool replace_reference: replace reference (entails additional network access)
@@ -88,44 +91,54 @@ def __init__(
8891
self.in_par_assume = in_par_assume
8992
self._norm = None
9093
if self.normalize:
91-
vm = VariantMapper(hdp=hdp, replace_reference=replace_reference,
92-
prevalidation_level=prevalidation_level,
93-
add_gene_symbol=add_gene_symbol)
94+
vm = VariantMapper(
95+
hdp=hdp,
96+
replace_reference=replace_reference,
97+
prevalidation_level=prevalidation_level,
98+
add_gene_symbol=add_gene_symbol,
99+
)
94100
self._norm = hgvs.normalizer.Normalizer(
95-
hdp, alt_aln_method=alt_aln_method, validate=False, variantmapper=vm,
101+
hdp,
102+
alt_aln_method=alt_aln_method,
103+
validate=False,
104+
variantmapper=vm,
96105
)
97106
self._assembly_map = {
98-
k: v for k, v in hdp.get_assembly_map(self.assembly_name).items() if k.startswith("NC_")
107+
k: v
108+
for k, v in hdp.get_assembly_map(self.assembly_name).items()
109+
if k.startswith("NC_")
99110
}
100111
self._assembly_accessions = set(self._assembly_map.keys())
101112

102-
def __repr__(self) -> str:
    """Return a constructor-like summary of this mapper's settings.

    The text is the module and class name followed by the current
    alt_aln_method, assembly_name, normalize, prevalidation_level and
    replace_reference values.
    """
    template = (
        "{self.__module__}.{t.__name__}(alt_aln_method={self.alt_aln_method}, "
        "assembly_name={self.assembly_name}, normalize={self.normalize}, "
        "prevalidation_level={self.prevalidation_level}, "
        "replace_reference={self.replace_reference})"
    )
    # type(self) is used instead of a fixed name so that a subclass
    # instance is labelled with its own class name.
    return template.format(self=self, t=type(self))
109122

110-
def g_to_c(self, var_g: SequenceVariant, tx_ac: str) -> SequenceVariant:
    """Map var_g onto transcript tx_ac using the parent class's g_to_c.

    The alignment method is not a parameter of this method: the
    alt_aln_method stored on this AssemblyMapper is always supplied.

    :param var_g: variant to map
    :param str tx_ac: transcript accession to map onto
    :returns: the mapped variant after it has been passed through
        _maybe_normalize
    """
    mapped = super(AssemblyMapper, self).g_to_c(
        var_g,
        tx_ac,
        alt_aln_method=self.alt_aln_method,
    )
    return self._maybe_normalize(mapped)
115128

116-
def g_to_n(self, var_g: SequenceVariant, tx_ac: str) -> SequenceVariant:
    """Map var_g onto transcript tx_ac using the parent class's g_to_n.

    The alignment method is not a parameter of this method: the
    alt_aln_method stored on this AssemblyMapper is always supplied.

    :param var_g: variant to map
    :param str tx_ac: transcript accession to map onto
    :returns: the mapped variant after it has been passed through
        _maybe_normalize
    """
    mapped = super(AssemblyMapper, self).g_to_n(
        var_g,
        tx_ac,
        alt_aln_method=self.alt_aln_method,
    )
    return self._maybe_normalize(mapped)
121134

122-
def g_to_t(self, var_g: SequenceVariant, tx_ac: str) -> SequenceVariant:
    """Map var_g onto transcript tx_ac using the parent class's g_to_t.

    The alignment method is not a parameter of this method: the
    alt_aln_method stored on this AssemblyMapper is always supplied.

    :param var_g: variant to map
    :param str tx_ac: transcript accession to map onto
    :returns: the mapped variant after it has been passed through
        _maybe_normalize
    """
    mapped = super(AssemblyMapper, self).g_to_t(
        var_g,
        tx_ac,
        alt_aln_method=self.alt_aln_method,
    )
    return self._maybe_normalize(mapped)
127140

128-
def c_to_g(self, var_c):
141+
def c_to_g(self, var_c: SequenceVariant) -> SequenceVariant:
129142
alt_ac = self._alt_ac_for_tx_ac(var_c.ac)
130143
var_out = super(AssemblyMapper, self).c_to_g(
131144
var_c, alt_ac, alt_aln_method=self.alt_aln_method
@@ -146,7 +159,7 @@ def t_to_g(self, var_t):
146159
)
147160
return self._maybe_normalize(var_out)
148161

149-
def t_to_p(self, var_t):
162+
def t_to_p(self, var_t: SequenceVariant) -> SequenceVariant:
150163
"""Return a protein variant, or "non-coding" for non-coding variant types
151164
152165
CAUTION: Unlike other x_to_y methods that always return
@@ -165,36 +178,44 @@ def t_to_p(self, var_t):
165178
"Expected a coding (c.) or non-coding (n.) variant; got " + str(var_t)
166179
)
167180

168-
def c_to_n(self, var_c: SequenceVariant) -> SequenceVariant:
    """Convert var_c with the parent class's c_to_n.

    The accession to pass as alt_ac is looked up from the variant's own
    accession (var_c.ac) through _alt_ac_for_tx_ac, and this mapper's
    alt_aln_method is supplied as the alignment method.

    :param var_c: variant to convert
    :returns: the converted variant after it has been passed through
        _maybe_normalize
    """
    chrom_ac = self._alt_ac_for_tx_ac(var_c.ac)
    converted = super(AssemblyMapper, self).c_to_n(
        var_c,
        alt_ac=chrom_ac,
        alt_aln_method=self.alt_aln_method,
    )
    return self._maybe_normalize(converted)
174187

175-
def n_to_c(self, var_n: SequenceVariant) -> SequenceVariant:
    """Convert var_n with the parent class's n_to_c.

    The accession to pass as alt_ac is looked up from the variant's own
    accession (var_n.ac) through _alt_ac_for_tx_ac, and this mapper's
    alt_aln_method is supplied as the alignment method.

    :param var_n: variant to convert
    :returns: the converted variant after it has been passed through
        _maybe_normalize
    """
    chrom_ac = self._alt_ac_for_tx_ac(var_n.ac)
    converted = super(AssemblyMapper, self).n_to_c(
        var_n,
        alt_ac=chrom_ac,
        alt_aln_method=self.alt_aln_method,
    )
    return self._maybe_normalize(converted)
181194

182-
def c_to_p(
    self,
    var_c: SequenceVariant,
    translation_table=TranslationTable.standard,
) -> SequenceVariant:
    """Convert var_c with the parent class's c_to_p.

    The accession to pass as alt_ac is looked up from the variant's own
    accession (var_c.ac) through _alt_ac_for_tx_ac, and this mapper's
    alt_aln_method is supplied as the alignment method.

    :param var_c: variant to convert
    :param translation_table: forwarded unchanged to the parent's c_to_p;
        defaults to TranslationTable.standard
    :returns: the converted variant after it has been passed through
        _maybe_normalize
    """
    chrom_ac = self._alt_ac_for_tx_ac(var_c.ac)
    converted = super(AssemblyMapper, self).c_to_p(
        var_c,
        alt_ac=chrom_ac,
        alt_aln_method=self.alt_aln_method,
        translation_table=translation_table,
    )
    return self._maybe_normalize(converted)
188206

189-
def relevant_transcripts(self, var_g: SequenceVariant) -> list:
    """return list of transcript accessions (strings) for given variant,
    selected by genomic overlap

    The data provider's get_tx_for_region is queried with the variant's
    accession, this mapper's alt_aln_method, and the base coordinates of
    the variant's start and end positions.

    :param var_g: variant whose region is used for the lookup
    :returns: the "tx_ac" value of every record returned by the data
        provider, in the order the provider returned them
    """
    # NOTE: the return annotation is `list`, not SequenceVariant: this
    # method hands back accession strings, never a variant object.
    interval = var_g.posedit.pos
    records = self.hdp.get_tx_for_region(
        var_g.ac,
        self.alt_aln_method,
        interval.start.base,
        interval.end.base,
    )
    return [record["tx_ac"] for record in records]
196217

197-
def _alt_ac_for_tx_ac(self, tx_ac):
218+
def _alt_ac_for_tx_ac(self, tx_ac: str) -> str:
198219
"""return chromosomal accession for given transcript accession (and
199220
the_assembly and aln_method setting used to instantiate this
200221
AssemblyMapper)
@@ -219,12 +240,18 @@ def _alt_ac_for_tx_ac(self, tx_ac):
219240
names = set(self._assembly_map[ac] for ac in alt_acs)
220241
if names != set("XY"):
221242
alts = ", ".join(
222-
["{ac} ({n})".format(ac=ac, n=self._assembly_map[ac]) for ac in alt_acs]
243+
[
244+
"{ac} ({n})".format(ac=ac, n=self._assembly_map[ac])
245+
for ac in alt_acs
246+
]
223247
)
224248
raise HGVSError(
225249
"Multiple chromosomal alignments for {tx_ac} in {an}"
226250
" using {am} (non-pseudoautosomal region) [{alts}]".format(
227-
tx_ac=tx_ac, an=self.assembly_name, am=self.alt_aln_method, alts=alts
251+
tx_ac=tx_ac,
252+
an=self.assembly_name,
253+
am=self.alt_aln_method,
254+
alts=alts,
228255
)
229256
)
230257

@@ -237,7 +264,9 @@ def _alt_ac_for_tx_ac(self, tx_ac):
237264
)
238265
)
239266

240-
alt_acs = [ac for ac in alt_acs if self._assembly_map[ac] == self.in_par_assume]
267+
alt_acs = [
268+
ac for ac in alt_acs if self._assembly_map[ac] == self.in_par_assume
269+
]
241270
if len(alt_acs) != 1:
242271
raise HGVSError(
243272
"Multiple chromosomal alignments for {tx_ac} in {an}"
@@ -253,7 +282,12 @@ def _alt_ac_for_tx_ac(self, tx_ac):
253282
assert len(alt_acs) == 1, "Should have exactly one alignment at this point"
254283
return alt_acs[0]
255284

256-
def _fetch_AlignmentMapper(self, tx_ac, alt_ac=None, alt_aln_method=None):
285+
def _fetch_AlignmentMapper(
286+
self,
287+
tx_ac: str,
288+
alt_ac: Optional[str] = None,
289+
alt_aln_method: Optional[str] = None,
290+
) -> AlignmentMapper:
257291
"""convenience version of VariantMapper._fetch_AlignmentMapper that
258292
derives alt_ac from transcript, assembly, and alt_aln_method
259293
used to instantiate the AssemblyMapper instance
@@ -264,9 +298,11 @@ def _fetch_AlignmentMapper(self, tx_ac, alt_ac=None, alt_aln_method=None):
264298
alt_ac = self._alt_ac_for_tx_ac(tx_ac)
265299
if alt_aln_method is None:
266300
alt_aln_method = self.alt_aln_method
267-
return super(AssemblyMapper, self)._fetch_AlignmentMapper(tx_ac, alt_ac, alt_aln_method)
301+
return super(AssemblyMapper, self)._fetch_AlignmentMapper(
302+
tx_ac, alt_ac, alt_aln_method
303+
)
268304

269-
def _maybe_normalize(self, var):
305+
def _maybe_normalize(self, var: SequenceVariant) -> SequenceVariant:
270306
"""normalize variant if requested, and ignore HGVSUnsupportedOperationError
271307
This is better than checking whether the variant is intronic because
272308
future UTAs will support LRG, which will enable checking intronic variants.

0 commit comments

Comments
 (0)