Skip to content

Commit 7dafdfd

Browse files
committed
Support Version 2 Nomenclature
- Add initial support for importing V2 into db - Lookup V2 nomenclature
1 parent 5d7d42b commit 7dafdfd

File tree

5 files changed

+140
-8
lines changed

5 files changed

+140
-8
lines changed

pyard/data_repository.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,25 @@
1+
# -*- coding: utf-8 -*-
2+
#
3+
# py-ard
4+
# Copyright (c) 2020 Be The Match operated by National Marrow Donor Program. All Rights Reserved.
5+
#
6+
# This library is free software; you can redistribute it and/or modify it
7+
# under the terms of the GNU Lesser General Public License as published
8+
# by the Free Software Foundation; either version 3 of the License, or (at
9+
# your option) any later version.
10+
#
11+
# This library is distributed in the hope that it will be useful, but WITHOUT
12+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
14+
# License for more details.
15+
#
16+
# You should have received a copy of the GNU Lesser General Public License
17+
# along with this library; if not, write to the Free Software Foundation,
18+
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
19+
#
20+
# > http://www.fsf.org/licensing/licenses/lgpl.html
21+
# > http://www.opensource.org/licenses/lgpl-license.php
22+
#
123
import functools
224
import sqlite3
325

@@ -299,3 +321,25 @@ def generate_serology_mapping(db_connection: sqlite3.Connection, imgt_version):
299321
# Save the serology mapping to db
300322
db.save_dict(db_connection, table_name='serology_mapping',
301323
dictionary=sero_mapping, columns=('serology', 'allele_list'))
324+
325+
326+
def generate_v2_to_v3_mapping(db_connection: sqlite3.Connection, imgt_version):
    """
    Populate the ``v2_mapping`` table (columns ``v2``, ``v3``) when the
    database does not have it yet.

    The rows written are a small hard-coded placeholder set; see the TODO
    below for the intended real data source.

    :param db_connection: open sqlite3 connection to write the table to
    :param imgt_version: IMGT database version (not used by this function yet)
    """
    # Nothing to do when a previous run already created the table.
    if db.table_exists(db_connection, 'v2_mapping'):
        return

    # TODO: Create mapping table using both the allele list history and
    #  deleted alleles as reference.
    # Temporary Example
    placeholder_mapping = {
        "A*0104": "A*01:04N",
        "A*0105N": "A*01:04N",
        "A*0111": "A*01:11N",
        "A*01123": "A*01:123N",
        "A*0115": "A*01:15N",
        "A*0116": "A*01:16N",
        "A*01160": "A*01:160N",
        "A*01162": "A*01:162N",
        "A*01178": "A*01:178N",
        "A*01179": "A*01:179N",
        "DRB5*02ZB": "DRB5*02:UTV",
    }
    db.save_dict(db_connection,
                 table_name='v2_mapping',
                 dictionary=placeholder_mapping,
                 columns=('v2', 'v3'))

pyard/db.py

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,25 @@
1+
# -*- coding: utf-8 -*-
2+
#
3+
# py-ard
4+
# Copyright (c) 2020 Be The Match operated by National Marrow Donor Program. All Rights Reserved.
5+
#
6+
# This library is free software; you can redistribute it and/or modify it
7+
# under the terms of the GNU Lesser General Public License as published
8+
# by the Free Software Foundation; either version 3 of the License, or (at
9+
# your option) any later version.
10+
#
11+
# This library is distributed in the hope that it will be useful, but WITHOUT
12+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
14+
# License for more details.
15+
#
16+
# You should have received a copy of the GNU Lesser General Public License
17+
# along with this library; if not, write to the Free Software Foundation,
18+
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
19+
#
20+
# > http://www.fsf.org/licensing/licenses/lgpl.html
21+
# > http://www.opensource.org/licenses/lgpl-license.php
22+
#
123
import pathlib
224
import sqlite3
325
from typing import Tuple, Dict, Set, List
@@ -92,7 +114,7 @@ def serology_to_alleles(connection: sqlite3.Connection, serology: str) -> List[s
92114
:return: List of alleles
93115
"""
94116
serology_query = "SELECT allele_list from serology_mapping where serology = ?"
95-
cursor = connection.execute(serology_query, (serology, ))
117+
cursor = connection.execute(serology_query, (serology,))
96118
result = cursor.fetchone()
97119
cursor.close()
98120
if result:
@@ -102,6 +124,23 @@ def serology_to_alleles(connection: sqlite3.Connection, serology: str) -> List[s
102124
return alleles
103125

104126

127+
def v2_to_v3_allele(connection: sqlite3.Connection, v2_allele: str) -> str:
    """
    Look up the V3 version of a V2 allele in the ``v2_mapping`` table.

    :param connection: db connection of type sqlite3.Connection
    :param v2_allele: allele in V2 nomenclature, e.g. ``A*0104``
    :return: the mapped V3 allele, or an empty string when the table has
             no row for ``v2_allele``
    """
    v2_query = "SELECT v3 from v2_mapping where v2 = ?"
    cursor = connection.execute(v2_query, (v2_allele,))
    try:
        result = cursor.fetchone()
    finally:
        # Release the cursor even if fetching the row raises.
        cursor.close()
    return result[0] if result else ''
142+
143+
105144
def is_valid_mac_code(connection: sqlite3.Connection, code: str) -> bool:
106145
"""
107146
Check db if the MAC code exists.
@@ -215,4 +254,4 @@ def load_dict(connection: sqlite3.Connection, table_name: str, columns: Tuple[st
215254
cursor.execute(select_all_query)
216255
table_as_dict = {k: v for k, v in cursor.fetchall()}
217256
cursor.close()
218-
return table_as_dict
257+
return table_as_dict

pyard/pyard.py

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,8 @@
2727

2828
from . import db
2929
from .data_repository import generate_ars_mapping, generate_mac_codes, generate_alleles_and_xx_codes, \
30-
generate_serology_mapping
31-
from .db import is_valid_mac_code, mac_code_to_alleles
30+
generate_serology_mapping, generate_v2_to_v3_mapping
31+
from .db import is_valid_mac_code, mac_code_to_alleles, v2_to_v3_allele
3232
from .smart_sort import smart_sort_comparator
3333

3434
HLA_regex = re.compile("^HLA-")
@@ -66,6 +66,8 @@ def __init__(self, imgt_version: str = 'Latest',
6666
self.dup_g, self._G, self._lg, self._lgx = generate_ars_mapping(self.db_connection, imgt_version)
6767
# Load Serology mappings
6868
generate_serology_mapping(self.db_connection, imgt_version)
69+
# Load V2 to V3 mappings
70+
generate_v2_to_v3_mapping(self.db_connection, imgt_version)
6971

7072
# Close the current read-write db connection
7173
self.db_connection.close()
@@ -172,6 +174,11 @@ def redux_gl(self, glstring: str, redux_type: str) -> str:
172174
return "/".join(sorted(set([self.redux_gl(a, redux_type) for a in glstring.split("/")]),
173175
key=functools.cmp_to_key(smart_sort_comparator)))
174176

177+
# Handle V2 to V3 mapping
178+
if self.is_v2(glstring):
179+
glstring = self._map_v2_to_v3(glstring)
180+
return self.redux_gl(glstring, redux_type)
181+
175182
# Handle Serology
176183
if self.is_serology(glstring):
177184
alleles = self._get_alleles_from_serology(glstring)
@@ -232,6 +239,17 @@ def is_mac(gl: str) -> bool:
232239
"""
233240
return re.search(r":\D+", gl) is not None
234241

242+
@staticmethod
def is_v2(allele: str) -> bool:
    """
    Check whether a typing looks like Version 2 nomenclature.

    Version 2 of the nomenclature is a single field: it has the ``*``
    locus separator but no ``:`` field separator.
    Eg: A*0104

    :param allele: Possible allele
    :return: True if the typing contains ``*`` and no ``:``
    """
    return '*' in allele and ':' not in allele
252+
235253
def _is_valid_allele(self, allele):
236254
"""
237255
Test if allele is valid in the current imgt database
@@ -255,7 +273,7 @@ def _get_alleles(self, code, locus_antigen) -> Iterable[str]:
255273
# else it's a group expansion
256274
is_allelic_expansion = any([':' in allele for allele in alleles])
257275
if is_allelic_expansion:
258-
locus = locus_antigen.split('*')[0] # Just keep the locus name
276+
locus = locus_antigen.split('*')[0] # Just keep the locus name
259277
alleles = [f'{locus}*{a}' for a in alleles]
260278
else:
261279
alleles = [f'{locus_antigen}:{a}' for a in alleles]
@@ -272,6 +290,14 @@ def _get_alleles_from_serology(self, serology) -> Iterable[str]:
272290
else:
273291
return alleles
274292

293+
def _map_v2_to_v3(self, v2_allele):
    """
    Translate a V2 versioned allele into its V3 version using the
    database lookup ``v2_to_v3_allele`` on this instance's connection.

    :param v2_allele: V2 versioned allele
    :return: V3 versioned allele as returned by the lookup
    """
    v3_allele = v2_to_v3_allele(self.db_connection, v2_allele)
    return v3_allele
300+
275301
def isvalid(self, allele: str) -> bool:
276302
"""
277303
Determines validity of an allele
@@ -283,7 +309,9 @@ def isvalid(self, allele: str) -> bool:
283309
"""
284310
if allele == '':
285311
return False
286-
if not self.is_mac(allele) and not self.is_serology(allele):
312+
if not self.is_mac(allele) and \
313+
not self.is_serology(allele) and \
314+
not self.is_v2(allele):
287315
# Alleles ending with P or G are valid_alleles
288316
if allele.endswith(('P', 'G')):
289317
# remove the last character
@@ -330,7 +358,7 @@ def mac_toG(self, allele: str) -> str:
330358
"""
331359
locus_antigen, code = allele.split(":")
332360
if HLA_regex.search(allele):
333-
locus_antigen = locus_antigen.split("-")[1] # Remove HLA- prefix
361+
locus_antigen = locus_antigen.split("-")[1] # Remove HLA- prefix
334362
if is_valid_mac_code(self.db_connection, code):
335363
alleles = self._get_alleles(code, locus_antigen)
336364
group = [self.toG(a) for a in alleles]
@@ -370,7 +398,7 @@ def expand_mac(self, mac_code: str):
370398
locus_antigen, code = mac_code.split(":")
371399
if is_valid_mac_code(self.db_connection, code):
372400
if HLA_regex.search(mac_code):
373-
locus_antigen = locus_antigen.split("-")[1] # Remove HLA- prefix
401+
locus_antigen = locus_antigen.split("-")[1] # Remove HLA- prefix
374402
return ['HLA-' + a for a in self._get_alleles(code, locus_antigen)]
375403
else:
376404
return list(self._get_alleles(code, locus_antigen))

tests/features/version2.feature

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
Feature: Version 2 Nomenclature
2+
3+
py-ard is able to reduce version 2 HLA nomenclature.
4+
5+
Scenario Outline:
6+
7+
Given the version 2 typing is <Version2>
8+
When reducing on the <Level> level (ambiguous)
9+
Then the reduced allele is found to be <Redux Allele>
10+
11+
12+
Examples: Valid version 2 typings
13+
| Version2 | Level | Redux Allele |
14+
| A*0105N | G | A*01:01:01G |
15+
| A*0111 | G | A*01:11N |
16+
| DRB5*02ZB | G | DRB5*01:02:01G/DRB5*01:03/DRB5*02:02:01G/DRB5*02:03/DRB5*02:04 |

tests/steps/redux_allele.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,3 +27,8 @@ def step_impl(context, redux_allele):
2727
@given("the serology typing is {serology}")
2828
def step_impl(context, serology):
2929
context.allele = serology
30+
31+
32+
@given("the version 2 typing is {v2_allele}")
def step_impl(context, v2_allele):
    """Store the version 2 typing from the step text as ``context.allele``."""
    context.allele = v2_allele

0 commit comments

Comments
 (0)