Skip to content

Commit 84f0971

Browse files
authored
Merge pull request #276 from pbashyal-nmdp/allele-specific-mac-codes
Validation of allele specific MAC codes
2 parents b9e8220 + d7f788e commit 84f0971

File tree

6 files changed

+85
-35
lines changed

6 files changed

+85
-35
lines changed

pyard/ard.py

Lines changed: 38 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import re
2626
import sqlite3
2727
import sys
28+
from collections import Counter
2829
from typing import Iterable, List
2930

3031
from . import broad_splits, smart_sort
@@ -408,8 +409,8 @@ def redux(self, glstring: str, redux_type: VALID_REDUCTION_TYPES) -> str:
408409
)
409410

410411
# Handle MAC
411-
if self._config["reduce_MAC"] and self.is_mac(glstring):
412-
if db.is_valid_mac_code(self.db_connection, code):
412+
if self._config["reduce_MAC"] and code.isalpha():
413+
if self.is_mac(glstring): # Make sure it's a valid MAC
413414
if HLA_regex.search(glstring):
414415
# Remove HLA- prefix
415416
allele_name = glstring.split("-")[1]
@@ -436,12 +437,7 @@ def validate(self, glstring):
436437
:param glstring: GL String to validate
437438
:return: boolean indicating success
438439
"""
439-
try:
440-
return self._is_valid_gl(glstring)
441-
except InvalidAlleleError as e:
442-
raise InvalidTypingError(
443-
f"{glstring} is not valid GL String. \n {e.message}", e
444-
) from None
440+
return self._is_valid_gl(glstring)
445441

446442
def is_XX(self, glstring: str, loc_antigen: str = None, code: str = None) -> bool:
447443
if loc_antigen is None or code is None:
@@ -484,12 +480,39 @@ def is_mac(self, allele: str) -> bool:
484480
:return: True if MAC
485481
"""
486482
if ":" in allele:
487-
code = allele.split(":")[1]
488-
try:
483+
allele_split = allele.split(":")
484+
if len(allele_split) == 2: # MACs have only single :
485+
locus_antigen, code = allele_split
489486
if code.isalpha():
490-
return db.is_valid_mac_code(self.db_connection, code)
491-
except sqlite3.OperationalError as e:
492-
print("Error: ", e)
487+
try:
488+
alleles = db.mac_code_to_alleles(self.db_connection, code)
489+
if alleles:
490+
if any(map(lambda a: ":" in a, alleles)):
491+
# allele specific antigen codes have ':' in the MAC mapping
492+
# e.g. CFWRN -> 15:01/15:98/15:157/15:202/
493+
# 15:239/15:280/15:340/35:43/35:67/35:79/35:102/35:118/35:185/51:220
494+
# Extract the antigens from the mapped alleles
495+
antigen_groups = map(lambda a: a.split(":")[0], alleles)
496+
# Rule 1: The 1st field with the most allele designations in the request is
497+
# the 1st field of the allele code designation
498+
# Rule 2: If there is a tie in the number of allele designations sharing the 1st field,
499+
# the 1st field with the lowest numeric value is selected.
500+
antigen_counts = Counter(antigen_groups)
501+
# Create a table of antigen to its counts
502+
# '15': 7
503+
# '35': 6
504+
# '51': 1
505+
# Valid antigen is the first most common one.
506+
# As it's presorted in db, also satisfies Rule 2.
507+
valid_antigen = antigen_counts.most_common(1).pop()[0]
508+
# Get antigen value 15 from 'DRB1*15'
509+
provided_antigen = locus_antigen.split("*").pop()
510+
# The MAC is only valid if the given antigen satisfies antigen matching Rules 1 and 2
511+
return provided_antigen == valid_antigen
512+
# Antigen group codes (no ':' in the mapped alleles) are always valid
513+
return True
514+
except sqlite3.OperationalError as e:
515+
print("Error: ", e)
493516
return False
494517

495518
def is_v2(self, allele: str) -> bool:
@@ -719,8 +742,8 @@ def expand_mac(self, mac_code: str):
719742
:return: GL String of expanded alleles
720743
:rtype: str
721744
"""
722-
locus_antigen, code = mac_code.split(":")
723-
if db.is_valid_mac_code(self.db_connection, code):
745+
if self.is_mac(mac_code): # Validate MAC first
746+
locus_antigen, code = mac_code.split(":")
724747
if HLA_regex.search(mac_code):
725748
locus_antigen = locus_antigen.split("-")[1] # Remove HLA- prefix
726749
return "/".join(

pyard/db.py

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -172,21 +172,6 @@ def alleles_to_mac_code(
172172
return None
173173

174174

175-
def is_valid_mac_code(connection: sqlite3.Connection, code: str) -> bool:
176-
"""
177-
Check db if the MAC code exists.
178-
179-
:param connection: db connection of type sqlite.Connection
180-
:param code: MAC code
181-
:return: code is MAC code ?
182-
"""
183-
mac_query = "SELECT count(alleles) from mac_codes where code = ?"
184-
cursor = connection.execute(mac_query, (code,))
185-
result = cursor.fetchone()
186-
cursor.close()
187-
return result[0] > 0
188-
189-
190175
def serology_to_alleles(connection: sqlite3.Connection, serology: str) -> List[str]:
191176
"""
192177
Look up Serology in the database and return corresponding list of alleles.

scripts/pyard

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,9 @@ if __name__ == "__main__":
155155
except InvalidTypingError as e:
156156
print("Typing Error:", e.message, file=sys.stderr)
157157
sys.exit(2)
158+
except InvalidMACError as e:
159+
print("MAC Error:", e.message, file=sys.stderr)
160+
sys.exit(3)
158161
else:
159162
# Remove ard and close db connection
160163
del ard

tests/features/mac.feature

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,3 +48,26 @@ Feature: MAC (Multiple Allele Code)
4848
| A*01:01/A*01:02 | A*01:AB |
4949
| HLA-A*25:01/HLA-A*26:01 | HLA-A*25:BYHR |
5050
| HLA-A*02:01/HLA-A*02:09/HLA-A*02:43N | HLA-A*02:GNF |
51+
52+
53+
Scenario Outline: Validate allele specific antigen MACs
54+
55+
MAC validation rules for allele specific antigen codes:
56+
- The 1st field with the most allele designations in the request is
57+
the 1st field of the allele code designation
58+
- If there is a tie in the number of allele designations sharing the 1st field,
59+
the 1st field with the lowest numeric value is selected.
60+
61+
62+
Given the MAC code is <MAC>
63+
When checking for validity of the MAC
64+
Then the validness is <Validity>
65+
66+
Examples:
67+
| MAC | Validity |
68+
| DRB1*07:DFJR | Invalid |
69+
| DRB1*15:DFJR | Valid |
70+
| DPB1*08:BHHE | Invalid |
71+
| DPB1*19:BHHE | Valid |
72+
| A*31:CMZEY | Invalid |
73+
| A*02:CMZEY | Valid |

tests/steps/mac.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
from behave import *
22
from hamcrest import assert_that, is_
33

4+
from pyard.exceptions import InvalidAlleleError
5+
46

57
@given("the MAC code is {mac_code}")
68
def step_impl(context, mac_code):
@@ -30,3 +32,17 @@ def step_impl(context):
3032
@then("the decoded MAC is {mac_code}")
3133
def step_impl(context, mac_code):
3234
assert_that(context.mac_code, is_(mac_code))
35+
36+
37+
@when("checking for validity of the MAC")
38+
def step_impl(context):
39+
try:
40+
context.is_valid = context.ard.validate(context.mac_code)
41+
except InvalidAlleleError:
42+
context.is_valid = False
43+
44+
45+
@then("the validness is {validity}")
46+
def step_impl(context, validity):
47+
valid = validity == "Valid"
48+
assert_that(context.is_valid, is_(valid))

tests/test_pyard.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434

3535
import pyard
3636
from pyard.constants import DEFAULT_CACHE_SIZE
37-
from pyard.exceptions import InvalidAlleleError, InvalidMACError, InvalidTypingError
37+
from pyard.exceptions import InvalidAlleleError
3838
from pyard.misc import validate_reduction_type
3939

4040

@@ -135,15 +135,15 @@ def test_redux_types(self):
135135
validate_reduction_type("XX")
136136

137137
def test_empty_allele(self):
138-
with self.assertRaises(InvalidTypingError):
138+
with self.assertRaises(InvalidAlleleError):
139139
self.ard.redux("A*", "lgx")
140140

141141
def test_fp_allele(self):
142-
with self.assertRaises(InvalidTypingError):
142+
with self.assertRaises(InvalidAlleleError):
143143
self.ard.redux("A*0.123", "lgx")
144144

145145
def test_empty_fields(self):
146-
with self.assertRaises(InvalidTypingError):
146+
with self.assertRaises(InvalidAlleleError):
147147
# : without any data
148148
self.ard.redux("DQA1*01:01:01:G", "lgx")
149149

@@ -152,7 +152,7 @@ def test_invalid_serology(self):
152152
serology_a10 = self.ard.redux("A10", "lgx")
153153
self.assertEqual(serology_a10.split("/")[0], "A*25:01")
154154
# And A100 isn't a valid typing
155-
with self.assertRaises(InvalidTypingError):
155+
with self.assertRaises(InvalidAlleleError):
156156
self.ard.redux("A100", "lgx")
157157

158158
def test_allele_duplicated(self):

0 commit comments

Comments
 (0)