Skip to content

Commit 95a4ee9

Browse files
authored
Merge pull request #238 from pbashyal-nmdp/serology_redux
Support Reduction to Serology
2 parents 3ef24f7 + 8764f4c commit 95a4ee9

File tree

8 files changed

+74
-3
lines changed

8 files changed

+74
-3
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,7 @@ ard.redux('B14', 'lg')
176176
| `W` | Reduce/Expand to 3 field WHO nomenclature level |
177177
| `exon` | Reduce/Expand to exon level |
178178
| `U2` | Reduce to 2 field unambiguous level |
179+
| `S` | Reduce to Serological level |
179180

180181
### Perform DRB1 blending with DRB3, DRB4 and DRB5
181182

pyard/ard.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
from .misc import (
3434
get_n_field_allele,
3535
get_2field_allele,
36+
is_2_field_allele,
3637
validate_reduction_type,
3738
)
3839
from .constants import (
@@ -253,6 +254,24 @@ def _redux_allele(
253254
else:
254255
# If ambiguous, reduce to G group level
255256
return self._redux_allele(allele, "lgx")
257+
elif redux_type == "S":
258+
# find serology equivalent in serology_mapping
259+
serology_mapping = db.find_serology_for_allele(self.db_connection, allele)
260+
serology_set = set()
261+
if is_2_field_allele(allele):
262+
for serology, allele_list in serology_mapping.items():
263+
allele_list_lgx = self.redux(allele_list, "lgx")
264+
if allele in allele_list_lgx:
265+
serology_set.add(serology)
266+
else:
267+
for serology, allele_list in serology_mapping.items():
268+
if allele in allele_list:
269+
serology_set.add(serology)
270+
return "/".join(
271+
sorted(
272+
serology_set, key=functools.cmp_to_key(self.smart_sort_comparator)
273+
)
274+
)
256275
else:
257276
# Make this an explicit lookup to the g_group or p_group table
258277
# for stringent validation
@@ -293,7 +312,7 @@ def _sorted_unique_gl(self, gls: List[str], delim: str) -> str:
293312
all_gls = []
294313
for gl in gls:
295314
all_gls += gl.split(delim)
296-
unique_gls = set(all_gls)
315+
unique_gls = filter(lambda s: s != "", set(all_gls))
297316
return delim.join(
298317
sorted(unique_gls, key=functools.cmp_to_key(self.smart_sort_comparator))
299318
)

pyard/constants.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525

2626
HLA_regex = re.compile("^HLA-")
2727

28-
VALID_REDUCTION_TYPES = ["G", "P", "lg", "lgx", "W", "exon", "U2"]
28+
VALID_REDUCTION_TYPES = ["G", "P", "lg", "lgx", "W", "exon", "U2", "S"]
2929
expression_chars = ["N", "Q", "L", "S"]
3030
# List of P and G characters
3131
PandG_chars = ["P", "G"]

pyard/db.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,27 @@ def similar_alleles(connection: sqlite3.Connection, allele_name: str) -> Set[str
370370
return alleles
371371

372372

373+
def find_serology_for_allele(
    connection: sqlite3.Connection, allele_name: str
) -> Dict[str, str]:
    """
    Find the serology entries whose allele list mentions ``allele_name``.

    The lookup is a substring match (``LIKE '%allele_name%'``) against the
    ``allele_list`` column of the ``serology_mapping`` table. Rows are
    therefore also returned when ``allele_name`` is only a prefix or a
    fragment of a listed allele; callers that need an exact match must
    inspect the returned allele lists themselves.

    :param connection: db connection of type sqlite3.Connection
    :param allele_name: Allele name to search for within each allele list
    :return: mapping of serology name to that row's ``allele_list`` string,
        e.g. ``{'A1': 'A*01:01:01:01/A*01:01:01:03'}``; empty if no row matches
    """
    query = (
        "SELECT serology, allele_list FROM serology_mapping WHERE allele_list LIKE ?"
    )
    cursor = connection.execute(query, (f"%{allele_name}%",))
    # fetchall() returns a list of (serology, allele_list) tuples,
    # e.g. [('A1', 'A*01:01:01:01/A*01:01:01:03')], which dict() consumes
    # directly as key/value pairs.
    return dict(cursor.fetchall())
392+
393+
373394
def get_user_version(connection: sqlite3.Connection) -> int:
374395
"""
375396
Retrieve user_version from db

pyard/misc.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,10 @@ def number_of_fields(allele: str) -> int:
6767
return len(allele.split(":"))
6868

6969

70+
def is_2_field_allele(allele: str) -> bool:
    """Tell whether ``allele`` consists of exactly two fields."""
    field_count = number_of_fields(allele)
    return field_count == 2
72+
73+
7074
# computes a valid G name based on the ambiguity string
7175
def get_G_name(a: str) -> str:
7276
a = a.split("/")[0]

pyard/smart_sort.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,11 @@ def smart_sort_comparator(a1, a2):
6161
if a1 == a2:
6262
return 0
6363

64-
# Extract and Compare first fields first
64+
# Handle serology
65+
if ":" not in a1:
66+
return 1 if a1 > a2 else -1
67+
68+
# Extract and Compare 1st fields first
6569
a1_f1 = int(a1[a1.find("*") + 1 : a1.find(":")])
6670
a2_f1 = int(a2[a2.find("*") + 1 : a2.find(":")])
6771

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
toml==0.10.2
2+
numpy==1.24.3
23
pandas==1.5.3
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
Feature: Serology Reduction
2+
3+
For a GL String, find the serological equivalents
4+
Serological reduction mode is *S*
5+
6+
Scenario Outline: Serology Reduction
7+
8+
Given the allele as <Allele>
9+
When reducing on the <Level> level with ping
10+
Then the reduced allele is found to be <Redux Allele>
11+
12+
Examples:
13+
| Allele | Level | Redux Allele |
14+
| A*01:01:01:01 | S | A1 |
15+
| A*01:01 | S | A1 |
16+
| A*01:AABJE | S | A1/A36 |
17+
| A*03:XX | S | A3 |
18+
| B*44:02:01:11/B*44:02:01:12 | S | B12/B44 |
19+
| B*13:03 | S | B13 |
20+
| B*13:04 | S | B15/B21 |
21+
| B*15:01/B*15:02/B*15:03/B*15:04 | S | B15/B62/B70/B72/B75 |

0 commit comments

Comments
 (0)