Skip to content

Commit 2fbe8fa

Browse files
authored
Merge pull request #310 from pbashyal-nmdp/serology_to_xx
Add XX mappings of Serology
2 parents f577bd9 + 4072cd4 commit 2fbe8fa

File tree

6 files changed: 219 additions and 92 deletions.

pyard/ard.py

Lines changed: 24 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -124,7 +124,7 @@ def __init__(
124124
dr.generate_serology_mapping(
125125
self.db_connection, imgt_version, self.serology_mapping, self._redux_allele
126126
)
127-
self.valid_serology_set = dr.build_valid_serology_set(self.db_connection)
127+
self.valid_serology_set = SerologyMapping.get_valid_serology_names()
128128

129129
# Load V2 to V3 mappings
130130
dr.generate_v2_to_v3_mapping(self.db_connection, imgt_version)
@@ -436,16 +436,23 @@ def redux(self, glstring: str, redux_type: VALID_REDUCTION_TYPES) -> str:
436436
is_hla_prefix = HLA_regex.search(loc_antigen)
437437
if is_hla_prefix:
438438
loc_antigen = loc_antigen.split("-")[1]
439-
if self.is_XX(glstring, loc_antigen, code):
440-
if is_hla_prefix:
441-
reduced_alleles = self.redux(
442-
"/".join(self.code_mappings.xx_codes[loc_antigen]), redux_type
443-
)
444-
return "/".join(["HLA-" + a for a in reduced_alleles.split("/")])
439+
if code == "XX":
440+
if self.is_XX(glstring, loc_antigen, code):
441+
if is_hla_prefix:
442+
reduced_alleles = self.redux(
443+
"/".join(self.code_mappings.xx_codes[loc_antigen]),
444+
redux_type,
445+
)
446+
return "/".join(
447+
["HLA-" + a for a in reduced_alleles.split("/")]
448+
)
449+
else:
450+
return self.redux(
451+
"/".join(self.code_mappings.xx_codes[loc_antigen]),
452+
redux_type,
453+
)
445454
else:
446-
return self.redux(
447-
"/".join(self.code_mappings.xx_codes[loc_antigen]), redux_type
448-
)
455+
raise InvalidTypingError(f"{glstring} is not valid XX code")
449456

450457
# Handle MAC
451458
if self._config["reduce_MAC"] and code.isalpha():
@@ -633,7 +640,13 @@ def find_broad_splits(self, allele) -> tuple:
633640
return self.serology_mapping.find_splits(allele)
634641

635642
def find_associated_antigen(self, serology) -> str:
636-
return self.serology_mapping.serology_associated_map.get(serology, serology)
643+
return self.serology_mapping.find_associated_antigen(serology)
644+
645+
# NOTE(review): lru_cache on an instance method keys the cache on `self`,
# so cached entries hold a reference to the instance.
@functools.lru_cache()
def find_xx_from_serology(self, serology):
    """
    Look up the XX allele stored in the database for a serology.

    :param serology: serology name to look up
    :return: whatever `db.find_xx_for_serology` returns for the serology
    :raises InvalidAlleleError: if `is_serology` rejects the given name
    """
    # Reject anything that is not a recognised serology before hitting the db
    if not self.is_serology(serology):
        raise InvalidAlleleError(f"{serology} is not a valid serology")
    return db.find_xx_for_serology(self.db_connection, serology)
637650

638651
def _get_alleles(self, code, locus_antigen) -> Iterable[str]:
639652
"""

pyard/data_repository.py

Lines changed: 27 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -27,15 +27,14 @@
2727
import pyard.load
2828
from pyard.smart_sort import smart_sort_comparator
2929
from . import db
30-
from .serology import broad_splits_dna_mapping, get_all_valid_serology_names
30+
from .constants import expression_chars
3131
from .load import (
3232
load_g_group,
3333
load_p_group,
3434
load_allele_list,
3535
load_serology_mappings,
3636
load_latest_version,
3737
)
38-
from .constants import expression_chars
3938
from .mappings import (
4039
ars_mapping_tables,
4140
ARSMapping,
@@ -50,6 +49,7 @@
5049
number_of_fields,
5150
get_1field_allele,
5251
)
52+
from .serology import broad_splits_dna_mapping, SerologyMapping
5353

5454

5555
def expression_reduce(df):
@@ -356,7 +356,10 @@ def to_serological_name(locus_name: str):
356356

357357

358358
def generate_serology_mapping(
359-
db_connection: sqlite3.Connection, imgt_version, serology_mapping, redux_function
359+
db_connection: sqlite3.Connection,
360+
imgt_version: str,
361+
serology_mapping: SerologyMapping,
362+
redux_function,
360363
):
361364
if not db.table_exists(db_connection, "serology_mapping"):
362365
df_sero = load_serology_mappings(imgt_version)
@@ -412,22 +415,28 @@ def generate_serology_mapping(
412415
if split in sero_mapping:
413416
sero_mapping[broad] = sero_mapping[split]
414417

415-
# re-sort allele lists into smartsort order
416-
for sero in sero_mapping.keys():
417-
sero_mapping[sero] = (
418-
"/".join(
419-
sorted(
420-
sero_mapping[sero][0],
421-
key=functools.cmp_to_key(smart_sort_comparator),
422-
)
423-
),
424-
"/".join(
425-
sorted(
426-
sero_mapping[sero][1],
427-
key=functools.cmp_to_key(smart_sort_comparator),
418+
# Create a mapping of serology to alleles, lgx_alleles and associated XX allele
419+
serology_xx_mapping = serology_mapping.get_xx_mappings()
420+
# re-sort allele lists into smart-sort order
421+
for sero in serology_xx_mapping:
422+
if sero in sero_mapping:
423+
sero_mapping[sero] = (
424+
"/".join(
425+
sorted(
426+
sero_mapping[sero][0],
427+
key=functools.cmp_to_key(smart_sort_comparator),
428+
)
429+
),
430+
"/".join(
431+
sorted(
432+
sero_mapping[sero][1],
433+
key=functools.cmp_to_key(smart_sort_comparator),
434+
),
428435
),
429-
),
430-
)
436+
serology_xx_mapping[sero],
437+
)
438+
else:
439+
sero_mapping[sero] = (None, None, serology_xx_mapping[sero])
431440

432441
db.save_serology_mappings(db_connection, sero_mapping)
433442

@@ -483,12 +492,3 @@ def generate_cwd_mapping(db_connection: sqlite3.Connection):
483492
if not db.table_exists(db_connection, "cwd2"):
484493
cwd2_map = pyard.load.load_cwd2()
485494
db.save_cwd2(db_connection, cwd2_map)
486-
487-
488-
def build_valid_serology_set(db_connection: sqlite3.Connection):
489-
valid_serology_names = get_all_valid_serology_names()
490-
# Save to db if `valid_serology` table is not present
491-
if not db.table_exists(db_connection, "valid_serology"):
492-
db.save_set(db_connection, "valid_serology", valid_serology_names, "serology")
493-
494-
return set(valid_serology_names)

pyard/db.py

Lines changed: 22 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -412,6 +412,22 @@ def find_serology_for_allele(
412412
return serology_mapping
413413

414414

415+
def find_xx_for_serology(connection: sqlite3.Connection, serology: str) -> str:
    """
    Find the corresponding XX allele for the given serology

    :param connection: db connection of type sqlite.Connection
    :param serology: serology for which to find XX allele
    :return: XX allele for given serology, or None when the serology has no
        row in the `serology_mapping` table
    """
    # Plain string literal: the serology is bound through the `?` placeholder,
    # so no f-string interpolation is needed.
    query = "SELECT xx FROM serology_mapping WHERE serology = ?"
    cursor = connection.execute(query, (serology,))
    try:
        result = cursor.fetchone()
    finally:
        # Release the cursor once the single row has been read, matching
        # how get_user_version closes its cursor.
        cursor.close()
    return result[0] if result else None
429+
430+
415431
def get_user_version(connection: sqlite3.Connection) -> int:
416432
"""
417433
Retrieve user_version from db
@@ -424,9 +440,7 @@ def get_user_version(connection: sqlite3.Connection) -> int:
424440
version = result[0]
425441
cursor.close()
426442

427-
if version:
428-
return version
429-
return None
443+
return version
430444

431445

432446
def set_user_version(connection: sqlite3.Connection, version: int):
@@ -579,15 +593,16 @@ def save_serology_mappings(db_connection, sero_mapping):
579593
# Create table
580594
create_table_sql = f"""CREATE TABLE serology_mapping (
581595
serology TEXT PRIMARY KEY,
582-
allele_list TEXT NOT NULL,
583-
lgx_allele_list TEXT NOT NULL
596+
allele_list TEXT,
597+
lgx_allele_list TEXT,
598+
xx TEXT NOT NULL
584599
)"""
585600
cursor.execute(create_table_sql)
586601

587-
rows = ((k, v[0], v[1]) for k, v in sero_mapping.items())
602+
rows = ((k, v[0], v[1], v[2]) for k, v in sero_mapping.items())
588603

589604
# insert
590-
cursor.executemany(f"INSERT INTO serology_mapping VALUES (?, ?, ?)", rows)
605+
cursor.executemany(f"INSERT INTO serology_mapping VALUES (?, ?, ?, ?)", rows)
591606

592607
# commit transaction - writes to the db
593608
db_connection.commit()

pyard/serology.py

Lines changed: 99 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -20,15 +20,72 @@
2020
# > http://www.fsf.org/licensing/licenses/lgpl.html
2121
# > http://www.opensource.org/licenses/lgpl-license.php
2222
#
23-
from pyard.constants import HLA_regex
23+
import re
2424

25+
from pyard.constants import HLA_regex
2526

2627
#
2728
# HLA Antigens
2829
# List of all recognised serological collected from:
2930
# https://hla.alleles.org/antigens/recognised_serology.html
3031
#
31-
def get_all_valid_serology_names():
32+
33+
34+
# -#
35+
# Broad, Splits and Associated Antigens
36+
# http://hla.alleles.org/antigens/broads_splits.html
37+
#
38+
#
39+
# Mapping Generated from `dna_relshp.csv` file
40+
#
41+
broad_splits_dna_mapping = {
42+
"A*09": ["A*23", "A*24"],
43+
"A*10": ["A*25", "A*26", "A*34", "A*66"],
44+
"A*19": ["A*29", "A*30", "A*31", "A*32", "A*33", "A*74"],
45+
"A*28": ["A*68", "A*69"],
46+
"B*05": ["B*51", "B*52"],
47+
"B*12": ["B*44", "B*45"],
48+
"B*16": ["B*38", "B*39"],
49+
"B*17": ["B*57", "B*58"],
50+
"B*21": ["B*49", "B*50"],
51+
"B*22": ["B*54", "B*55", "B*56"],
52+
"C*10": ["C*03", "C*04"],
53+
"DQB1*01": ["DQB1*05", "DQB1*06"],
54+
"DRB1*02": ["DRB1*15", "DRB1*16"],
55+
"DRB1*06": ["DRB1*13", "DRB1*14"],
56+
}
57+
58+
serology_xx_exception_mapping = {
59+
# Locus B
60+
# Broad B40
61+
"B60": "B*40:XX",
62+
"B61": "B*40:XX",
63+
# Broad B14
64+
"B64": "B*14:XX",
65+
"B65": "B*14:XX",
66+
# Broad B15
67+
"B62": "B*15:XX",
68+
"B63": "B*15:XX",
69+
"B70": "B*15:XX",
70+
"B75": "B*15:XX",
71+
"B76": "B*15:XX",
72+
"B77": "B*15:XX",
73+
# Broad B70
74+
"B71": "B*15:XX",
75+
"B72": "B*15:XX",
76+
"DR17": "DRB1*03:XX",
77+
"DR18": "DRB1*03:XX",
78+
# Locus DQB1
79+
# Broad DQ3
80+
"DQ7": "DQB1*03:XX",
81+
"DQ8": "DQB1*03:XX",
82+
"DQ9": "DQB1*03:XX",
83+
}
84+
85+
sero_antigen_regex = re.compile(r"(\D+)(\d+)")
86+
87+
88+
class SerologyMapping:
3289
valid_serology_map = {
3390
"A": [
3491
"A1",
@@ -153,7 +210,7 @@ def get_all_valid_serology_names():
153210
"Dw25",
154211
"Dw26",
155212
],
156-
"DR": [
213+
"DRB1": [
157214
"DR1",
158215
"DR103",
159216
"DR2",
@@ -179,40 +236,10 @@ def get_all_valid_serology_names():
179236
"DR52",
180237
"DR53",
181238
],
182-
"DQ": ["DQ1", "DQ2", "DQ3", "DQ4", "DQ5", "DQ6", "DQ7", "DQ8", "DQ9"],
183-
"DP": ["DPw1", "DPw2", "DPw3", "DPw4", "DPw5", "DPw6"],
239+
"DQB1": ["DQ1", "DQ2", "DQ3", "DQ4", "DQ5", "DQ6", "DQ7", "DQ8", "DQ9"],
240+
"DPB1": ["DPw1", "DPw2", "DPw3", "DPw4", "DPw5", "DPw6"],
184241
}
185242

186-
all_serology_names = [x for v in valid_serology_map.values() for x in v]
187-
return all_serology_names
188-
189-
190-
# -#
191-
# Broad, Splits and Associated Antigens
192-
# http://hla.alleles.org/antigens/broads_splits.html
193-
#
194-
#
195-
# Mapping Generated from `dna_relshp.csv` file
196-
#
197-
broad_splits_dna_mapping = {
198-
"A*09": ["A*23", "A*24"],
199-
"A*10": ["A*25", "A*26", "A*34", "A*66"],
200-
"A*19": ["A*29", "A*30", "A*31", "A*32", "A*33", "A*74"],
201-
"A*28": ["A*68", "A*69"],
202-
"B*05": ["B*51", "B*52"],
203-
"B*12": ["B*44", "B*45"],
204-
"B*16": ["B*38", "B*39"],
205-
"B*17": ["B*57", "B*58"],
206-
"B*21": ["B*49", "B*50"],
207-
"B*22": ["B*54", "B*55", "B*56"],
208-
"C*10": ["C*03", "C*04"],
209-
"DQB1*01": ["DQB1*05", "DQB1*06"],
210-
"DRB1*02": ["DRB1*15", "DRB1*16"],
211-
"DRB1*06": ["DRB1*13", "DRB1*14"],
212-
}
213-
214-
215-
class SerologyMapping:
216243
def __init__(self, broad_splits_mapping, associated_mapping):
217244
self.broad_splits_map = broad_splits_mapping
218245
self.serology_associated_map = associated_mapping
@@ -237,8 +264,43 @@ def find_splits(self, allele: str) -> tuple:
237264
if allele_name in mapping[broad]:
238265
return self._get_mapping(broad, mapping, prefix)
239266

240-
@staticmethod
241-
def _get_mapping(broad, mapping, prefix):
267+
def find_associated_antigen(self, serology):
    """
    Return the antigen associated with a serology.

    :param serology: serology name to look up
    :return: the entry from `serology_associated_map` for the serology, or
        the serology itself when the map has no entry for it
    """
    associated_map = self.serology_associated_map
    return associated_map.get(serology, serology)
269+
270+
def get_xx_mappings(self):
    """
    Build the serology to XX allele mapping for every valid serology.

    Walks each locus in `SerologyMapping.valid_serology_map` and converts
    each of its serologies with `_map_serology_to_xx`.

    :return: dict keyed by serology name with the XX allele as value
    """
    return {
        serology: self._map_serology_to_xx(locus, serology)
        for locus, serologies in SerologyMapping.valid_serology_map.items()
        for serology in serologies
    }
279+
280+
@classmethod
281+
def get_valid_serology_names(cls):
282+
all_serology_names = {x for v in cls.valid_serology_map.values() for x in v}
283+
return all_serology_names
284+
285+
def _map_serology_to_xx(self, locus, serology):
    """
    Build the XX allele name for a serology of the given locus.

    Serologies listed in `serology_xx_exception_mapping` use the XX allele
    recorded there. Any other serology is first replaced by its associated
    antigen; the numeric part of that name, zero-padded to two digits,
    becomes the first field of the XX allele.

    :param locus: locus name used as the prefix of the XX allele
    :param serology: serology name to convert
    :return: XX allele of the form `<locus>*<antigen group>:XX`
    :raises ValueError: if the serology name has no leading non-digit part
        followed by digits
    """
    # Exceptions take priority over the derived name
    xx_exception = serology_xx_exception_mapping.get(serology)
    if xx_exception is not None:
        return xx_exception

    # Use the associated serology for XX version
    serology = self.find_associated_antigen(serology)

    # Extract just the digits; fail with a clear message rather than an
    # AttributeError on None when the name does not have the expected shape
    antigen_match = sero_antigen_regex.match(serology)
    if antigen_match is None:
        raise ValueError(f"Unable to find antigen number in serology {serology}")
    antigen_group = antigen_match.group(2)

    # Pad numbers with 0 for single digit numbers
    antigen_group_num = int(antigen_group)
    if antigen_group_num < 10:
        antigen_group = f"{antigen_group_num:02}"

    # Build the XX allele
    return f"{locus}*{antigen_group}:XX"
301+
302+
@classmethod
303+
def _get_mapping(cls, broad, mapping, prefix):
242304
if prefix:
243305
return "HLA-" + broad, list(map(lambda x: "HLA-" + x, mapping[broad]))
244306
else:

0 commit comments

Comments
 (0)