Skip to content

Commit b479446

Browse files
committed
Fix serology mapping and parsing
1 parent f423251 commit b479446

File tree

4 files changed

+53
-25
lines changed

4 files changed

+53
-25
lines changed

pyard/data_repository.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,23 @@ def generate_mac_codes(db_connection: sqlite3.Connection, refresh_mac: bool):
240240
dictionary=mac, columns=('code', 'alleles'))
241241

242242

243+
def to_serological_name(locus_name: str):
    """
    Convert a DNA-style name into its serological antigen name.

    The text before the ``*`` is cut down to at most its first two
    characters and the text after the ``*`` is appended to it unchanged.
    http://hla.alleles.org/antigens/recognised_serology.html
    Eg:
      A*1 -> A1
      ...
      DRB5*51 -> DR51

    :param locus_name: name of the form ``<locus>*<number>``
    :return: Serological equivalent
    """
    # Two-value unpacking raises ValueError unless exactly one '*' is present.
    dna_locus, antigen = locus_name.split('*')
    return dna_locus[:2] + antigen
258+
259+
243260
def generate_serology_mapping(db_connection: sqlite3.Connection, imgt_version):
244261
if not db.table_exists(db_connection, 'serology_mapping'):
245262
# Load WMDA serology mapping data
@@ -270,8 +287,13 @@ def generate_serology_mapping(db_connection: sqlite3.Connection, imgt_version):
270287
sero_mapping_combined = pd.concat([usa[['Sero', 'Allele']],
271288
psa[['Sero', 'Allele']],
272289
asa[['Sero', 'Allele']]])
273-
sero_mapping = sero_mapping_combined.groupby('Sero').\
274-
apply(lambda x: '/'.join(sorted(x['Allele']))).\
290+
291+
# Map to only valid serological antigen name
292+
sero_mapping_combined['Sero'] = sero_mapping_combined['Sero']. \
293+
apply(to_serological_name)
294+
295+
sero_mapping = sero_mapping_combined.groupby('Sero'). \
296+
apply(lambda x: '/'.join(sorted(x['Allele']))). \
275297
to_dict()
276298

277299
# Save the serology mapping to db

pyard/db.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ def mac_code_to_alleles(connection: sqlite3.Connection, code: str) -> List[str]:
7979
if result:
8080
alleles = result[0].split('/')
8181
else:
82-
alleles = None
82+
alleles = []
8383
return alleles
8484

8585

@@ -98,7 +98,7 @@ def serology_to_alleles(connection: sqlite3.Connection, serology: str) -> List[s
9898
if result:
9999
alleles = result[0].split('/')
100100
else:
101-
alleles = None
101+
alleles = []
102102
return alleles
103103

104104

pyard/pyard.py

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -174,20 +174,14 @@ def redux_gl(self, glstring: str, redux_type: str) -> str:
174174

175175
# Handle Serology
176176
if self.is_serology(glstring):
177-
if HLA_regex.search(glstring):
178-
# Remove HLA- prefix
179-
serology = glstring.split("-")[1]
180-
alleles = self._get_alleles_from_serology(serology)
181-
alleles = ['HLA-' + a for a in alleles]
182-
else:
183-
alleles = self._get_alleles_from_serology(glstring)
177+
alleles = self._get_alleles_from_serology(glstring)
184178
return self.redux_gl("/".join(alleles), redux_type)
185179

186180
loc_allele = glstring.split(":")
187181
loc_name, code = loc_allele[0], loc_allele[1]
188182

189183
# Handle XX codes
190-
if (self.is_mac(glstring) and glstring.split(":")[1] == "XX") and loc_name in self.xx_codes:
184+
if self.is_mac(glstring) and code == "XX" and loc_name in self.xx_codes:
191185
return self.redux_gl("/".join(self.xx_codes[loc_name]), redux_type)
192186

193187
# Handle MAC
@@ -207,11 +201,26 @@ def redux_gl(self, glstring: str, redux_type: str) -> str:
207201
@staticmethod
208202
def is_serology(allele: str) -> bool:
    """
    Tell whether a typing is written as a serological antigen.

    A serology name is a locus prefix followed only by digits. The prefix
    is two letters ('DR', 'DP' or 'DQ') or one letter ('A', 'B', 'C' or
    'D'). Anything containing '*' or ':' is never treated as serology.

    :param allele: The allele to test for serology
    :return: True if serology
    """
    # '*' and ':' never appear in a serology name.
    if any(separator in allele for separator in ('*', ':')):
        return False

    # Two-letter prefixes are checked first: a name starting with DR/DP/DQ
    # is decided here and never falls through to the one-letter 'D' check.
    if allele[:2] in ('DR', 'DP', 'DQ'):
        return allele[2:].isdigit()

    if allele[:1] in ('A', 'B', 'C', 'D'):
        return allele[1:].isdigit()

    return False
215224

216225
@staticmethod
217226
def is_mac(gl: str) -> bool:
@@ -255,6 +264,8 @@ def isvalid(self, allele: str) -> bool:
255264
:return: allele or empty
256265
:rtype: bool
257266
"""
267+
if allele == '':
268+
return False
258269
if not self.is_mac(allele) and not self.is_serology(allele):
259270
# Alleles ending with P or G are valid_alleles
260271
if allele.endswith(('P', 'G')):
@@ -310,7 +321,6 @@ def mac_toG(self, allele: str) -> str:
310321
return ''
311322
else:
312323
return "/".join(group)
313-
314324
else:
315325
return ''
316326

tests/features/serology.feature

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,9 @@ Feature: Serology
1212

1313
Examples: Valid A serology typings
1414
| Serology | Level | Redux Allele |
15-
| A*10 | G | A*26:01:01G/A*26:10/A*26:15/A*26:92/A*66:01:01G/A*66:03:01G |
16-
| A*10 | lg | A*26:01g/A*26:10g/A*26:15g/A*26:92g/A*66:01g/A*66:03g |
17-
| A*10 | lgx | A*26:01/A*26:10/A*26:15/A*26:92/A*66:01/A*66:03 |
18-
19-
Examples: With HLA- prefix
20-
| Serology | Level | Redux Allele |
21-
| HLA-A*10 | G | HLA-A*26:01:01G/HLA-A*26:10/HLA-A*26:15/HLA-A*26:92/HLA-A*66:01:01G/HLA-A*66:03:01G |
22-
| HLA-B*15:03 | G | HLA-B*15:03:01G |
23-
| HLA-DQB1*1 | G | HLA-DQB1*06:11:01/HLA-DQB1*06:11:02/HLA-DQB1*06:11:03/HLA-DQB1*06:12 |
24-
| HLA-DQB1*1 | lg | HLA-DQB1*06:11g/HLA-DQB1*06:12g |
15+
| A10 | G | A*26:01:01G/A*26:10/A*26:15/A*26:92/A*66:01:01G/A*66:03:01G |
16+
| A10 | lg | A*26:01g/A*26:10g/A*26:15g/A*26:92g/A*66:01g/A*66:03g |
17+
| A10 | lgx | A*26:01/A*26:10/A*26:15/A*26:92/A*66:01/A*66:03 |
18+
| A19 | G | A*02:65/A*33:09 |
19+
| DR1403 | G | DRB1*14:03:01/DRB1*14:03:02 |
20+
| DR2 | G | DRB1*15:08/DRB1*16:03 |

0 commit comments

Comments
 (0)