Skip to content

Commit 432feee

Browse files
authored
Merge pull request #68 from pbashyal-nmdp/predict_v3_from_v2
Predict v3 from v2
2 parents 9017299 + 34e4888 commit 432feee

File tree

3 files changed

+78
-10
lines changed

3 files changed

+78
-10
lines changed

README.rst

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ py-ard
1111
:alt: Documentation Status
1212

1313

14-
ARD reduction for HLA with python
14+
ARD reduction for HLA with Python
1515

1616
* Free software: LGPL 3.0
1717
* Documentation: https://pyard.readthedocs.io.
@@ -68,26 +68,33 @@ Example
6868
# You can choose to refresh the MAC code for previously used versions
6969
# ard = pyard.ARD(3290, refresh_mac=True)
7070
71-
# Allele to reduce
71+
#
72+
# Reduce Allele
73+
#
7274
allele = "A*01:01:01"
7375
7476
ard.redux(allele, 'G')
75-
# 'A*01:01:01G'
77+
# >>> 'A*01:01:01G'
7678
7779
ard.redux(allele, 'lg')
78-
# 'A*01:01g'
80+
# >>> 'A*01:01g'
7981
8082
ard.redux(allele, 'lgx')
81-
# 'A*01:01'
83+
# >>> 'A*01:01'
8284
85+
#
86+
# Reduce GL String
87+
#
8388
ard.redux_gl("A*01:01/A*01:01N+A*02:AB^B*07:02+B*07:AB", "G")
8489
# 'B*07:02:01G+B*07:02:01G^A*01:01:01G+A*02:01:01G/A*02:02'
8590
8691
# py-ard can also reduce serology based typings
8792
ard.redux_gl('HLA-A*10^HLA-A*9', 'lg')
93+
# >>> ard_gl
8894
# 'HLA-A*24:19g/HLA-A*24:22g^HLA-A*26:01g/HLA-A*26:10g/HLA-A*26:15g/HLA-A*26:92g/HLA-A*66:01g/HLA-A*66:03g'
8995
9096
97+
9198
Command Line Tools
9299
------------------
93100

@@ -107,6 +114,9 @@ Command Line Tools
107114
Created py-ard version 3290 database
108115
Updated v2_mapping table with 'map2to3.csv' mapping file.
109116
117+
# Update the Latest IMGT database with the V2 to V3 mappings
118+
$ pyard-import --v2-to-v3-mapping map2to3.csv
119+
110120
# Reduce a gl string from command line
111121
$ pyard --gl 'A*01:AB' -r lgx
112122
A*01:01/A*01:02
@@ -115,4 +125,4 @@ Command Line Tools
115125
DRB1*08:01:01G/DRB1*08:02:01G/DRB1*08:03:02G/DRB1*08:04:01G/DRB1*08:05/ ...
116126
117127
$ pyard -v 3290 --gl 'A1' -r lgx
118-
A*01:01/A*01:02/A*01:03/A*01:06/A*01:07/A*01:08/A*01:09/A*01:10/A*01:12/ ...
128+
A*01:01/A*01:02/A*01:03/A*01:06/A*01:07/A*01:08/A*01:09/A*01:10/A*01:12/ ...

pyard/pyard.py

Lines changed: 60 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ def redux_gl(self, glstring: str, redux_type: str) -> str:
188188
loc_antigen, code = loc_allele[0], loc_allele[1]
189189

190190
# Handle XX codes
191-
if self.is_mac(glstring) and code == "XX" and loc_antigen in self.xx_codes:
191+
if self.is_XX(glstring, loc_antigen, code):
192192
return self.redux_gl("/".join(self.xx_codes[loc_antigen]), redux_type)
193193

194194
# Handle MAC
@@ -205,6 +205,12 @@ def redux_gl(self, glstring: str, redux_type: str) -> str:
205205

206206
return self.redux(glstring, redux_type)
207207

208+
def is_XX(self, glstring: str, loc_antigen: str = None, code: str = None) -> bool:
    """
    Check whether the given string is an XX code.

    :param glstring: Candidate string; its first two ':'-separated fields
                     are used when loc_antigen or code is not supplied
    :param loc_antigen: First ':'-separated field of glstring (optional)
    :param code: Second ':'-separated field of glstring (optional)
    :return: True when glstring passes ``is_mac``, the code is "XX" and
             loc_antigen is a key of ``self.xx_codes``; False otherwise
    """
    if loc_antigen is None or code is None:
        fields = glstring.split(":")
        if len(fields) < 2:
            # No ':' separator means there is no code field at all, so the
            # string cannot be an XX code (previously this raised IndexError).
            return False
        loc_antigen, code = fields[0], fields[1]
    return self.is_mac(glstring) and code == "XX" and loc_antigen in self.xx_codes
213+
208214
@staticmethod
209215
def is_serology(allele: str) -> bool:
210216
"""
@@ -248,7 +254,7 @@ def is_v2(allele: str) -> bool:
248254
:param allele: Possible allele
249255
:return: Is the allele in V2 nomenclature
250256
"""
251-
return '*' in allele and not ':' in allele
257+
return '*' in allele and ':' not in allele
252258

253259
def _is_valid_allele(self, allele):
254260
"""
@@ -290,13 +296,53 @@ def _get_alleles_from_serology(self, serology) -> Iterable[str]:
290296
else:
291297
return alleles
292298

299+
def _combine_with_colon(self, digits_field):
    """
    Split a string into consecutive 2-character chunks joined by ':'.

    :param digits_field: String to split (a trailing odd character forms
                         its own, shorter chunk)
    :return: The chunks joined with ':'; an empty string for empty input
    """
    chunks = []
    for start in range(0, len(digits_field), 2):
        chunks.append(digits_field[start:start + 2])
    return ':'.join(chunks)
302+
303+
def _predict_v3(self, v2_allele: str) -> str:
    """
    Use heuristic to predict V3 from V2

    The allele name (the part after '*') is split into its leading digits
    and whatever follows them; colons are then inserted into the digits
    according to how many digits there are.

    :param v2_allele: Allele in V2 format
    :return: V3 format of V2 allele; the input is returned unchanged when
             the name does not start with a digit or has a single digit
    """
    locus, allele_name = v2_allele.split('*')

    # Leading digits and the remaining (non-digit-led) tail of the name
    parsed = re.match(r'^(\d+)(.*)', allele_name)
    if parsed is None:
        return v2_allele
    digits, suffix = parsed.groups()

    digit_count = len(digits)
    if digit_count == 1:
        return v2_allele

    if digit_count == 2:
        # Two digits: a colon is only needed when something follows them
        fields = digits + ':' + suffix if suffix else digits
    elif locus.startswith('DP') and digit_count == 5:
        # DP loci with 5 digits: first field takes 3 digits
        fields = digits[:3] + ':' + digits[3:] + suffix
    elif digit_count % 2 == 0:
        # Even number of digits (4, 6, 8, ...): 2-digit fields
        fields = self._combine_with_colon(digits) + suffix
    else:
        # Remaining odd lengths: first 2 digits, then the rest as one field
        fields = digits[:2] + ':' + digits[2:] + suffix

    return locus + '*' + fields
333+
293334
def _map_v2_to_v3(self, v2_allele):
294335
"""
295336
Get V3 version of V2 versioned allele
296337
:param v2_allele: V2 versioned allele
297338
:return: V3 versioned allele
298339
"""
299-
return v2_to_v3_allele(self.db_connection, v2_allele)
340+
# Check if it's in the exception case mapping
341+
v3_allele = v2_to_v3_allele(self.db_connection, v2_allele)
342+
if not v3_allele:
343+
# Try and predict V3
344+
v3_allele = self._predict_v3(v2_allele)
345+
return v3_allele
300346

301347
def isvalid(self, allele: str) -> bool:
302348
"""
@@ -404,3 +450,14 @@ def expand_mac(self, mac_code: str):
404450
return list(self._get_alleles(code, locus_antigen))
405451

406452
return ''
453+
454+
def v2_to_v3(self, v2_allele) -> str:
    """
    Convert Version 2 Allele Name to Version 3 Allele Name

    Names that ``is_v2`` does not recognise as Version 2 are returned as is.

    :param v2_allele: Version 2 Allele Name
    :return: Version 3 Allele Name
    """
    if not self.is_v2(v2_allele):
        # Not a V2 name: nothing to convert
        return v2_allele
    return self._map_v2_to_v3(v2_allele)

scripts/pyard-import

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,11 +92,12 @@ if __name__ == '__main__':
9292
print(f"Created py-ard version {imgt_version} database")
9393
else:
9494
ard = pyard.ARD(data_dir=data_dir)
95+
imgt_version = 'Latest'
9596
print(f"Created Latest py-ard database")
9697
del ard
9798

9899
if v2_to_v3_dict:
99100
db_connection = db.create_db_connection(data_dir, imgt_version, ro=False)
100101
db.save_dict(db_connection, table_name='v2_mapping',
101102
dictionary=v2_to_v3_dict, columns=('v2', 'v3'))
102-
print(f"Updated v2_mapping table with '{args.v2_v3_mapping}' mapping file.")
103+
print(f"Updated v2_mapping table with '{args.v2_v3_mapping}' mapping file for {imgt_version} IMGT database.")

0 commit comments

Comments
 (0)