Skip to content

Commit df76726

Browse files
authored
Shortnulls (#154)
* shortnulls implementation * shortnulls passes all tests * more meaningful variable names, fixed error that caused invalids on first run * made shortnull data structure consistent * even more pythonic use of set, change back the location of the database * now with more pythonisms
1 parent 54a46cb commit df76726

File tree

2 files changed

+81
-9
lines changed

2 files changed

+81
-9
lines changed

pyard/data_repository.py

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,15 @@ def generate_alleles_and_xx_codes_and_who(db_connection: sqlite3.Connection, img
211211
('allele_1d', 'allele_list'))
212212
xx_codes = {k: v.split('/') for k, v in xx_codes.items()}
213213

214-
return valid_alleles, who_alleles, xx_codes, who_group
214+
shortnulls = db.load_dict(db_connection, 'shortnulls',
215+
('shortnull', 'allele_list'))
216+
shortnulls = {k: v.split('/') for k, v in shortnulls.items()}
217+
218+
exp_alleles = db.load_dict(db_connection, 'exp_alleles',
219+
('exp_allele', 'allele_list'))
220+
exp_alleles = {k: v.split('/') for k, v in exp_alleles.items()}
221+
222+
return valid_alleles, who_alleles, xx_codes, who_group, shortnulls, exp_alleles
215223

216224
# Create a Pandas DataFrame from the mac_code list file
217225
# Skip the header (first 6 lines) and use only the Allele column
@@ -229,6 +237,12 @@ def generate_alleles_and_xx_codes_and_who(db_connection: sqlite3.Connection, img
229237
exp_alleles = allele_df[allele_df['Allele'].apply(
230238
lambda a: a[-1] in expression_chars and number_of_fields(a) > 2)]
231239
exp_alleles = exp_alleles.groupby('2d').apply(expression_reduce).dropna()
240+
241+
#flat_exp_alleles = {k: '/'.join(sorted(v, key=functools.cmp_to_key(smart_sort_comparator)))
242+
# for k, v in exp_alleles.items()}
243+
db.save_dict(db_connection, 'exp_alleles', exp_alleles,
244+
('exp_allele', 'allele_list'))
245+
232246
# Create valid set of alleles:
233247
# All full length alleles
234248
# All 3rd and 2nd field versions of longer alleles
@@ -304,7 +318,37 @@ def generate_alleles_and_xx_codes_and_who(db_connection: sqlite3.Connection, img
304318
db.save_dict(db_connection, 'who_group', flat_who_group,
305319
columns=('who', 'allele_list'))
306320

307-
return valid_alleles, who_alleles, xx_codes, who_group
321+
# shortnulls
322+
# scan WHO alleles for those with expression characters and make shortnull mappings
323+
# DRB4*01:03N | DRB4*01:03:01:02N/DRB4*01:03:01:13N
324+
# DRB5*01:08N | DRB5*01:08:01N/DRB5*01:08:02N
325+
shortnulls = dict()
326+
for who in who_group:
327+
# e.g. DRB4*01:03
328+
expression_alleles=[]
329+
expression_chars_found = set()
330+
if who[-1] not in expression_chars and who[-1] not in ['G', 'P'] and ":" in who:
331+
for an_allele in who_group[who]:
332+
# if an allele in a who_group has an expression character but the group allele doesn't,
333+
# add it to shortnulls
334+
last_char = an_allele[-1]
335+
if last_char in expression_chars:
336+
# e.g. DRB4*01:03:01:02N
337+
expression_chars_found.add(last_char)
338+
# add this allele to the list that this shortnull expands to
339+
expression_alleles.append(an_allele)
340+
# only create a shortnull if there is exactly one distinct expression character in this who_group
341+
# there is nothing to be done for who_groups that have both Q and L for example
342+
if expression_alleles:
343+
if len(expression_chars_found) ==1:
344+
# e.g. DRB4*01:03N
345+
a_shortnull = who + list(expression_chars_found)[0]
346+
shortnulls[a_shortnull] = "/".join(expression_alleles)
347+
348+
db.save_dict(db_connection, 'shortnulls', shortnulls, ('shortnull', 'allele_list'))
349+
shortnulls = {k: v.split('/') for k, v in shortnulls.items()}
350+
351+
return valid_alleles, who_alleles, xx_codes, who_group, shortnulls, exp_alleles
308352

309353

310354
def generate_mac_codes(db_connection: sqlite3.Connection, refresh_mac: bool):

pyard/pyard.py

Lines changed: 35 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
from . import data_repository as dr
3131
from .smart_sort import smart_sort_comparator
3232
from .exceptions import InvalidAlleleError, InvalidMACError, InvalidTypingError
33-
from .misc import get_n_field_allele
33+
from .misc import get_n_field_allele, get_2field_allele
3434

3535
HLA_regex = re.compile("^HLA-")
3636

@@ -47,6 +47,7 @@
4747
"reduce_P": True,
4848
"reduce_XX": True,
4949
"reduce_MAC": True,
50+
"reduce_shortnull": True,
5051
"map_drb345_to_drbx": True,
5152
"verbose_log": True
5253
}
@@ -100,7 +101,7 @@ def __init__(self, imgt_version: str = 'Latest', data_dir: str = None, config: d
100101
# Load ARS mappings
101102
self.ars_mappings = dr.generate_ars_mapping(self.db_connection, imgt_version)
102103
# Load Alleles and XX Codes
103-
self.valid_alleles, self.who_alleles, self.xx_codes, self.who_group = \
104+
self.valid_alleles, self.who_alleles, self.xx_codes, self.who_group, self.shortnulls, self.exp_alleles = \
104105
dr.generate_alleles_and_xx_codes_and_who(self.db_connection, imgt_version, self.ars_mappings)
105106

106107
# Load Serology mappings
@@ -210,6 +211,8 @@ def redux(self, allele: str, redux_type: VALID_REDUCTION_TYPES) -> str:
210211
# If ambiguous, reduce to G group level
211212
return self.redux(allele, 'lgx')
212213
else:
214+
# TODO: make this an explicit lookup to the g_group or p_group table
215+
# just having a shorter name be valid is not stringent enough
213216
if allele.endswith(('P', 'G')):
214217
allele = allele[:-1]
215218
if self._is_valid_allele(allele):
@@ -311,6 +314,11 @@ def redux_gl(self, glstring: str, redux_type: VALID_REDUCTION_TYPES) -> str:
311314
else:
312315
raise InvalidMACError(f"{glstring} is an invalid MAC.")
313316

317+
# Handle shortnulls
318+
if self._config["reduce_shortnull"] and self.is_shortnull(glstring):
319+
return self.redux_gl("/".join(self.shortnulls[glstring]), redux_type)
320+
#return self.redux_gl(self.shortnulls[glstring], redux_type)
321+
314322
return self.redux(glstring, redux_type)
315323

316324
def is_XX(self, glstring: str, loc_antigen: str = None, code: str = None) -> bool:
@@ -393,6 +401,23 @@ def _is_valid_allele(self, allele):
393401
"""
394402
return allele in self.valid_alleles
395403

404+
def is_shortnull(self, allele):
405+
"""
406+
Test if allele is valid in list of shortnull alleles and
407+
the reduce_shortnull is configured to True (WMDA rules)
408+
:param allele: Allele to test
409+
:return: bool, True if allele is in the shortnulls mapping and reduce_shortnull is enabled
410+
"""
411+
return allele in self.shortnulls and self._config["reduce_shortnull"]
412+
413+
def is_exp_allele(self, allele):
414+
"""
415+
Test if allele is valid as a shortening (WHO rules)
416+
:param allele: Allele to test
417+
:return: bool, True if allele is in the exp_alleles mapping
418+
"""
419+
return allele in self.exp_alleles
420+
396421
def _get_alleles(self, code, locus_antigen) -> Iterable[str]:
397422
"""
398423
Look up allele code in database and generate alleles
@@ -411,7 +436,7 @@ def _get_alleles(self, code, locus_antigen) -> Iterable[str]:
411436
else:
412437
alleles = [f'{locus_antigen}:{a}' for a in alleles]
413438

414-
return filter(self._is_valid_allele, alleles)
439+
return list(filter(self._is_valid_allele, alleles))
415440

416441
def _get_alleles_from_serology(self, serology) -> Iterable[str]:
417442
alleles = db.serology_to_alleles(self.db_connection, serology)
@@ -491,16 +516,19 @@ def isvalid(self, allele: str) -> bool:
491516
if not self.is_mac(allele) and \
492517
not self.is_XX(allele) and \
493518
not self.is_serology(allele) and \
494-
not self.is_v2(allele):
519+
not self.is_v2(allele) and \
520+
not self.is_shortnull(allele):
495521
# Alleles ending with P or G are valid_alleles
496522
if allele.endswith(('P', 'G')):
497523
# remove the last character
498524
allele = allele[:-1]
499525
if self._is_valid_allele(allele):
500526
return True
501-
else:
502-
# reduce to 2 field for things like DPB1*28:01:01G
503-
allele = ':'.join(allele.split(':')[0:2])
527+
else:
528+
allele = get_2field_allele(allele)
529+
if self._is_valid_allele(allele):
530+
return True
531+
504532
return self._is_valid_allele(allele)
505533
return True
506534

0 commit comments

Comments
 (0)