Skip to content

Commit b4d0052

Browse files
authored
Merge pull request #183 from pbashyal-nmdp/shortnull_for_exon
Support short null for `exon` mode
2 parents b69820e + 370fc6d commit b4d0052

File tree

3 files changed

+45
-22
lines changed

3 files changed

+45
-22
lines changed

pyard/data_repository.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -362,17 +362,12 @@ def generate_alleles_and_xx_codes_and_who(
362362
xx_codes = db.load_dict(db_connection, "xx_codes", ("allele_1d", "allele_list"))
363363
xx_codes = {k: v.split("/") for k, v in xx_codes.items()}
364364

365-
shortnulls = db.load_dict(
366-
db_connection, "shortnulls", ("shortnull", "allele_list")
367-
)
368-
shortnulls = {k: v.split("/") for k, v in shortnulls.items()}
369-
370365
exp_alleles = db.load_dict(
371366
db_connection, "exp_alleles", ("exp_allele", "allele_list")
372367
)
373368
exp_alleles = {k: v.split("/") for k, v in exp_alleles.items()}
374369

375-
return valid_alleles, who_alleles, xx_codes, who_group, shortnulls, exp_alleles
370+
return valid_alleles, who_alleles, xx_codes, who_group, exp_alleles
376371

377372
# Create a Pandas DataFrame from the mac_code list file
378373
# Skip the header (first 6 lines) and use only the Allele column
@@ -481,6 +476,17 @@ def generate_alleles_and_xx_codes_and_who(
481476
db_connection, "who_group", flat_who_group, columns=("who", "allele_list")
482477
)
483478

479+
return valid_alleles, who_alleles, xx_codes, who_group, exp_alleles
480+
481+
482+
def generate_short_nulls(db_connection, who_group):
483+
if db.table_exists(db_connection, "shortnulls"):
484+
shortnulls = db.load_dict(
485+
db_connection, "shortnulls", ("shortnull", "allele_list")
486+
)
487+
shortnulls = {k: v.split("/") for k, v in shortnulls.items()}
488+
return shortnulls
489+
484490
# shortnulls
485491
# scan WHO alleles for those with expression characters and make shortnull mappings
486492
# DRB4*01:03N | DRB4*01:03:01:02N/DRB4*01:03:01:13N
@@ -508,8 +514,7 @@ def generate_alleles_and_xx_codes_and_who(
508514

509515
db.save_dict(db_connection, "shortnulls", shortnulls, ("shortnull", "allele_list"))
510516
shortnulls = {k: v.split("/") for k, v in shortnulls.items()}
511-
512-
return valid_alleles, who_alleles, xx_codes, who_group, shortnulls, exp_alleles
517+
return shortnulls
513518

514519

515520
def generate_mac_codes(db_connection: sqlite3.Connection, refresh_mac: bool):

pyard/pyard.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
from . import data_repository as dr
3131
from .smart_sort import smart_sort_comparator
3232
from .exceptions import InvalidAlleleError, InvalidMACError, InvalidTypingError
33-
from .misc import get_n_field_allele, get_2field_allele
33+
from .misc import get_n_field_allele, get_2field_allele, expression_chars
3434

3535
HLA_regex = re.compile("^HLA-")
3636

@@ -109,12 +109,14 @@ def __init__(
109109
self.who_alleles,
110110
self.xx_codes,
111111
self.who_group,
112-
self.shortnulls,
113112
self.exp_alleles,
114113
) = dr.generate_alleles_and_xx_codes_and_who(
115114
self.db_connection, imgt_version, self.ars_mappings
116115
)
117116

117+
# Generate short nulls from WHO mapping
118+
self.shortnulls = dr.generate_short_nulls(self.db_connection, self.who_group)
119+
118120
# Load Serology mappings
119121
dr.generate_serology_mapping(self.db_connection, imgt_version)
120122
# Load V2 to V3 mappings
@@ -213,7 +215,16 @@ def redux(self, allele: str, redux_type: VALID_REDUCTION_TYPES, reping=True) ->
213215
return allele
214216
elif redux_type == "exon":
215217
if allele in self.ars_mappings.exon_group:
216-
return self.ars_mappings.exon_group[allele]
218+
exon_group_allele = self.ars_mappings.exon_group[allele]
219+
# Check if the 3 field exon allele has 4 field alleles
220+
# that all have the same expression characters
221+
last_char = allele[-1]
222+
if last_char in expression_chars:
223+
exon_short_null_allele = exon_group_allele + last_char
224+
if self.is_shortnull(exon_short_null_allele):
225+
return exon_short_null_allele
226+
227+
return exon_group_allele
217228
else:
218229
# for 'exon' return allele with only first 3 fields
219230
return ":".join(allele.split(":")[0:3])
@@ -360,7 +371,7 @@ def redux_gl(self, glstring: str, redux_type: VALID_REDUCTION_TYPES) -> str:
360371
else:
361372
raise InvalidMACError(f"{glstring} is an invalid MAC.")
362373

363-
# Handle shortnulls
374+
# Handle short nulls
364375
if self._config["reduce_shortnull"] and self.is_shortnull(glstring):
365376
return self.redux_gl("/".join(self.shortnulls[glstring]), redux_type)
366377

tests/features/shortnulls.feature

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,27 @@
1-
Feature: shortnull
1+
Feature: Short Nulls
2+
3+
**Short Nulls**: If a reduced allele with an expression character has the same expression
4+
character in its 4 field expansion, the expression character should be propagated in the
5+
reduced version of the allele.
26

37
Scenario Outline:
48

59
Given the allele as <Allele>
610
When reducing on the <Level> level (ambiguous)
711
Then the reduced allele is found to be <Redux Allele>
812

13+
Examples: expression characters not propagated
14+
| Allele | Level | Redux Allele |
15+
| DRB4*01:03N | lgx | DRB4*01:01 |
16+
| DRB4*01:03:01N | lgx | DRB4*01:01 |
17+
| DRB5*01:08N | lgx | DRB5*01:02/DRB5*01:08 |
918

10-
Examples: shortnulls
11-
| Allele | Level | Redux Allele |
12-
| DRB4*01:03N | lgx | DRB4*01:01 |
13-
| DRB4*01:03N | exon | DRB4*01:03:01 |
19+
Examples: expression characters propagated
20+
| Allele | Level | Redux Allele |
21+
| DRB4*01:03N | exon | DRB4*01:03:01N |
1422
| DRB4*01:03N | W | DRB4*01:03:01:02N/DRB4*01:03:01:13N |
15-
| DRB4*01:03:01N | lgx | DRB4*01:01 |
16-
| DRB4*01:03:01N | exon | DRB4*01:03:01 |
23+
| DRB4*01:03:01N | exon | DRB4*01:03:01N |
1724
| DRB4*01:03:01N | W | DRB4*01:03:01:02N/DRB4*01:03:01:13N |
18-
| DRB5*01:08N | lgx | DRB5*01:02/DRB5*01:08 |
19-
| DRB5*01:08N | exon | DRB5*01:08:01N/DRB5*01:08:02N |
20-
| DRB5*01:08N | W | DRB5*01:08:01N/DRB5*01:08:02N |
25+
| DRB5*01:08N | exon | DRB5*01:08:01N/DRB5*01:08:02N |
26+
| DRB5*01:08N | W | DRB5*01:08:01N/DRB5*01:08:02N |
27+
| A*01:04N | exon | A*01:04:01N |

0 commit comments

Comments
 (0)