Skip to content

Commit 82f75b5

Browse files
committed
Allow absent/pseudo-gene alleles to be ignored.
- The "ignore_allele_with_suffixes" config property can be passed to `pyard.init()` to ignore any alleles with those suffixes. - Add DRBX reduction for GL strings in batch mode. - Update feature files with new scenarios to support absent alleles.
1 parent 94ad2be commit 82f75b5

File tree

7 files changed

+148
-13
lines changed

7 files changed

+148
-13
lines changed

pyard/ard.py

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
"verbose_log": False,
6262
"ARS_as_lg": False,
6363
"strict": True,
64+
"ignore_allele_with_suffixes": (),
6465
}
6566

6667

@@ -388,7 +389,12 @@ def _sorted_unique_gl(self, gls: List[str], delim: str) -> str:
388389
non_empty_gls = filter(lambda s: s != "", gls)
389390
return delim.join(
390391
sorted(
391-
non_empty_gls, key=functools.cmp_to_key(self.smart_sort_comparator)
392+
non_empty_gls,
393+
key=functools.cmp_to_key(
394+
lambda a, b: self.smart_sort_comparator(
395+
a, b, self._config["ignore_allele_with_suffixes"]
396+
)
397+
),
392398
)
393399
)
394400

@@ -399,7 +405,14 @@ def _sorted_unique_gl(self, gls: List[str], delim: str) -> str:
399405
all_gls += gl.split(delim)
400406
unique_gls = filter(lambda s: s != "", set(all_gls))
401407
return delim.join(
402-
sorted(unique_gls, key=functools.cmp_to_key(self.smart_sort_comparator))
408+
sorted(
409+
unique_gls,
410+
key=functools.cmp_to_key(
411+
lambda a, b: self.smart_sort_comparator(
412+
a, b, self._config["ignore_allele_with_suffixes"]
413+
)
414+
),
415+
)
403416
)
404417

405418
@functools.lru_cache(maxsize=DEFAULT_CACHE_SIZE)
@@ -445,6 +458,11 @@ def redux(self, glstring: str, redux_type: VALID_REDUCTION_TYPES = "lgx") -> str
445458
[self.redux(a, redux_type) for a in glstring.split("/")], "/"
446459
)
447460

461+
if self._config["ignore_allele_with_suffixes"]:
462+
_, fields = glstring.split("*")
463+
if fields in self._config["ignore_allele_with_suffixes"]:
464+
return glstring
465+
448466
# Handle V2 to V3 mapping
449467
if self.is_v2(glstring):
450468
glstring = self._map_v2_to_v3(glstring)
@@ -789,6 +807,11 @@ def _is_valid(self, allele: str) -> bool:
789807
if not alphanum_allele.isalnum():
790808
return False
791809

810+
if self._config["ignore_allele_with_suffixes"]:
811+
locus, fields = allele.split("*")
812+
if fields in self._config["ignore_allele_with_suffixes"]:
813+
return True
814+
792815
if not self._config["strict"]:
793816
allele = self._get_non_strict_allele(allele)
794817

pyard/smart_sort.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,15 +32,19 @@
3232

3333

3434
@functools.lru_cache(maxsize=constants.DEFAULT_CACHE_SIZE)
35-
def smart_sort_comparator(a1, a2):
35+
def smart_sort_comparator(a1, a2, ignore_suffixes=()):
3636
"""
3737
Natural sort 2 given alleles.
3838
3939
Python sorts strings lexicographically but HLA alleles need
4040
to be sorted by numerical values in each field of the HLA nomenclature.
4141
42+
If allele suffixes are in ignore_suffixes, comparison results in that
43+
appearing later.
44+
4245
:param a1: first allele
4346
:param a2: second allele
47+
:param ignore_suffixes: tuple of suffixes
4448
"""
4549

4650
# Check to see if they are the same alleles
@@ -54,6 +58,16 @@ def smart_sort_comparator(a1, a2):
5458
else:
5559
return -1
5660

61+
if ignore_suffixes and "*" in a1:
62+
_, fields = a1.split("*")
63+
if fields in ignore_suffixes:
64+
return 1
65+
66+
if ignore_suffixes and "*" in a2:
67+
_, fields = a2.split("*")
68+
if fields in ignore_suffixes:
69+
return -1
70+
5771
# remove any non-numerics
5872
a1 = re.sub(expr_regex, "", a1)
5973
a2 = re.sub(expr_regex, "", a2)

scripts/pyard-reduce-csv

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ import pandas as pd
3939
import pyard
4040
from pyard.db import similar_alleles
4141
import pyard.drbx as drbx
42-
from pyard.exceptions import PyArdError, InvalidTypingError
42+
from pyard.exceptions import PyArdError, InvalidTypingError, InvalidAlleleError
4343
from pyard.misc import get_data_dir, get_imgt_version, download_to_file
4444

4545

@@ -277,10 +277,45 @@ def create_reduced_slug(locus_typ1_typ2_pair):
277277
return typ1
278278

279279

280+
def apply_drbx(gl_string):
    """
    Move the DRB3/DRB4/DRB5 typings of a GL string into one trailing DRBX slug.

    The GL string is split into locus slugs on "^" and every slug into
    alleles on "+". Alleles whose name starts with DRB3, DRB4 or DRB5 are
    removed from their slug and passed, in order of appearance, to
    ``drbx.map_drbx``; its result is joined with "+" and appended as the
    last slug. Slugs left without any allele are dropped.

    :param gl_string: GL string to rewrite (the caller passes an already
        reduced GL string)
    :return: the remaining slugs followed by the DRBX slug, joined with "^"
    """
    # str.startswith accepts a tuple, so one call checks all three loci.
    drbx_loci = ("DRB3", "DRB4", "DRB5")

    drbx_alleles = []
    filtered_slugs = []
    # Single pass: collect the DRBX alleles and rebuild the other slugs.
    for slug in gl_string.split("^"):
        non_drbx_alleles = []
        for allele in slug.split("+"):
            if allele.startswith(drbx_loci):
                drbx_alleles.append(allele)
            else:
                non_drbx_alleles.append(allele)
        if non_drbx_alleles:
            filtered_slugs.append("+".join(non_drbx_alleles))

    # NOTE(review): map_drbx is defined outside this view; presumably it
    # expects the DRB3/DRB4/DRB5 typings in a fixed order and returns an
    # iterable of allele strings - confirm against pyard.drbx.
    drbx_slug = "+".join(drbx.map_drbx(drbx_alleles, True))

    # Join only non-empty parts so that a GL string made up solely of
    # DRB3/4/5 typings does not come back with a leading "^", and an empty
    # DRBX result does not leave a trailing "^".
    if drbx_slug:
        filtered_slugs.append(drbx_slug)
    return "^".join(filtered_slugs)
308+
309+
280310
def reduce_glstring(glstring: str) -> str:
281311
try:
282-
return ard.redux(glstring, ard_config["redux_type"])
283-
except InvalidTypingError as e:
312+
ard_redux = ard.redux(glstring, ard_config["redux_type"])
313+
if ard_config.get("map_drb345_to_drbx"):
314+
glstring_drbx = apply_drbx(ard_redux)
315+
return glstring_drbx
316+
else:
317+
return ard_redux
318+
except (InvalidTypingError, InvalidAlleleError) as e:
284319
print(f"Error reducing {glstring} \n", e.message, file=sys.stderr)
285320
return "Failed"
286321

@@ -391,6 +426,9 @@ if __name__ == "__main__":
391426
"reduce_MAC": ard_config.get("reduce_MAC", True),
392427
"map_drb345_to_drbx": ard_config.get("map_drb345_to_drbx", True),
393428
"verbose_log": ard_config.get("verbose_log", True),
429+
"ignore_allele_with_suffixes": tuple(
430+
ard_config.get("ignore_allele_with_suffixes", tuple())
431+
),
394432
}
395433
ard = pyard.init(
396434
imgt_version=imgt_version,

tests/environment.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,3 +36,14 @@ def before_all(context):
3636
context.ard_non_strict = pyard.init(
3737
"3440", data_dir="/tmp/py-ard", config=non_strict_config
3838
)
39+
40+
# Ignored allele suffixes
41+
ignore_suffix_mode = {
42+
"ignore_allele_with_suffixes": (
43+
"NNNN",
44+
"UUUU",
45+
)
46+
}
47+
context.ard_ignore_suffix = pyard.init(
48+
"3440", data_dir="/tmp/py-ard", config=ignore_suffix_mode
49+
)

tests/features/allele.feature

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -60,19 +60,19 @@ Feature: Alleles
6060

6161
Scenario Outline: Allele validation in non-strict mode
6262

63-
Similar to reduction, handle non-strict mode when validating an allele.
64-
The test version of IPD/IMGT-HLA database (see environment.py),
65-
A*11:403 is invalid and A*24:329 is valid for A*24:329Q
63+
Similar to reduction, handle non-strict mode when validating an allele.
64+
The test version of IPD/IMGT-HLA database (see environment.py),
65+
A*11:403 is invalid and A*24:329 is valid for A*24:329Q
6666

6767
Given the allele as <Allele>
6868
When checking for validity of the allele in non-strict mode
6969
Then the validness of the allele is <Validity>
7070

7171
Examples:
72-
| Allele | Validity |
73-
| A*11:403 | Invalid |
74-
| A*24:329 | Valid |
75-
72+
| Allele | Validity |
73+
| A*11:403 | Invalid |
74+
| A*24:329 | Valid |
75+
| DRBX*NNNN | Invalid |
7676

7777
Scenario Outline: Single field MICA, MICB Alleles
7878

@@ -88,3 +88,26 @@ Feature: Alleles
8888
| MICA*040 | lgx | MICA*040 |
8989
| MICB*006 | lgx | MICB*006 |
9090
| MICB*029 | lgx | MICB*029 |
91+
92+
Scenario Outline: Ignore reduction of DRBX*NNNN
93+
Given the allele as <Allele>
94+
When reducing on the <Level> level in ignore_suffix mode
95+
Then the reduced allele is found to be <Redux Allele>
96+
97+
Examples:
98+
| Allele | Level | Redux Allele |
99+
| DRBX*NNNN | lgx | DRBX*NNNN |
100+
| DRBX*NNNN | G | DRBX*NNNN |
101+
| DRB1*UUUU | lg | DRB1*UUUU |
102+
103+
Scenario Outline: Allele validation in ignore_suffix mode
104+
105+
DRBX*NNNN is valid in ignore_suffix_mode
106+
107+
Given the allele as <Allele>
108+
When checking for validity of the allele in ignore_suffix mode
109+
Then the validness of the allele is <Validity>
110+
111+
Examples:
112+
| Allele | Validity |
113+
| DRBX*NNNN | Valid |

tests/features/glstring.feature

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,15 @@ Feature: GL (Genotype List) Strings
1919
| A*01:01~B*07:02+A*01:01~B*07:02 | G | A*01:01:01G~B*07:02:01G+A*01:01:01G~B*07:02:01G |
2020
| A*01:01~B*07:02+A*01:01~B*07:02 | lg | A*01:01g~B*07:02g+A*01:01g~B*07:02g |
2121
| A*01:01~B*07:02+A*01:01~B*07:02\|A*02:01~B*07:02+A*02:01~B*07:02 | lg | A*01:01g~B*07:02g+A*01:01g~B*07:02g\|A*02:01g~B*07:02g+A*02:01g~B*07:02g |
22+
23+
24+
Scenario Outline: Ignore reduction of DRBX*NNNN in GL String
25+
Given the allele as <GL String>
26+
When reducing on the <Level> level in ignore_suffix mode
27+
Then the reduced allele is found to be <Redux GL String>
28+
29+
Examples:
30+
| GL String | Level | Redux GL String |
31+
| DRBX*NNNN+DRB3*03:ECXMH | lgx | DRB3*03:01+DRBX*NNNN |
32+
| DRB3*03:ECXMH+DRBX*NNNN | lgx | DRB3*03:01+DRBX*NNNN |
33+
| DRB1*UUUU+DRB1*12:02 | G | DRB1*12:02:01G/DRB1*12:02:02G+DRB1*UUUU |

tests/steps/redux_allele.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,20 @@ def step_impl(context):
126126
context.is_valid = False
127127

128128

129+
@when("reducing on the {level} level in ignore_suffix mode")
def step_impl(context, level):
    """
    Reduce ``context.allele`` with the ignore-suffix ARD instance.

    Stores the requested level on ``context.level`` and the reduction
    result on ``context.redux_allele`` for the following steps.
    """
    context.level = level
    ard_instance = context.ard_ignore_suffix
    reduced = ard_instance.redux(context.allele, level)
    context.redux_allele = reduced
133+
134+
135+
@when("checking for validity of the allele in ignore_suffix mode")
def step_impl(context):
    """
    Validate ``context.allele`` with the ignore-suffix ARD instance.

    ``context.is_valid`` receives the value returned by ``validate``, or
    ``False`` when ``validate`` raises ``InvalidAlleleError``.
    """
    try:
        outcome = context.ard_ignore_suffix.validate(context.allele)
    except InvalidAlleleError:
        # A rejected allele is recorded as invalid rather than failing the step.
        outcome = False
    context.is_valid = outcome
141+
142+
129143
@then("the validness of the allele is {validity}")
130144
def step_impl(context, validity):
131145
valid = validity == "Valid"

0 commit comments

Comments
 (0)