Skip to content

Commit 2e8e026

Browse files
authored
Merge pull request #353 from pbashyal-nmdp/allow-ignore-alleles
Allow absent/pseudo-gene alleles to be ignored.
2 parents 94ad2be + 97e980d commit 2e8e026

File tree

12 files changed

+153
-19
lines changed

12 files changed

+153
-19
lines changed

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ LABEL MAINTAINER="Pradeep Bashyal"
44

55
WORKDIR /app
66

7-
ARG PY_ARD_VERSION=1.5.4
7+
ARG PY_ARD_VERSION=1.5.5
88

99
COPY requirements.txt /app
1010
RUN pip install --no-cache-dir --upgrade pip && \

api-spec.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ openapi: 3.0.3
22
info:
33
title: ARD Reduction
44
description: Reduce to ARD Level
5-
version: "1.5.4"
5+
version: "1.5.5"
66
servers:
77
- url: 'http://localhost:8080'
88
tags:

pyard/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
from .misc import get_imgt_db_versions as db_versions
2727

2828
__author__ = """NMDP Bioinformatics"""
29-
__version__ = "1.5.4"
29+
__version__ = "1.5.5"
3030

3131

3232
def init(

pyard/ard.py

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,10 +57,10 @@
5757
"reduce_MAC": True,
5858
"reduce_shortnull": True,
5959
"ping": True,
60-
"map_drb345_to_drbx": True,
6160
"verbose_log": False,
6261
"ARS_as_lg": False,
6362
"strict": True,
63+
"ignore_allele_with_suffixes": (),
6464
}
6565

6666

@@ -388,7 +388,12 @@ def _sorted_unique_gl(self, gls: List[str], delim: str) -> str:
388388
non_empty_gls = filter(lambda s: s != "", gls)
389389
return delim.join(
390390
sorted(
391-
non_empty_gls, key=functools.cmp_to_key(self.smart_sort_comparator)
391+
non_empty_gls,
392+
key=functools.cmp_to_key(
393+
lambda a, b: self.smart_sort_comparator(
394+
a, b, self._config["ignore_allele_with_suffixes"]
395+
)
396+
),
392397
)
393398
)
394399

@@ -399,7 +404,14 @@ def _sorted_unique_gl(self, gls: List[str], delim: str) -> str:
399404
all_gls += gl.split(delim)
400405
unique_gls = filter(lambda s: s != "", set(all_gls))
401406
return delim.join(
402-
sorted(unique_gls, key=functools.cmp_to_key(self.smart_sort_comparator))
407+
sorted(
408+
unique_gls,
409+
key=functools.cmp_to_key(
410+
lambda a, b: self.smart_sort_comparator(
411+
a, b, self._config["ignore_allele_with_suffixes"]
412+
)
413+
),
414+
)
403415
)
404416

405417
@functools.lru_cache(maxsize=DEFAULT_CACHE_SIZE)
@@ -445,6 +457,11 @@ def redux(self, glstring: str, redux_type: VALID_REDUCTION_TYPES = "lgx") -> str
445457
[self.redux(a, redux_type) for a in glstring.split("/")], "/"
446458
)
447459

460+
if self._config["ignore_allele_with_suffixes"]:
461+
_, fields = glstring.split("*")
462+
if fields in self._config["ignore_allele_with_suffixes"]:
463+
return glstring
464+
448465
# Handle V2 to V3 mapping
449466
if self.is_v2(glstring):
450467
glstring = self._map_v2_to_v3(glstring)
@@ -789,6 +806,11 @@ def _is_valid(self, allele: str) -> bool:
789806
if not alphanum_allele.isalnum():
790807
return False
791808

809+
if self._config["ignore_allele_with_suffixes"]:
810+
locus, fields = allele.split("*")
811+
if fields in self._config["ignore_allele_with_suffixes"]:
812+
return True
813+
792814
if not self._config["strict"]:
793815
allele = self._get_non_strict_allele(allele)
794816

pyard/smart_sort.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,15 +32,19 @@
3232

3333

3434
@functools.lru_cache(maxsize=constants.DEFAULT_CACHE_SIZE)
35-
def smart_sort_comparator(a1, a2):
35+
def smart_sort_comparator(a1, a2, ignore_suffixes=()):
3636
"""
3737
Natural sort 2 given alleles.
3838
3939
Python sorts strings lexicographically but HLA alleles need
4040
to be sorted by numerical values in each field of the HLA nomenclature.
4141
42+
If an allele's suffix is in ignore_suffixes, the comparison places that
43+
allele later in the sort order.
44+
4245
:param a1: first allele
4346
:param a2: second allele
47+
:param ignore_suffixes: tuple of suffixes (the text after "*") to sort last
4448
"""
4549

4650
# Check to see if they are the same alleles
@@ -54,6 +58,16 @@ def smart_sort_comparator(a1, a2):
5458
else:
5559
return -1
5660

61+
if ignore_suffixes and "*" in a1:
62+
_, fields = a1.split("*")
63+
if fields in ignore_suffixes:
64+
return 1
65+
66+
if ignore_suffixes and "*" in a2:
67+
_, fields = a2.split("*")
68+
if fields in ignore_suffixes:
69+
return -1
70+
5771
# remove any non-numerics
5872
a1 = re.sub(expr_regex, "", a1)
5973
a2 = re.sub(expr_regex, "", a2)

scripts/pyard-reduce-csv

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ import pandas as pd
3939
import pyard
4040
from pyard.db import similar_alleles
4141
import pyard.drbx as drbx
42-
from pyard.exceptions import PyArdError, InvalidTypingError
42+
from pyard.exceptions import PyArdError, InvalidTypingError, InvalidAlleleError
4343
from pyard.misc import get_data_dir, get_imgt_version, download_to_file
4444

4545

@@ -277,10 +277,45 @@ def create_reduced_slug(locus_typ1_typ2_pair):
277277
return typ1
278278

279279

280+
def apply_drbx(gl_string):
    """
    Replace the DRB3/DRB4/DRB5 typings of a GL string with a DRBX slug.

    The GL string is split into slugs on "^" and each slug into alleles
    on "+". Alleles whose name starts with DRB3, DRB4 or DRB5 are removed
    from their slugs, passed (in their original order) to
    ``drbx.map_drbx`` and the mapped result is appended as the last slug.

    :param gl_string: GL string of "^"-separated slugs of "+"-separated alleles
    :return: GL string of the remaining slugs followed by the DRBX slug
    """
    drbx_loci = ("DRB3", "DRB4", "DRB5")

    # Single pass: collect the DRBX alleles and rebuild the other slugs.
    drbx_alleles = []
    filtered_slugs = []
    for slug in gl_string.split("^"):
        non_drbx_alleles = []
        for allele in slug.split("+"):
            # str.startswith accepts a tuple of prefixes
            if allele.startswith(drbx_loci):
                drbx_alleles.append(allele)
            else:
                non_drbx_alleles.append(allele)
        # Drop slugs that held only DRB3/4/5 alleles
        if non_drbx_alleles:
            filtered_slugs.append("+".join(non_drbx_alleles))

    new_gl_string = "^".join(filtered_slugs)

    # NOTE(review): second argument is passed as True exactly as before;
    # its meaning is defined by drbx.map_drbx - confirm there.
    drbx_slug = "+".join(drbx.map_drbx(drbx_alleles, True))

    # Join only the non-empty parts so that a GL string made up solely of
    # DRB3/4/5 alleles does not come back with a leading "^" (and an empty
    # DRBX slug does not leave a trailing "^").
    return "^".join(part for part in (new_gl_string, drbx_slug) if part)
308+
309+
280310
def reduce_glstring(glstring: str) -> str:
    """
    Reduce a GL string using the configured reduction type.

    When the "map_drb345_to_drbx" setting is truthy, the reduced GL string
    is additionally passed through apply_drbx.

    :param glstring: GL string to reduce
    :return: the reduced GL string, or "Failed" when an InvalidTypingError
             or InvalidAlleleError is raised (the error is printed to stderr)
    """
    try:
        reduced = ard.redux(glstring, ard_config["redux_type"])
        if not ard_config.get("map_drb345_to_drbx"):
            return reduced
        return apply_drbx(reduced)
    except (InvalidTypingError, InvalidAlleleError) as e:
        print(f"Error reducing {glstring} \n", e.message, file=sys.stderr)
        return "Failed"
286321

@@ -391,6 +426,9 @@ if __name__ == "__main__":
391426
"reduce_MAC": ard_config.get("reduce_MAC", True),
392427
"map_drb345_to_drbx": ard_config.get("map_drb345_to_drbx", True),
393428
"verbose_log": ard_config.get("verbose_log", True),
429+
"ignore_allele_with_suffixes": tuple(
430+
ard_config.get("ignore_allele_with_suffixes", tuple())
431+
),
394432
}
395433
ard = pyard.init(
396434
imgt_version=imgt_version,

setup.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[bumpversion]
2-
current_version = 1.5.4
2+
current_version = 1.5.5
33
commit = True
44
tag = True
55

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636

3737
setup(
3838
name="py-ard",
39-
version="1.5.4",
39+
version="1.5.5",
4040
description="ARD reduction for HLA with Python",
4141
long_description=readme,
4242
long_description_content_type="text/markdown",

tests/environment.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,3 +36,14 @@ def before_all(context):
3636
context.ard_non_strict = pyard.init(
3737
"3440", data_dir="/tmp/py-ard", config=non_strict_config
3838
)
39+
40+
# Ignored allele suffixes
41+
ignore_suffix_mode = {
42+
"ignore_allele_with_suffixes": (
43+
"NNNN",
44+
"UUUU",
45+
)
46+
}
47+
context.ard_ignore_suffix = pyard.init(
48+
"3440", data_dir="/tmp/py-ard", config=ignore_suffix_mode
49+
)

tests/features/allele.feature

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -60,19 +60,19 @@ Feature: Alleles
6060

6161
Scenario Outline: Allele validation in non-strict mode
6262

63-
Similar to reduction, handle non-strict mode when validating an allele.
64-
The test version of IPD/IMGT-HLA database (see environment.py),
65-
A*11:403 is invalid and A*24:329 is valid for A*24:329Q
63+
Similar to reduction, handle non-strict mode when validating an allele.
64+
The test version of IPD/IMGT-HLA database (see environment.py),
65+
A*11:403 is invalid and A*24:329 is valid for A*24:329Q
6666

6767
Given the allele as <Allele>
6868
When checking for validity of the allele in non-strict mode
6969
Then the validness of the allele is <Validity>
7070

7171
Examples:
72-
| Allele | Validity |
73-
| A*11:403 | Invalid |
74-
| A*24:329 | Valid |
75-
72+
| Allele | Validity |
73+
| A*11:403 | Invalid |
74+
| A*24:329 | Valid |
75+
| DRBX*NNNN | Invalid |
7676

7777
Scenario Outline: Single field MICA, MICB Alleles
7878

@@ -88,3 +88,26 @@ Feature: Alleles
8888
| MICA*040 | lgx | MICA*040 |
8989
| MICB*006 | lgx | MICB*006 |
9090
| MICB*029 | lgx | MICB*029 |
91+
92+
Scenario Outline: Ignore reduction of DRBX*NNNN
93+
Given the allele as <Allele>
94+
When reducing on the <Level> level in ignore_suffix mode
95+
Then the reduced allele is found to be <Redux Allele>
96+
97+
Examples:
98+
| Allele | Level | Redux Allele |
99+
| DRBX*NNNN | lgx | DRBX*NNNN |
100+
| DRBX*NNNN | G | DRBX*NNNN |
101+
| DRB1*UUUU | lg | DRB1*UUUU |
102+
103+
Scenario Outline: Allele validation in ignore_suffix mode
104+
105+
DRBX*NNNN is valid in ignore_suffix_mode
106+
107+
Given the allele as <Allele>
108+
When checking for validity of the allele in ignore_suffix mode
109+
Then the validness of the allele is <Validity>
110+
111+
Examples:
112+
| Allele | Validity |
113+
| DRBX*NNNN | Valid |

0 commit comments

Comments
 (0)