Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ LABEL MAINTAINER="Pradeep Bashyal"

WORKDIR /app

ARG PY_ARD_VERSION=1.5.4
ARG PY_ARD_VERSION=1.5.5

COPY requirements.txt /app
RUN pip install --no-cache-dir --upgrade pip && \
Expand Down
2 changes: 1 addition & 1 deletion api-spec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ openapi: 3.0.3
info:
title: ARD Reduction
description: Reduce to ARD Level
version: "1.5.4"
version: "1.5.5"
servers:
- url: 'http://localhost:8080'
tags:
Expand Down
2 changes: 1 addition & 1 deletion pyard/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from .misc import get_imgt_db_versions as db_versions

__author__ = """NMDP Bioinformatics"""
__version__ = "1.5.4"
__version__ = "1.5.5"


def init(
Expand Down
28 changes: 25 additions & 3 deletions pyard/ard.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,10 @@
"reduce_MAC": True,
"reduce_shortnull": True,
"ping": True,
"map_drb345_to_drbx": True,
"verbose_log": False,
"ARS_as_lg": False,
"strict": True,
"ignore_allele_with_suffixes": (),
}


Expand Down Expand Up @@ -388,7 +388,12 @@ def _sorted_unique_gl(self, gls: List[str], delim: str) -> str:
non_empty_gls = filter(lambda s: s != "", gls)
return delim.join(
sorted(
non_empty_gls, key=functools.cmp_to_key(self.smart_sort_comparator)
non_empty_gls,
key=functools.cmp_to_key(
lambda a, b: self.smart_sort_comparator(
a, b, self._config["ignore_allele_with_suffixes"]
)
),
)
)

Expand All @@ -399,7 +404,14 @@ def _sorted_unique_gl(self, gls: List[str], delim: str) -> str:
all_gls += gl.split(delim)
unique_gls = filter(lambda s: s != "", set(all_gls))
return delim.join(
sorted(unique_gls, key=functools.cmp_to_key(self.smart_sort_comparator))
sorted(
unique_gls,
key=functools.cmp_to_key(
lambda a, b: self.smart_sort_comparator(
a, b, self._config["ignore_allele_with_suffixes"]
)
),
)
)

@functools.lru_cache(maxsize=DEFAULT_CACHE_SIZE)
Expand Down Expand Up @@ -445,6 +457,11 @@ def redux(self, glstring: str, redux_type: VALID_REDUCTION_TYPES = "lgx") -> str
[self.redux(a, redux_type) for a in glstring.split("/")], "/"
)

if self._config["ignore_allele_with_suffixes"]:
_, fields = glstring.split("*")
if fields in self._config["ignore_allele_with_suffixes"]:
return glstring

# Handle V2 to V3 mapping
if self.is_v2(glstring):
glstring = self._map_v2_to_v3(glstring)
Expand Down Expand Up @@ -789,6 +806,11 @@ def _is_valid(self, allele: str) -> bool:
if not alphanum_allele.isalnum():
return False

if self._config["ignore_allele_with_suffixes"]:
locus, fields = allele.split("*")
if fields in self._config["ignore_allele_with_suffixes"]:
return True

if not self._config["strict"]:
allele = self._get_non_strict_allele(allele)

Expand Down
16 changes: 15 additions & 1 deletion pyard/smart_sort.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,19 @@


@functools.lru_cache(maxsize=constants.DEFAULT_CACHE_SIZE)
def smart_sort_comparator(a1, a2):
def smart_sort_comparator(a1, a2, ignore_suffixes=()):
"""
Natural sort 2 given alleles.
Python sorts strings lexicographically but HLA alleles need
to be sorted by numerical values in each field of the HLA nomenclature.
If the text after the "*" of an allele is listed in ignore_suffixes,
that allele is sorted after the other one.
:param a1: first allele
:param a2: second allele
:param ignore_suffixes: tuple of suffixes
"""

# Check to see if they are the same alleles
Expand All @@ -54,6 +58,16 @@ def smart_sort_comparator(a1, a2):
else:
return -1

if ignore_suffixes and "*" in a1:
_, fields = a1.split("*")
if fields in ignore_suffixes:
return 1

if ignore_suffixes and "*" in a2:
_, fields = a2.split("*")
if fields in ignore_suffixes:
return -1

# remove any non-numerics
a1 = re.sub(expr_regex, "", a1)
a2 = re.sub(expr_regex, "", a2)
Expand Down
44 changes: 41 additions & 3 deletions scripts/pyard-reduce-csv
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ import pandas as pd
import pyard
from pyard.db import similar_alleles
import pyard.drbx as drbx
from pyard.exceptions import PyArdError, InvalidTypingError
from pyard.exceptions import PyArdError, InvalidTypingError, InvalidAlleleError
from pyard.misc import get_data_dir, get_imgt_version, download_to_file


Expand Down Expand Up @@ -277,10 +277,45 @@ def create_reduced_slug(locus_typ1_typ2_pair):
return typ1


def apply_drbx(gl_string):
    """
    Replace the DRB3/DRB4/DRB5 alleles of a GL string with a DRBX slug.

    The GL string is split into slugs on "^" and every slug into alleles
    on "+". Alleles whose name starts with DRB3, DRB4 or DRB5 are removed
    from their slug and handed to ``drbx.map_drbx``; the alleles it returns
    are joined with "+" and appended as the last slug.

    :param gl_string: GL string made of "^"-separated slugs of
        "+"-separated alleles
    :return: GL string holding the remaining slugs in their original order
        followed by the DRBX slug
    """
    drbx_loci = ("DRB3", "DRB4", "DRB5")

    # Single pass: collect the DRB3/4/5 alleles and, per slug, the rest.
    drbx_alleles = []
    filtered_slugs = []
    for slug in gl_string.split("^"):
        non_drbx_alleles = []
        for allele in slug.split("+"):
            # str.startswith accepts a tuple of prefixes
            if allele.startswith(drbx_loci):
                drbx_alleles.append(allele)
            else:
                non_drbx_alleles.append(allele)
        # A slug that consisted only of DRB3/4/5 alleles disappears
        if non_drbx_alleles:
            filtered_slugs.append("+".join(non_drbx_alleles))

    new_gl_string = "^".join(filtered_slugs)
    drbx_slug = "+".join(drbx.map_drbx(drbx_alleles, True))

    # Join only the non-empty parts: when the input held nothing but
    # DRB3/4/5 alleles, new_gl_string is empty and a plain concatenation
    # would produce a GL string starting with a dangling "^".
    # NOTE(review): an empty drbx_slug is handled the same way; whether
    # map_drbx can return no alleles is not visible here - confirm.
    return "^".join(part for part in (new_gl_string, drbx_slug) if part)


def reduce_glstring(glstring: str) -> str:
try:
return ard.redux(glstring, ard_config["redux_type"])
except InvalidTypingError as e:
ard_redux = ard.redux(glstring, ard_config["redux_type"])
if ard_config.get("map_drb345_to_drbx"):
glstring_drbx = apply_drbx(ard_redux)
return glstring_drbx
else:
return ard_redux
except (InvalidTypingError, InvalidAlleleError) as e:
print(f"Error reducing {glstring} \n", e.message, file=sys.stderr)
return "Failed"

Expand Down Expand Up @@ -391,6 +426,9 @@ if __name__ == "__main__":
"reduce_MAC": ard_config.get("reduce_MAC", True),
"map_drb345_to_drbx": ard_config.get("map_drb345_to_drbx", True),
"verbose_log": ard_config.get("verbose_log", True),
"ignore_allele_with_suffixes": tuple(
ard_config.get("ignore_allele_with_suffixes", tuple())
),
}
ard = pyard.init(
imgt_version=imgt_version,
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 1.5.4
current_version = 1.5.5
commit = True
tag = True

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@

setup(
name="py-ard",
version="1.5.4",
version="1.5.5",
description="ARD reduction for HLA with Python",
long_description=readme,
long_description_content_type="text/markdown",
Expand Down
11 changes: 11 additions & 0 deletions tests/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,14 @@ def before_all(context):
context.ard_non_strict = pyard.init(
"3440", data_dir="/tmp/py-ard", config=non_strict_config
)

# Ignored allele suffixes
ignore_suffix_mode = {
"ignore_allele_with_suffixes": (
"NNNN",
"UUUU",
)
}
context.ard_ignore_suffix = pyard.init(
"3440", data_dir="/tmp/py-ard", config=ignore_suffix_mode
)
37 changes: 30 additions & 7 deletions tests/features/allele.feature
Original file line number Diff line number Diff line change
Expand Up @@ -60,19 +60,19 @@ Feature: Alleles

Scenario Outline: Allele validation in non-strict mode

Similar to reduction, handle non-strict mode when validating an allele.
The test version of IPD/IMGT-HLA database (see environment.py),
A*11:403 is invalid and A*24:329 is valid for A*24:329Q
Similar to reduction, handle non-strict mode when validating an allele.
In the test version of the IPD/IMGT-HLA database (see environment.py),
A*11:403 is invalid and A*24:329 is valid for A*24:329Q

Given the allele as <Allele>
When checking for validity of the allele in non-strict mode
Then the validness of the allele is <Validity>

Examples:
| Allele | Validity |
| A*11:403 | Invalid |
| A*24:329 | Valid |

| Allele | Validity |
| A*11:403 | Invalid |
| A*24:329 | Valid |
| DRBX*NNNN | Invalid |

Scenario Outline: Single field MICA, MICB Alleles

Expand All @@ -88,3 +88,26 @@ Feature: Alleles
| MICA*040 | lgx | MICA*040 |
| MICB*006 | lgx | MICB*006 |
| MICB*029 | lgx | MICB*029 |

Scenario Outline: Ignore reduction of DRBX*NNNN
Given the allele as <Allele>
When reducing on the <Level> level in ignore_suffix mode
Then the reduced allele is found to be <Redux Allele>

Examples:
| Allele | Level | Redux Allele |
| DRBX*NNNN | lgx | DRBX*NNNN |
| DRBX*NNNN | G | DRBX*NNNN |
| DRB1*UUUU | lg | DRB1*UUUU |

Scenario Outline: Allele validation in ignore_suffix mode

DRBX*NNNN is valid in ignore_suffix_mode

Given the allele as <Allele>
When checking for validity of the allele in ignore_suffix mode
Then the validness of the allele is <Validity>

Examples:
| Allele | Validity |
| DRBX*NNNN | Valid |
12 changes: 12 additions & 0 deletions tests/features/glstring.feature
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,15 @@ Feature: GL (Genotype List) Strings
| A*01:01~B*07:02+A*01:01~B*07:02 | G | A*01:01:01G~B*07:02:01G+A*01:01:01G~B*07:02:01G |
| A*01:01~B*07:02+A*01:01~B*07:02 | lg | A*01:01g~B*07:02g+A*01:01g~B*07:02g |
| A*01:01~B*07:02+A*01:01~B*07:02\|A*02:01~B*07:02+A*02:01~B*07:02 | lg | A*01:01g~B*07:02g+A*01:01g~B*07:02g\|A*02:01g~B*07:02g+A*02:01g~B*07:02g |


Scenario Outline: Ignore reduction of DRBX*NNNN in GL String
Given the allele as <GL String>
When reducing on the <Level> level in ignore_suffix mode
Then the reduced allele is found to be <Redux GL String>

Examples:
| GL String | Level | Redux GL String |
| DRBX*NNNN+DRB3*03:ECXMH | lgx | DRB3*03:01+DRBX*NNNN |
| DRB3*03:ECXMH+DRBX*NNNN | lgx | DRB3*03:01+DRBX*NNNN |
| DRB1*UUUU+DRB1*12:02 | G | DRB1*12:02:01G/DRB1*12:02:02G+DRB1*UUUU |
14 changes: 14 additions & 0 deletions tests/steps/redux_allele.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,20 @@ def step_impl(context):
context.is_valid = False


@when("reducing on the {level} level in ignore_suffix mode")
def step_impl(context, level):
    """
    Reduce ``context.allele`` with the ignore-suffix ARD instance.

    Records the requested level on the context and stores the result of
    ``context.ard_ignore_suffix.redux`` as ``context.redux_allele``.

    :param context: behave context carrying ``allele`` and ``ard_ignore_suffix``
    :param level: reduction level captured from the step text
    """
    context.level = level
    ignore_suffix_ard = context.ard_ignore_suffix
    reduced = ignore_suffix_ard.redux(context.allele, level)
    context.redux_allele = reduced


@when("checking for validity of the allele in ignore_suffix mode")
def step_impl(context):
    """
    Validate ``context.allele`` with the ignore-suffix ARD instance.

    Stores the value returned by ``context.ard_ignore_suffix.validate`` in
    ``context.is_valid``; an ``InvalidAlleleError`` is recorded as ``False``.

    :param context: behave context carrying ``allele`` and ``ard_ignore_suffix``
    """
    try:
        outcome = context.ard_ignore_suffix.validate(context.allele)
    except InvalidAlleleError:
        # A rejected allele is an expected result of this step, not a failure
        outcome = False
    context.is_valid = outcome


@then("the validness of the allele is {validity}")
def step_impl(context, validity):
valid = validity == "Valid"
Expand Down