From 82f75b54e9f63548dd201a0aa0c87335a8ac8731 Mon Sep 17 00:00:00 2001 From: Pradeep Bashyal Date: Mon, 11 Aug 2025 11:45:57 -0500 Subject: [PATCH 1/3] Allow absent/pseudo-gene alleles to be ignored. - "ignore_allele_with_suffixes" config property can be passed to `ard.init()` to ignore any alleles with those fields. - add drbx reduction for gl strings in batch mode. - Updated feature files with new scenarios to support absence alleles --- pyard/ard.py | 27 ++++++++++++++++++-- pyard/smart_sort.py | 16 +++++++++++- scripts/pyard-reduce-csv | 44 ++++++++++++++++++++++++++++++--- tests/environment.py | 11 +++++++++ tests/features/allele.feature | 37 +++++++++++++++++++++------ tests/features/glstring.feature | 12 +++++++++ tests/steps/redux_allele.py | 14 +++++++++++ 7 files changed, 148 insertions(+), 13 deletions(-) diff --git a/pyard/ard.py b/pyard/ard.py index 88d8e7b..242207c 100644 --- a/pyard/ard.py +++ b/pyard/ard.py @@ -61,6 +61,7 @@ "verbose_log": False, "ARS_as_lg": False, "strict": True, + "ignore_allele_with_suffixes": (), } @@ -388,7 +389,12 @@ def _sorted_unique_gl(self, gls: List[str], delim: str) -> str: non_empty_gls = filter(lambda s: s != "", gls) return delim.join( sorted( - non_empty_gls, key=functools.cmp_to_key(self.smart_sort_comparator) + non_empty_gls, + key=functools.cmp_to_key( + lambda a, b: self.smart_sort_comparator( + a, b, self._config["ignore_allele_with_suffixes"] + ) + ), ) ) @@ -399,7 +405,14 @@ def _sorted_unique_gl(self, gls: List[str], delim: str) -> str: all_gls += gl.split(delim) unique_gls = filter(lambda s: s != "", set(all_gls)) return delim.join( - sorted(unique_gls, key=functools.cmp_to_key(self.smart_sort_comparator)) + sorted( + unique_gls, + key=functools.cmp_to_key( + lambda a, b: self.smart_sort_comparator( + a, b, self._config["ignore_allele_with_suffixes"] + ) + ), + ) ) @functools.lru_cache(maxsize=DEFAULT_CACHE_SIZE) @@ -445,6 +458,11 @@ def redux(self, glstring: str, redux_type: VALID_REDUCTION_TYPES = "lgx") 
-> str [self.redux(a, redux_type) for a in glstring.split("/")], "/" ) + if self._config["ignore_allele_with_suffixes"]: + _, fields = glstring.split("*") + if fields in self._config["ignore_allele_with_suffixes"]: + return glstring + # Handle V2 to V3 mapping if self.is_v2(glstring): glstring = self._map_v2_to_v3(glstring) @@ -789,6 +807,11 @@ def _is_valid(self, allele: str) -> bool: if not alphanum_allele.isalnum(): return False + if self._config["ignore_allele_with_suffixes"]: + locus, fields = allele.split("*") + if fields in self._config["ignore_allele_with_suffixes"]: + return True + if not self._config["strict"]: allele = self._get_non_strict_allele(allele) diff --git a/pyard/smart_sort.py b/pyard/smart_sort.py index ec63314..c94aa4b 100644 --- a/pyard/smart_sort.py +++ b/pyard/smart_sort.py @@ -32,15 +32,19 @@ @functools.lru_cache(maxsize=constants.DEFAULT_CACHE_SIZE) -def smart_sort_comparator(a1, a2): +def smart_sort_comparator(a1, a2, ignore_suffixes=()): """ Natural sort 2 given alleles. Python sorts strings lexicographically but HLA alleles need to be sorted by numerical values in each field of the HLA nomenclature. + If allele suffixes are in ignore_suffixes, comparison results in that + appearing later. 
+ :param a1: first allele :param a2: second allele + :param ignore_suffixes: tuple of suffixes """ # Check to see if they are the same alleles @@ -54,6 +58,16 @@ def smart_sort_comparator(a1, a2): else: return -1 + if ignore_suffixes and "*" in a1: + _, fields = a1.split("*") + if fields in ignore_suffixes: + return 1 + + if ignore_suffixes and "*" in a2: + _, fields = a2.split("*") + if fields in ignore_suffixes: + return -1 + # remove any non-numerics a1 = re.sub(expr_regex, "", a1) a2 = re.sub(expr_regex, "", a2) diff --git a/scripts/pyard-reduce-csv b/scripts/pyard-reduce-csv index affb6f4..7adbc9d 100755 --- a/scripts/pyard-reduce-csv +++ b/scripts/pyard-reduce-csv @@ -39,7 +39,7 @@ import pandas as pd import pyard from pyard.db import similar_alleles import pyard.drbx as drbx -from pyard.exceptions import PyArdError, InvalidTypingError +from pyard.exceptions import PyArdError, InvalidTypingError, InvalidAlleleError from pyard.misc import get_data_dir, get_imgt_version, download_to_file @@ -277,10 +277,45 @@ def create_reduced_slug(locus_typ1_typ2_pair): return typ1 +def apply_drbx(gl_string): + slugs = gl_string.split("^") + alleles = [allele for slug in slugs for allele in slug.split("+")] + drbx_loci = ("DRB3", "DRB4", "DRB5") + + # Filter for DRBX alleles + drbx_alleles = [ + allele + for allele in alleles + if any(allele.startswith(locus) for locus in drbx_loci) + ] + + # Create new GL string without DRBX alleles + filtered_slugs = [] + for slug in slugs: + non_drbx_alleles = [] + for allele in slug.split("+"): + if not any(allele.startswith(locus) for locus in drbx_loci): + non_drbx_alleles.append(allele) + if non_drbx_alleles: + filtered_slugs.append("+".join(non_drbx_alleles)) + + new_gl_string = "^".join(filtered_slugs) + + drbx_slug = drbx.map_drbx(drbx_alleles, True) + gl_string_drbx = new_gl_string + "^" + "+".join(drbx_slug) + + return gl_string_drbx + + def reduce_glstring(glstring: str) -> str: try: - return ard.redux(glstring, 
ard_config["redux_type"]) - except InvalidTypingError as e: + ard_redux = ard.redux(glstring, ard_config["redux_type"]) + if ard_config.get("map_drb345_to_drbx"): + glstring_drbx = apply_drbx(ard_redux) + return glstring_drbx + else: + return ard_redux + except (InvalidTypingError, InvalidAlleleError) as e: print(f"Error reducing {glstring} \n", e.message, file=sys.stderr) return "Failed" @@ -391,6 +426,9 @@ if __name__ == "__main__": "reduce_MAC": ard_config.get("reduce_MAC", True), "map_drb345_to_drbx": ard_config.get("map_drb345_to_drbx", True), "verbose_log": ard_config.get("verbose_log", True), + "ignore_allele_with_suffixes": tuple( + ard_config.get("ignore_allele_with_suffixes", tuple()) + ), } ard = pyard.init( imgt_version=imgt_version, diff --git a/tests/environment.py b/tests/environment.py index 926eaae..35be1d4 100644 --- a/tests/environment.py +++ b/tests/environment.py @@ -36,3 +36,14 @@ def before_all(context): context.ard_non_strict = pyard.init( "3440", data_dir="/tmp/py-ard", config=non_strict_config ) + + # Ignored allele suffixes + ignore_suffix_mode = { + "ignore_allele_with_suffixes": ( + "NNNN", + "UUUU", + ) + } + context.ard_ignore_suffix = pyard.init( + "3440", data_dir="/tmp/py-ard", config=ignore_suffix_mode + ) diff --git a/tests/features/allele.feature b/tests/features/allele.feature index fd8bd07..b1a21c3 100644 --- a/tests/features/allele.feature +++ b/tests/features/allele.feature @@ -60,19 +60,19 @@ Feature: Alleles Scenario Outline: Allele validation in non-strict mode - Similar to reduction, handle non-strict mode when validating an allele. - The test version of IPD/IMGT-HLA database (see environment.py), - A*11:403 is invalid and A*24:329 is valid for A*24:329Q + Similar to reduction, handle non-strict mode when validating an allele. 
+ The test version of IPD/IMGT-HLA database (see environment.py), + A*11:403 is invalid and A*24:329 is valid for A*24:329Q Given the allele as <Allele> When checking for validity of the allele in non-strict mode Then the validness of the allele is <Validity> Examples: - | Allele | Validity | - | A*11:403 | Invalid | - | A*24:329 | Valid | - + | Allele | Validity | + | A*11:403 | Invalid | + | A*24:329 | Valid | + | DRBX*NNNN | Invalid | Scenario Outline: Single field MICA, MICB Alleles @@ -88,3 +88,26 @@ Feature: Alleles | MICA*040 | lgx | MICA*040 | | MICB*006 | lgx | MICB*006 | | MICB*029 | lgx | MICB*029 | + + Scenario Outline: Ignore reduction of DRBX*NNNN + Given the allele as <Allele> + When reducing on the <Level> level in ignore_suffix mode + Then the reduced allele is found to be <Redux Allele> + + Examples: + | Allele | Level | Redux Allele | + | DRBX*NNNN | lgx | DRBX*NNNN | + | DRBX*NNNN | G | DRBX*NNNN | + | DRB1*UUUU | lg | DRB1*UUUU | + + Scenario Outline: Allele validation in ignore_suffix mode + + DRBX*NNNN is valid in ignore_suffix_mode + + Given the allele as <Allele> + When checking for validity of the allele in ignore_suffix mode + Then the validness of the allele is <Validity> + + Examples: + | Allele | Validity | + | DRBX*NNNN | Valid | diff --git a/tests/features/glstring.feature b/tests/features/glstring.feature index fa3b6a6..433e310 100644 --- a/tests/features/glstring.feature +++ b/tests/features/glstring.feature @@ -19,3 +19,15 @@ Feature: GL (Genotype List) Strings | A*01:01~B*07:02+A*01:01~B*07:02 | G | A*01:01:01G~B*07:02:01G+A*01:01:01G~B*07:02:01G | | A*01:01~B*07:02+A*01:01~B*07:02 | lg | A*01:01g~B*07:02g+A*01:01g~B*07:02g | | A*01:01~B*07:02+A*01:01~B*07:02\|A*02:01~B*07:02+A*02:01~B*07:02 | lg | A*01:01g~B*07:02g+A*01:01g~B*07:02g\|A*02:01g~B*07:02g+A*02:01g~B*07:02g | + + + Scenario Outline: Ignore reduction of DRBX*NNNN in GL String + Given the allele as <GL String> + When reducing on the <Level> level in ignore_suffix mode + Then the reduced allele is found to be <Redux GL String> + + Examples: + | GL String | Level | Redux GL 
String | + | DRBX*NNNN+DRB3*03:ECXMH | lgx | DRB3*03:01+DRBX*NNNN | + | DRB3*03:ECXMH+DRBX*NNNN | lgx | DRB3*03:01+DRBX*NNNN | + | DRB1*UUUU+DRB1*12:02 | G | DRB1*12:02:01G/DRB1*12:02:02G+DRB1*UUUU | diff --git a/tests/steps/redux_allele.py b/tests/steps/redux_allele.py index 525dfd7..a2ed4dd 100644 --- a/tests/steps/redux_allele.py +++ b/tests/steps/redux_allele.py @@ -126,6 +126,20 @@ def step_impl(context): context.is_valid = False +@when("reducing on the {level} level in ignore_suffix mode") +def step_impl(context, level): + context.level = level + context.redux_allele = context.ard_ignore_suffix.redux(context.allele, level) + + +@when("checking for validity of the allele in ignore_suffix mode") +def step_impl(context): + try: + context.is_valid = context.ard_ignore_suffix.validate(context.allele) + except InvalidAlleleError: + context.is_valid = False + + @then("the validness of the allele is {validity}") def step_impl(context, validity): valid = validity == "Valid" From 51d06cdff143f715d245b835462e4b73bbc0b41a Mon Sep 17 00:00:00 2001 From: Pradeep Bashyal Date: Mon, 11 Aug 2025 12:01:33 -0500 Subject: [PATCH 2/3] `map_drb345_to_drbx` is only used for batch mode. 
--- pyard/ard.py | 1 - tests/features/allele.feature | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/pyard/ard.py b/pyard/ard.py index 242207c..95fbf40 100644 --- a/pyard/ard.py +++ b/pyard/ard.py @@ -57,7 +57,6 @@ "reduce_MAC": True, "reduce_shortnull": True, "ping": True, - "map_drb345_to_drbx": True, "verbose_log": False, "ARS_as_lg": False, "strict": True, diff --git a/tests/features/allele.feature b/tests/features/allele.feature index b1a21c3..d96c785 100644 --- a/tests/features/allele.feature +++ b/tests/features/allele.feature @@ -102,7 +102,7 @@ Feature: Alleles Scenario Outline: Allele validation in ignore_suffix mode - DRBX*NNNN is valid in ignore_suffix_mode + DRBX*NNNN is valid in ignore_suffix_mode Given the allele as <Allele> When checking for validity of the allele in ignore_suffix mode @@ -110,4 +110,4 @@ Feature: Alleles Examples: | Allele | Validity | - | DRBX*NNNN | Valid | + | DRBX*NNNN | Valid | From 97e980d9b7d5f9fef5368d562872474423f3123b Mon Sep 17 00:00:00 2001 From: Pradeep Bashyal Date: Tue, 12 Aug 2025 09:31:29 -0500 Subject: [PATCH 3/3] =?UTF-8?q?Bump=20version:=201.5.4=20=E2=86=92=201.5.5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Dockerfile | 2 +- api-spec.yaml | 2 +- pyard/__init__.py | 2 +- setup.cfg | 2 +- setup.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Dockerfile b/Dockerfile index c842f82..0195989 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,7 +4,7 @@ LABEL MAINTAINER="Pradeep Bashyal" WORKDIR /app -ARG PY_ARD_VERSION=1.5.4 +ARG PY_ARD_VERSION=1.5.5 COPY requirements.txt /app RUN pip install --no-cache-dir --upgrade pip && \ diff --git a/api-spec.yaml b/api-spec.yaml index 88b17b3..935cfec 100644 --- a/api-spec.yaml +++ b/api-spec.yaml @@ -2,7 +2,7 @@ openapi: 3.0.3 info: title: ARD Reduction description: Reduce to ARD Level - version: "1.5.4" + version: "1.5.5" servers: - url: 'http://localhost:8080' tags: diff --git 
a/pyard/__init__.py b/pyard/__init__.py index 67f9863..3c81f16 100644 --- a/pyard/__init__.py +++ b/pyard/__init__.py @@ -26,7 +26,7 @@ from .misc import get_imgt_db_versions as db_versions __author__ = """NMDP Bioinformatics""" -__version__ = "1.5.4" +__version__ = "1.5.5" def init( diff --git a/setup.cfg b/setup.cfg index a916518..659797f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 1.5.4 +current_version = 1.5.5 commit = True tag = True diff --git a/setup.py b/setup.py index cc7b1f7..3981ef1 100644 --- a/setup.py +++ b/setup.py @@ -36,7 +36,7 @@ setup( name="py-ard", - version="1.5.4", + version="1.5.5", description="ARD reduction for HLA with Python", long_description=readme, long_description_content_type="text/markdown",