nmdp-bioinformatics · mmaiers-nmdp · Aug 14, 2025 · Aug 11, 2025 · Aug 11, 2025 · Aug 12, 2025
diff --git a/Dockerfile b/Dockerfile
@@ -4,7 +4,7 @@ LABEL MAINTAINER="Pradeep Bashyal"
 
 WORKDIR /app
 
-ARG PY_ARD_VERSION=1.5.4
+ARG PY_ARD_VERSION=1.5.5
 
 COPY requirements.txt /app
 RUN pip install --no-cache-dir --upgrade pip && \

diff --git a/api-spec.yaml b/api-spec.yaml
@@ -2,7 +2,7 @@ openapi: 3.0.3
 info:
   title: ARD Reduction
   description: Reduce to ARD Level
-  version: "1.5.4"
+  version: "1.5.5"
 servers:
   - url: 'http://localhost:8080'
 tags:

diff --git a/pyard/__init__.py b/pyard/__init__.py
@@ -26,7 +26,7 @@
 from .misc import get_imgt_db_versions as db_versions
 
 __author__ = """NMDP Bioinformatics"""
-__version__ = "1.5.4"
+__version__ = "1.5.5"
 
 
 def init(

diff --git a/pyard/ard.py b/pyard/ard.py
@@ -57,10 +57,10 @@
     "reduce_MAC": True,
     "reduce_shortnull": True,
     "ping": True,
-    "map_drb345_to_drbx": True,
     "verbose_log": False,
     "ARS_as_lg": False,
     "strict": True,
+    "ignore_allele_with_suffixes": (),
 }
 
 
@@ -388,7 +388,12 @@ def _sorted_unique_gl(self, gls: List[str], delim: str) -> str:
             non_empty_gls = filter(lambda s: s != "", gls)
             return delim.join(
                 sorted(
-                    non_empty_gls, key=functools.cmp_to_key(self.smart_sort_comparator)
+                    non_empty_gls,
+                    key=functools.cmp_to_key(
+                        lambda a, b: self.smart_sort_comparator(
+                            a, b, self._config["ignore_allele_with_suffixes"]
+                        )
+                    ),
                 )
             )
 
@@ -399,7 +404,14 @@ def _sorted_unique_gl(self, gls: List[str], delim: str) -> str:
             all_gls += gl.split(delim)
         unique_gls = filter(lambda s: s != "", set(all_gls))
         return delim.join(
-            sorted(unique_gls, key=functools.cmp_to_key(self.smart_sort_comparator))
+            sorted(
+                unique_gls,
+                key=functools.cmp_to_key(
+                    lambda a, b: self.smart_sort_comparator(
+                        a, b, self._config["ignore_allele_with_suffixes"]
+                    )
+                ),
+            )
         )
 
     @functools.lru_cache(maxsize=DEFAULT_CACHE_SIZE)
@@ -445,6 +457,11 @@ def redux(self, glstring: str, redux_type: VALID_REDUCTION_TYPES = "lgx") -> str
                 [self.redux(a, redux_type) for a in glstring.split("/")], "/"
             )
 
+        if self._config["ignore_allele_with_suffixes"]:
+            _, fields = glstring.split("*")
+            if fields in self._config["ignore_allele_with_suffixes"]:
+                return glstring
+
         # Handle V2 to V3 mapping
         if self.is_v2(glstring):
             glstring = self._map_v2_to_v3(glstring)
@@ -789,6 +806,11 @@ def _is_valid(self, allele: str) -> bool:
             if not alphanum_allele.isalnum():
                 return False
 
+            if self._config["ignore_allele_with_suffixes"]:
+                locus, fields = allele.split("*")
+                if fields in self._config["ignore_allele_with_suffixes"]:
+                    return True
+
         if not self._config["strict"]:
             allele = self._get_non_strict_allele(allele)
 

diff --git a/pyard/smart_sort.py b/pyard/smart_sort.py
@@ -32,15 +32,19 @@
 
 
 @functools.lru_cache(maxsize=constants.DEFAULT_CACHE_SIZE)
-def smart_sort_comparator(a1, a2):
+def smart_sort_comparator(a1, a2, ignore_suffixes=()):
     """
     Natural sort 2 given alleles.
 
     Python sorts strings lexicographically but HLA alleles need
     to be sorted by numerical values in each field of the HLA nomenclature.
 
+    If the part of an allele after "*" is in ignore_suffixes, the comparison
+    orders that allele after the other one.
+
     :param a1: first allele
     :param a2: second allele
+    :param ignore_suffixes: tuple of suffixes to be sorted later
     """
 
     # Check to see if they are the same alleles
@@ -54,6 +58,16 @@ def smart_sort_comparator(a1, a2):
         else:
             return -1
 
+    if ignore_suffixes and "*" in a1:
+        _, fields = a1.split("*")
+        if fields in ignore_suffixes:
+            return 1
+
+    if ignore_suffixes and "*" in a2:
+        _, fields = a2.split("*")
+        if fields in ignore_suffixes:
+            return -1
+
     # remove any non-numerics
     a1 = re.sub(expr_regex, "", a1)
     a2 = re.sub(expr_regex, "", a2)

diff --git a/scripts/pyard-reduce-csv b/scripts/pyard-reduce-csv
@@ -39,7 +39,7 @@ import pandas as pd
 import pyard
 from pyard.db import similar_alleles
 import pyard.drbx as drbx
-from pyard.exceptions import PyArdError, InvalidTypingError
+from pyard.exceptions import PyArdError, InvalidTypingError, InvalidAlleleError
 from pyard.misc import get_data_dir, get_imgt_version, download_to_file
 
 
@@ -277,10 +277,45 @@ def create_reduced_slug(locus_typ1_typ2_pair):
     return typ1
 
 
+def apply_drbx(gl_string):
+    """
+    Replace the DRB3/DRB4/DRB5 typings of a GL string with a DRBX slug.
+
+    Alleles starting with DRB3, DRB4 or DRB5 are taken out of their
+    "^"-separated slugs and handed to ``drbx.map_drbx``; its result is
+    "+"-joined and appended as the last slug.
+
+    :param gl_string: "^"-separated slugs of "+"-separated typings
+    :return: the remaining slugs followed by the mapped DRBX slug
+    """
+    drbx_loci = ("DRB3", "DRB4", "DRB5")
+    drbx_alleles = []
+    filtered_slugs = []
+    for slug in gl_string.split("^"):
+        non_drbx_alleles = []
+        for allele in slug.split("+"):
+            if allele.startswith(drbx_loci):
+                drbx_alleles.append(allele)
+            else:
+                non_drbx_alleles.append(allele)
+        if non_drbx_alleles:
+            filtered_slugs.append("+".join(non_drbx_alleles))
+
+    # Join all slugs at once: when only DRB3/4/5 typings were given, the
+    # result must not start with a dangling "^".
+    filtered_slugs.append("+".join(drbx.map_drbx(drbx_alleles, True)))
+    return "^".join(filtered_slugs)
+
+
 def reduce_glstring(glstring: str) -> str:
     try:
-        return ard.redux(glstring, ard_config["redux_type"])
-    except InvalidTypingError as e:
+        ard_redux = ard.redux(glstring, ard_config["redux_type"])
+        # DRB3/4/5 typings are folded into a DRBX slug only when the
+        # "map_drb345_to_drbx" option is truthy in ard_config.
+        if ard_config.get("map_drb345_to_drbx"):
+            return apply_drbx(ard_redux)
+        return ard_redux
+    except (InvalidTypingError, InvalidAlleleError) as e:
         print(f"Error reducing {glstring} \n", e.message, file=sys.stderr)
         return "Failed"
 
@@ -391,6 +426,9 @@ if __name__ == "__main__":
         "reduce_MAC": ard_config.get("reduce_MAC", True),
         "map_drb345_to_drbx": ard_config.get("map_drb345_to_drbx", True),
         "verbose_log": ard_config.get("verbose_log", True),
+        "ignore_allele_with_suffixes": tuple(
+            ard_config.get("ignore_allele_with_suffixes", tuple())
+        ),
     }
     ard = pyard.init(
         imgt_version=imgt_version,

diff --git a/setup.cfg b/setup.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.5.4
+current_version = 1.5.5
 commit = True
 tag = True
 

diff --git a/setup.py b/setup.py
@@ -36,7 +36,7 @@
 
 setup(
     name="py-ard",
-    version="1.5.4",
+    version="1.5.5",
     description="ARD reduction for HLA with Python",
     long_description=readme,
     long_description_content_type="text/markdown",

diff --git a/tests/environment.py b/tests/environment.py
@@ -36,3 +36,14 @@ def before_all(context):
     context.ard_non_strict = pyard.init(
         "3440", data_dir="/tmp/py-ard", config=non_strict_config
     )
+
+    # Ignored allele suffixes
+    ignore_suffix_mode = {
+        "ignore_allele_with_suffixes": (
+            "NNNN",
+            "UUUU",
+        )
+    }
+    context.ard_ignore_suffix = pyard.init(
+        "3440", data_dir="/tmp/py-ard", config=ignore_suffix_mode
+    )
diff --git a/tests/features/allele.feature b/tests/features/allele.feature
@@ -60,19 +60,19 @@ Feature: Alleles
 
   Scenario Outline: Allele validation in non-strict mode
 
-    Similar to reduction, handle non-strict mode when validating an allele.
-    The test version of IPD/IMGT-HLA database (see environment.py),
-    A*11:403 is invalid and A*24:329 is valid for A*24:329Q
+  Similar to reduction, handle non-strict mode when validating an allele.
+  In the test version of the IPD/IMGT-HLA database (see environment.py),
+  A*11:403 is invalid and A*24:329 is valid for A*24:329Q
 
     Given the allele as <Allele>
     When checking for validity of the allele in non-strict mode
     Then the validness of the allele is <Validity>
 
     Examples:
-      | Allele   | Validity |
-      | A*11:403 | Invalid  |
-      | A*24:329 | Valid    |
-
+      | Allele    | Validity |
+      | A*11:403  | Invalid  |
+      | A*24:329  | Valid    |
+      | DRBX*NNNN | Invalid  |
 
   Scenario Outline: Single field MICA, MICB Alleles
 
@@ -88,3 +88,26 @@ Feature: Alleles
       | MICA*040 | lgx   | MICA*040     |
       | MICB*006 | lgx   | MICB*006     |
       | MICB*029 | lgx   | MICB*029     |
+
+  Scenario Outline: Ignore reduction of DRBX*NNNN
+    Given the allele as <Allele>
+    When reducing on the <Level> level in ignore_suffix mode
+    Then the reduced allele is found to be <Redux Allele>
+
+    Examples:
+      | Allele    | Level | Redux Allele |
+      | DRBX*NNNN | lgx   | DRBX*NNNN    |
+      | DRBX*NNNN | G     | DRBX*NNNN    |
+      | DRB1*UUUU | lg    | DRB1*UUUU    |
+
+  Scenario Outline: Allele validation in ignore_suffix mode
+
+  DRBX*NNNN is valid in ignore_suffix_mode
+
+    Given the allele as <Allele>
+    When checking for validity of the allele in ignore_suffix mode
+    Then the validness of the allele is <Validity>
+
+    Examples:
+      | Allele    | Validity |
+      | DRBX*NNNN | Valid    |
diff --git a/tests/features/glstring.feature b/tests/features/glstring.feature
@@ -19,3 +19,15 @@ Feature: GL (Genotype List) Strings
       | A*01:01~B*07:02+A*01:01~B*07:02                                  | G     | A*01:01:01G~B*07:02:01G+A*01:01:01G~B*07:02:01G                          |
       | A*01:01~B*07:02+A*01:01~B*07:02                                  | lg    | A*01:01g~B*07:02g+A*01:01g~B*07:02g                                      |
       | A*01:01~B*07:02+A*01:01~B*07:02\|A*02:01~B*07:02+A*02:01~B*07:02 | lg    | A*01:01g~B*07:02g+A*01:01g~B*07:02g\|A*02:01g~B*07:02g+A*02:01g~B*07:02g |
+
+
+  Scenario Outline: Ignore reduction of DRBX*NNNN in GL String
+    Given the allele as <GL String>
+    When reducing on the <Level> level in ignore_suffix mode
+    Then the reduced allele is found to be <Redux GL String>
+
+    Examples:
+      | GL String               | Level | Redux GL String                         |
+      | DRBX*NNNN+DRB3*03:ECXMH | lgx   | DRB3*03:01+DRBX*NNNN                    |
+      | DRB3*03:ECXMH+DRBX*NNNN | lgx   | DRB3*03:01+DRBX*NNNN                    |
+      | DRB1*UUUU+DRB1*12:02    | G     | DRB1*12:02:01G/DRB1*12:02:02G+DRB1*UUUU |
diff --git a/tests/steps/redux_allele.py b/tests/steps/redux_allele.py
@@ -126,6 +126,20 @@ def step_impl(context):
         context.is_valid = False
 
 
+@when("reducing on the {level} level in ignore_suffix mode")
+def step_impl(context, level):  # reduces with context.ard_ignore_suffix
+    context.level = level
+    context.redux_allele = context.ard_ignore_suffix.redux(context.allele, level)
+
+
+@when("checking for validity of the allele in ignore_suffix mode")
+def step_impl(context):  # validates with context.ard_ignore_suffix
+    try:
+        context.is_valid = context.ard_ignore_suffix.validate(context.allele)
+    except InvalidAlleleError:
+        context.is_valid = False  # a raised InvalidAlleleError means "not valid"
+
+
 @then("the validness of the allele is {validity}")
 def step_impl(context, validity):
     valid = validity == "Valid"