diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2a4b042..471c631 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,52 @@
+
+# [1.0.6 Validation of allele specific MAC codes](https://github.com/nmdp-bioinformatics/py-ard/releases/tag/1.0.6) - 30 Oct 2023
+
+- Use allele specific antigen code rules when validating MACs that cross antigen groups, similar to the [MAC Service](https://hml.nmdp.org/macui/)
+- Returns the original `InvalidAlleleError` instead of wrapping it in `InvalidTypingError` when an allele is not valid.
+
+[Changes][1.0.6]
+
+
+
+# [1.0.5 Non strict mode](https://github.com/nmdp-bioinformatics/py-ard/releases/tag/1.0.5) - 04 Oct 2023
+
+Supports a non-strict mode that makes valid alleles by adding expression characters to invalid alleles.
+
+## Use non `strict` mode in config to reduce alleles that may be valid with expression characters.
+
+```python
+>>> my_configs = {'strict': False, 'verbose_log': True}
+>>> import pyard
+>>> ard = pyard.init(config=my_configs, load_mac=False)
+
+>>> ard.redux('A*24:329', 'lgx')
+A*24:329 is not valid. Using A*24:329Q
+'A*24:329Q'
+
+>>> ard.redux('DQB1*03:276', 'lgx')
+DQB1*03:276 is not valid. Using DQB1*03:276N
+'DQB1*03:01'
+```
+
+## Add non-strict and verbose modes to pyard CLI.
+
+```bash
+❯ pyard --gl "DQB1*03:276" -r lgx
+Typing Error: DQB1*03:276 is not valid GL String.
+ DQB1*03:276 is not a valid Allele
+
+❯ pyard --non-strict --gl "DQB1*03:276" -r lgx
+DQB1*03:01
+
+❯ pyard --non-strict --verbose --gl "DQB1*03:276" -r lgx
+DQB1*03:276 is not valid. Using DQB1*03:276N
+DQB1*03:01
+```
+
+
+[Changes][1.0.5]
+
+
# [Fixes when used without login user (1.0.4)](https://github.com/nmdp-bioinformatics/py-ard/releases/tag/1.0.4) - 19 Sep 2023
@@ -563,6 +612,8 @@ yes
[Changes][0.0.14]
+[1.0.6]: https://github.com/nmdp-bioinformatics/py-ard/compare/1.0.5...1.0.6
+[1.0.5]: https://github.com/nmdp-bioinformatics/py-ard/compare/1.0.4...1.0.5
[1.0.4]: https://github.com/nmdp-bioinformatics/py-ard/compare/1.0.3...1.0.4
[1.0.3]: https://github.com/nmdp-bioinformatics/py-ard/compare/1.0.2...1.0.3
[1.0.2]: https://github.com/nmdp-bioinformatics/py-ard/compare/1.0.1...1.0.2
diff --git a/Dockerfile b/Dockerfile
index ac47d19..8b68907 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -4,7 +4,7 @@ LABEL MAINTAINER="Pradeep Bashyal"
WORKDIR /app
-ARG PY_ARD_VERSION=1.0.5
+ARG PY_ARD_VERSION=1.0.6
COPY requirements.txt /app
RUN pip install --no-cache-dir --upgrade pip && \
diff --git a/api-spec.yaml b/api-spec.yaml
index 0218e7d..e6eeb34 100644
--- a/api-spec.yaml
+++ b/api-spec.yaml
@@ -2,7 +2,7 @@ openapi: 3.0.3
info:
title: ARD Reduction
description: Reduce to ARD Level
- version: "1.0.5"
+ version: "1.0.6"
servers:
- url: 'http://localhost:8080'
tags:
diff --git a/pyard/__init__.py b/pyard/__init__.py
index ab12b6c..05f96f0 100644
--- a/pyard/__init__.py
+++ b/pyard/__init__.py
@@ -27,7 +27,7 @@
from .misc import get_imgt_db_versions as db_versions
__author__ = """NMDP Bioinformatics"""
-__version__ = "1.0.5"
+__version__ = "1.0.6"
def init(
diff --git a/pyard/ard.py b/pyard/ard.py
index fb8d618..6881393 100644
--- a/pyard/ard.py
+++ b/pyard/ard.py
@@ -25,6 +25,7 @@
import re
import sqlite3
import sys
+from collections import Counter
from typing import Iterable, List
from . import broad_splits, smart_sort
@@ -408,8 +409,8 @@ def redux(self, glstring: str, redux_type: VALID_REDUCTION_TYPES) -> str:
)
# Handle MAC
- if self._config["reduce_MAC"] and self.is_mac(glstring):
- if db.is_valid_mac_code(self.db_connection, code):
+ if self._config["reduce_MAC"] and code.isalpha():
+ if self.is_mac(glstring): # Make sure it's a valid MAC
if HLA_regex.search(glstring):
# Remove HLA- prefix
allele_name = glstring.split("-")[1]
@@ -436,12 +437,7 @@ def validate(self, glstring):
:param glstring: GL String to validate
:return: boolean indicating success
"""
- try:
- return self._is_valid_gl(glstring)
- except InvalidAlleleError as e:
- raise InvalidTypingError(
- f"{glstring} is not valid GL String. \n {e.message}", e
- ) from None
+ return self._is_valid_gl(glstring)
def is_XX(self, glstring: str, loc_antigen: str = None, code: str = None) -> bool:
if loc_antigen is None or code is None:
@@ -484,12 +480,39 @@ def is_mac(self, allele: str) -> bool:
:return: True if MAC
"""
if ":" in allele:
- code = allele.split(":")[1]
- try:
+ allele_split = allele.split(":")
+ if len(allele_split) == 2: # MACs have only single :
+ locus_antigen, code = allele_split
if code.isalpha():
- return db.is_valid_mac_code(self.db_connection, code)
- except sqlite3.OperationalError as e:
- print("Error: ", e)
+ try:
+ alleles = db.mac_code_to_alleles(self.db_connection, code)
+ if alleles:
+ if any(map(lambda a: ":" in a, alleles)):
+ # allele specific antigen codes have ':' in the MAC mapping
+ # e.g. CFWRN -> 15:01/15:98/15:157/15:202/
+ # 15:239/15:280/15:340/35:43/35:67/35:79/35:102/35:118/35:185/51:220
+ # Extract the antigens from the mapped alleles
+ antigen_groups = map(lambda a: a.split(":")[0], alleles)
+ # Rule 1: The 1st field with the most allele designations in the request is
+ # the 1st field of the allele code designation
+ # Rule 2: If there is a tie in the number of alleles designations sharing the 1st field,
+ # the 1st field with the lowest numeric value is selected.
+ antigen_counts = Counter(antigen_groups)
+                                # Create a table of antigen to its counts
+ # '15': 7
+ # '35': 6
+ # '51': 1
+ # Valid antigen is the first most common one.
+ # As it's presorted in db, also satisfies Rule 2.
+ valid_antigen = antigen_counts.most_common(1).pop()[0]
+ # Get antigen value 15 from 'DRB1*15'
+ provided_antigen = locus_antigen.split("*").pop()
+ # The MAC is only valid if the given antigen satisfies the antigen matching Rule 1 and 2
+ return provided_antigen == valid_antigen
+ # Valid when antigen group codes
+ return True
+ except sqlite3.OperationalError as e:
+ print("Error: ", e)
return False
def is_v2(self, allele: str) -> bool:
@@ -719,8 +742,8 @@ def expand_mac(self, mac_code: str):
:return: GL String of expanded alleles
:rtype: str
"""
- locus_antigen, code = mac_code.split(":")
- if db.is_valid_mac_code(self.db_connection, code):
+ if self.is_mac(mac_code): # Validate MAC first
+ locus_antigen, code = mac_code.split(":")
if HLA_regex.search(mac_code):
locus_antigen = locus_antigen.split("-")[1] # Remove HLA- prefix
return "/".join(
diff --git a/pyard/db.py b/pyard/db.py
index e08b506..9973997 100644
--- a/pyard/db.py
+++ b/pyard/db.py
@@ -172,21 +172,6 @@ def alleles_to_mac_code(
return None
-def is_valid_mac_code(connection: sqlite3.Connection, code: str) -> bool:
- """
- Check db if the MAC code exists.
-
- :param connection: db connection of type sqlite.Connection
- :param code: MAC code
- :return: code is MAC code ?
- """
- mac_query = "SELECT count(alleles) from mac_codes where code = ?"
- cursor = connection.execute(mac_query, (code,))
- result = cursor.fetchone()
- cursor.close()
- return result[0] > 0
-
-
def serology_to_alleles(connection: sqlite3.Connection, serology: str) -> List[str]:
"""
Look up Serology in the database and return corresponding list of alleles.
@@ -371,16 +356,16 @@ def load_dict(
return table_as_dict
-def similar_alleles(connection: sqlite3.Connection, allele_name: str) -> Set[str]:
+def similar_alleles(connection: sqlite3.Connection, allele_prefix: str) -> Set[str]:
"""
- Find similar alleles starting with the provided allele_name.
+ Find similar alleles starting with the provided prefix.
:param connection: db connection of type sqlite.Connection
- :param allele_name: Allele name to use as a prefix to find similar alleles
+ :param allele_prefix: Allele name to use as a prefix to find similar alleles
:return: list of similar alleles
"""
query = "SELECT allele FROM alleles WHERE allele LIKE ?"
- cursor = connection.execute(query, (f"{allele_name}%",))
+ cursor = connection.execute(query, (f"{allele_prefix}%",))
result = cursor.fetchall()
# fetchall() returns a list of tuples of results
# e.g. [('C*04:09N',)]
@@ -389,6 +374,24 @@ def similar_alleles(connection: sqlite3.Connection, allele_name: str) -> Set[str
return alleles
+def similar_mac(connection: sqlite3.Connection, mac_prefix: str) -> Set[str]:
+    """
+    Find similar MAC codes starting with the provided prefix.
+
+    :param connection: db connection of type sqlite.Connection
+    :param mac_prefix: MAC fragment to use as a prefix to find similar MACs
+    :return: set of similar MAC codes (empty when nothing matches)
+    """
+    query = "SELECT code FROM mac_codes WHERE code LIKE ?"
+    cursor = connection.execute(query, (f"{mac_prefix}%",))
+    result = cursor.fetchall()
+    cursor.close()  # release the cursor once all rows have been fetched
+    # fetchall() returns a list of single-value tuples of results
+    # e.g. [('DJZUP',)]; unpack the code out of each tuple
+    codes = {code for (code,) in result}
+    return codes
+
+
def find_serology_for_allele(
connection: sqlite3.Connection, allele_name: str
) -> Dict[str, str]:
diff --git a/scripts/pyard b/scripts/pyard
index 035c9ee..8f182d6 100755
--- a/scripts/pyard
+++ b/scripts/pyard
@@ -22,13 +22,92 @@
# > http://www.opensource.org/licenses/lgpl-license.php
#
import argparse
+import functools
import sys
+from pyard import smart_sort
from pyard.constants import VALID_REDUCTION_TYPES
import pyard.misc
+from pyard.db import similar_alleles, similar_mac
from pyard.exceptions import InvalidAlleleError, InvalidTypingError, InvalidMACError
from pyard.misc import get_data_dir, get_imgt_version
+
+def find_similar_alleles(ard, prefix):
+    """Print the MACs and alleles that start with ``prefix``, then exit.
+
+    Exit status is 1 when ``prefix`` has no ``*`` and 2 when nothing follows it.
+    Otherwise the matches (if any) are printed and the exit status is 0.
+    """
+    if "*" not in prefix:  # Only for those that have locus
+        sys.exit(1)
+    locus, fields = prefix.split("*")
+    if not fields:  # Only if at least a field is specified after *
+        sys.exit(2)
+
+    field_parts = fields.split(":")
+    if len(field_parts) == 2:  # Check for MACs
+        first_field, mac_prefix = field_parts
+        if mac_prefix.isalpha():
+            # TODO: validate all the mac codes with the prefix
+            #       show only the valid macs
+            locus_prefix = f"{locus}*{first_field}"
+            for code in sorted(similar_mac(ard.db_connection, mac_prefix)):
+                print(f"{locus_prefix}:{code}")
+
+    # find similar alleles
+    similar_allele_names = similar_alleles(ard.db_connection, prefix)
+    for allele in sorted(
+        similar_allele_names,
+        key=functools.cmp_to_key(smart_sort.smart_sort_comparator),
+    ):
+        print(allele)
+    sys.exit(0)
+
+
+def lookup_mac_codes():
+    """Print the MAC looked up for ``args.lookup_mac``, then exit with status 0."""
+    try:
+        mac = ard.lookup_mac(args.lookup_mac)
+        print(mac)
+    except InvalidMACError as e:  # reported on stderr; exit status is still 0
+        print(e.message, file=sys.stderr)
+    sys.exit(0)
+
+
+def expand_mac_code():
+    """Expand the MAC in ``args.expand_mac``, print it, then exit with status 0."""
+    try:
+        allele_list = ard.expand_mac(args.expand_mac)
+        print(allele_list)
+    except InvalidMACError as e:  # reported on stderr; exit status is still 0
+        print(e.message, file=sys.stderr)
+    sys.exit(0)
+
+
+def find_broad_splits():
+    mapping = pyard.find_broad_splits(args.splits)  # lookup for the --splits value
+    if mapping:  # nothing is printed when no mapping is found
+        print(f"{mapping[0]} = {'/'.join(mapping[1])}")  # first item = "/"-joined second item
+    sys.exit(0)  # exits with status 0 whether or not a mapping was printed
+
+
+def show_version():
+    """Print the IPD-IMGT/HLA database version and the py-ard version, then exit."""
+    print("IPD-IMGT/HLA version:", ard.get_db_version())
+    print("py-ard version:", pyard.__version__)
+    sys.exit(0)
+
+
+def perform_cwd_redux():
+    """Optionally validate ``args.cwd``, print its CWD reduction, then exit."""
+    if args.validate:
+        ard.validate(args.cwd)
+    cwd_redux = ard.cwd_redux(args.cwd)
+    print(cwd_redux)
+    sys.exit(0)
+
+
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="""
@@ -76,6 +155,11 @@ if __name__ == "__main__":
parser.add_argument(
"--lookup-mac", dest="lookup_mac", help="Lookup MAC for an Allele List"
)
+ parser.add_argument(
+ "--similar",
+ dest="similar_allele",
+ help="Find Similar Alleles with given prefix",
+ )
parser.add_argument(
"--non-strict",
dest="non_strict",
@@ -100,45 +184,32 @@ if __name__ == "__main__":
ard = pyard.init(imgt_version=imgt_version, data_dir=data_dir, config=new_config)
+ # Handle --version option
if args.version:
- version = ard.get_db_version()
- print(f"IPD-IMGT/HLA version:", version)
- print(f"py-ard version:", pyard.__version__)
- sys.exit(0)
+ show_version()
+ # Handle --splits option
if args.splits:
- mapping = pyard.find_broad_splits(args.splits)
- if mapping:
- print(f"{mapping[0]} = {'/'.join(mapping[1])}")
- sys.exit(0)
+ find_broad_splits()
# Handle --expand-mac option
if args.expand_mac:
- try:
- allele_list = ard.expand_mac(args.expand_mac)
- print(allele_list)
- except InvalidMACError as e:
- print(e.message, file=sys.stderr)
- sys.exit(0)
+ expand_mac_code()
# Handle --lookup-mac option
if args.lookup_mac:
- try:
- mac = ard.lookup_mac(args.lookup_mac)
- print(mac)
- except InvalidMACError as e:
- print(e.message, file=sys.stderr)
- sys.exit(0)
+ lookup_mac_codes()
+
+ # Handle --similar option
+ if args.similar_allele:
+ find_similar_alleles(ard, args.similar_allele)
try:
+ if args.cwd:
+ perform_cwd_redux()
+
if args.validate and args.gl_string:
ard.validate(args.gl_string)
- if args.cwd:
- if args.validate:
- ard.validate(args.cwd)
- cwd_redux = ard.cwd_redux(args.cwd)
- print(cwd_redux)
- sys.exit(0)
if args.redux_type:
print(ard.redux(args.gl_string, args.redux_type))
@@ -155,6 +226,9 @@ if __name__ == "__main__":
except InvalidTypingError as e:
print("Typing Error:", e.message, file=sys.stderr)
sys.exit(2)
+ except InvalidMACError as e:
+ print("MAC Error:", e.message, file=sys.stderr)
+ sys.exit(3)
else:
# Remove ard and close db connection
del ard
diff --git a/setup.cfg b/setup.cfg
index 6978158..d0b4c39 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
[bumpversion]
-current_version = 1.0.5
+current_version = 1.0.6
commit = True
tag = True
diff --git a/setup.py b/setup.py
index 0ecce82..46f7de5 100644
--- a/setup.py
+++ b/setup.py
@@ -36,7 +36,7 @@
setup(
name="py-ard",
- version="1.0.5",
+ version="1.0.6",
description="ARD reduction for HLA with Python",
long_description=readme,
long_description_content_type="text/markdown",
diff --git a/tests/features/mac.feature b/tests/features/mac.feature
index 4cc3160..f517a73 100644
--- a/tests/features/mac.feature
+++ b/tests/features/mac.feature
@@ -48,3 +48,26 @@ Feature: MAC (Multiple Allele Code)
| A*01:01/A*01:02 | A*01:AB |
| HLA-A*25:01/HLA-A*26:01 | HLA-A*25:BYHR |
| HLA-A*02:01/HLA-A*02:09/HLA-A*02:43N | HLA-A*02:GNF |
+
+
+ Scenario Outline: Validate allele specific antigen MACs
+
+ MAC validation rules for allele specific antigen codes:
+ - The 1st field with the most allele designations in the request is
+ the 1st field of the allele code designation
+ - If there is a tie in the number of alleles designations sharing the 1st field,
+ the 1st field with the lowest numeric value is selected.
+
+
+    Given the MAC code is <MAC>
+    When checking for validity of the MAC
+    Then the validness is <Validity>
+
+ Examples:
+ | MAC | Validity |
+ | DRB1*07:DFJR | Invalid |
+ | DRB1*15:DFJR | Valid |
+ | DPB1*08:BHHE | Invalid |
+ | DPB1*19:BHHE | Valid |
+ | A*31:CMZEY | Invalid |
+ | A*02:CMZEY | Valid |
diff --git a/tests/steps/mac.py b/tests/steps/mac.py
index 91f1919..64069d5 100644
--- a/tests/steps/mac.py
+++ b/tests/steps/mac.py
@@ -1,6 +1,8 @@
from behave import *
from hamcrest import assert_that, is_
+from pyard.exceptions import InvalidAlleleError
+
@given("the MAC code is {mac_code}")
def step_impl(context, mac_code):
@@ -30,3 +32,17 @@ def step_impl(context):
@then("the decoded MAC is {mac_code}")
def step_impl(context, mac_code):
assert_that(context.mac_code, is_(mac_code))
+
+
+@when("checking for validity of the MAC")
+def step_impl(context):
+    try:
+        context.is_valid = context.ard.validate(context.mac_code)
+    except InvalidAlleleError:  # a rejected typing is recorded as False instead of failing the step
+        context.is_valid = False
+
+
+@then("the validness is {validity}")
+def step_impl(context, validity):
+    valid = validity == "Valid"  # any other text (e.g. "Invalid") means not valid
+    assert_that(context.is_valid, is_(valid))
diff --git a/tests/test_pyard.py b/tests/test_pyard.py
index 0dcf87a..39dbd60 100644
--- a/tests/test_pyard.py
+++ b/tests/test_pyard.py
@@ -34,7 +34,7 @@
import pyard
from pyard.constants import DEFAULT_CACHE_SIZE
-from pyard.exceptions import InvalidAlleleError, InvalidMACError, InvalidTypingError
+from pyard.exceptions import InvalidAlleleError
from pyard.misc import validate_reduction_type
@@ -135,15 +135,15 @@ def test_redux_types(self):
validate_reduction_type("XX")
def test_empty_allele(self):
- with self.assertRaises(InvalidTypingError):
+ with self.assertRaises(InvalidAlleleError):
self.ard.redux("A*", "lgx")
def test_fp_allele(self):
- with self.assertRaises(InvalidTypingError):
+ with self.assertRaises(InvalidAlleleError):
self.ard.redux("A*0.123", "lgx")
def test_empty_fields(self):
- with self.assertRaises(InvalidTypingError):
+ with self.assertRaises(InvalidAlleleError):
# : without any data
self.ard.redux("DQA1*01:01:01:G", "lgx")
@@ -152,7 +152,7 @@ def test_invalid_serology(self):
serology_a10 = self.ard.redux("A10", "lgx")
self.assertEqual(serology_a10.split("/")[0], "A*25:01")
# And A100 isn't a valid typing
- with self.assertRaises(InvalidTypingError):
+ with self.assertRaises(InvalidAlleleError):
self.ard.redux("A100", "lgx")
def test_allele_duplicated(self):