Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,52 @@
<a name="1.0.6"></a>
# [1.0.6 Validation of allele specific MAC codes](https://github.com/nmdp-bioinformatics/py-ard/releases/tag/1.0.6) - 30 Oct 2023

- Use allele-specific antigen code rules when validating MACs that cross antigen groups, similar to the [MAC Service](https://hml.nmdp.org/macui/).
- Returns the original `InvalidAlleleError` instead of wrapping it in `InvalidTypingError` when an allele is not valid.

[Changes][1.0.6]


<a name="1.0.5"></a>
# [1.0.5 Non strict mode](https://github.com/nmdp-bioinformatics/py-ard/releases/tag/1.0.5) - 04 Oct 2023

Supports a non-strict mode, which makes valid alleles by adding expression characters to invalid alleles.

## Use non `strict` mode in config to reduce alleles that may be valid with expression characters.

```python
>>> my_configs = {'strict': False, 'verbose_log': True}
>>> import pyard
>>> ard = pyard.init(config=my_configs, load_mac=False)

>>> ard.redux('A*24:329', 'lgx')
A*24:329 is not valid. Using A*24:329Q
'A*24:329Q'

>>> ard.redux('DQB1*03:276', 'lgx')
DQB1*03:276 is not valid. Using DQB1*03:276N
'DQB1*03:01'
```

## Add non-strict and verbose modes to pyard CLI.

```bash
❯ pyard --gl "DQB1*03:276" -r lgx
Typing Error: DQB1*03:276 is not valid GL String.
DQB1*03:276 is not a valid Allele

❯ pyard --non-strict --gl "DQB1*03:276" -r lgx
DQB1*03:01

❯ pyard --non-strict --verbose --gl "DQB1*03:276" -r lgx
DQB1*03:276 is not valid. Using DQB1*03:276N
DQB1*03:01
```


[Changes][1.0.5]


<a name="1.0.4"></a>
# [Fixes when used without login user (1.0.4)](https://github.com/nmdp-bioinformatics/py-ard/releases/tag/1.0.4) - 19 Sep 2023

Expand Down Expand Up @@ -563,6 +612,8 @@ yes
[Changes][0.0.14]


[1.0.6]: https://github.com/nmdp-bioinformatics/py-ard/compare/1.0.5...1.0.6
[1.0.5]: https://github.com/nmdp-bioinformatics/py-ard/compare/1.0.4...1.0.5
[1.0.4]: https://github.com/nmdp-bioinformatics/py-ard/compare/1.0.3...1.0.4
[1.0.3]: https://github.com/nmdp-bioinformatics/py-ard/compare/1.0.2...1.0.3
[1.0.2]: https://github.com/nmdp-bioinformatics/py-ard/compare/1.0.1...1.0.2
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ LABEL MAINTAINER="Pradeep Bashyal"

WORKDIR /app

ARG PY_ARD_VERSION=1.0.5
ARG PY_ARD_VERSION=1.0.6

COPY requirements.txt /app
RUN pip install --no-cache-dir --upgrade pip && \
Expand Down
2 changes: 1 addition & 1 deletion api-spec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ openapi: 3.0.3
info:
title: ARD Reduction
description: Reduce to ARD Level
version: "1.0.5"
version: "1.0.6"
servers:
- url: 'http://localhost:8080'
tags:
Expand Down
2 changes: 1 addition & 1 deletion pyard/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from .misc import get_imgt_db_versions as db_versions

__author__ = """NMDP Bioinformatics"""
__version__ = "1.0.5"
__version__ = "1.0.6"


def init(
Expand Down
53 changes: 38 additions & 15 deletions pyard/ard.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import re
import sqlite3
import sys
from collections import Counter
from typing import Iterable, List

from . import broad_splits, smart_sort
Expand Down Expand Up @@ -408,8 +409,8 @@ def redux(self, glstring: str, redux_type: VALID_REDUCTION_TYPES) -> str:
)

# Handle MAC
if self._config["reduce_MAC"] and self.is_mac(glstring):
if db.is_valid_mac_code(self.db_connection, code):
if self._config["reduce_MAC"] and code.isalpha():
if self.is_mac(glstring): # Make sure it's a valid MAC
if HLA_regex.search(glstring):
# Remove HLA- prefix
allele_name = glstring.split("-")[1]
Expand All @@ -436,12 +437,7 @@ def validate(self, glstring):
:param glstring: GL String to validate
:return: boolean indicating success
"""
try:
return self._is_valid_gl(glstring)
except InvalidAlleleError as e:
raise InvalidTypingError(
f"{glstring} is not valid GL String. \n {e.message}", e
) from None
return self._is_valid_gl(glstring)

def is_XX(self, glstring: str, loc_antigen: str = None, code: str = None) -> bool:
if loc_antigen is None or code is None:
Expand Down Expand Up @@ -484,12 +480,39 @@ def is_mac(self, allele: str) -> bool:
:return: True if MAC
"""
if ":" in allele:
code = allele.split(":")[1]
try:
allele_split = allele.split(":")
if len(allele_split) == 2: # MACs have only single :
locus_antigen, code = allele_split
if code.isalpha():
return db.is_valid_mac_code(self.db_connection, code)
except sqlite3.OperationalError as e:
print("Error: ", e)
try:
alleles = db.mac_code_to_alleles(self.db_connection, code)
if alleles:
if any(map(lambda a: ":" in a, alleles)):
# allele specific antigen codes have ':' in the MAC mapping
# e.g. CFWRN -> 15:01/15:98/15:157/15:202/
# 15:239/15:280/15:340/35:43/35:67/35:79/35:102/35:118/35:185/51:220
# Extract the antigens from the mapped alleles
antigen_groups = map(lambda a: a.split(":")[0], alleles)
# Rule 1: The 1st field with the most allele designations in the request is
# the 1st field of the allele code designation
# Rule 2: If there is a tie in the number of alleles designations sharing the 1st field,
# the 1st field with the lowest numeric value is selected.
antigen_counts = Counter(antigen_groups)
# Create a table of antigen to it's counts
# '15': 7
# '35': 6
# '51': 1
# Valid antigen is the first most common one.
# As it's presorted in db, also satisfies Rule 2.
valid_antigen = antigen_counts.most_common(1).pop()[0]
# Get antigen value 15 from 'DRB1*15'
provided_antigen = locus_antigen.split("*").pop()
# The MAC is only valid if the given antigen satisfies the antigen matching Rule 1 and 2
return provided_antigen == valid_antigen
# Valid when antigen group codes
return True
except sqlite3.OperationalError as e:
print("Error: ", e)
return False

def is_v2(self, allele: str) -> bool:
Expand Down Expand Up @@ -719,8 +742,8 @@ def expand_mac(self, mac_code: str):
:return: GL String of expanded alleles
:rtype: str
"""
locus_antigen, code = mac_code.split(":")
if db.is_valid_mac_code(self.db_connection, code):
if self.is_mac(mac_code): # Validate MAC first
locus_antigen, code = mac_code.split(":")
if HLA_regex.search(mac_code):
locus_antigen = locus_antigen.split("-")[1] # Remove HLA- prefix
return "/".join(
Expand Down
41 changes: 22 additions & 19 deletions pyard/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,21 +172,6 @@ def alleles_to_mac_code(
return None


def is_valid_mac_code(connection: sqlite3.Connection, code: str) -> bool:
    """
    Check db if the MAC code exists.

    The code is considered present when the ``mac_codes`` table holds at
    least one row for it with a non-NULL ``alleles`` value.

    :param connection: db connection of type sqlite.Connection
    :param code: MAC code
    :return: code is MAC code ?
    """
    mac_query = "SELECT count(alleles) from mac_codes where code = ?"
    cursor = connection.execute(mac_query, (code,))
    try:
        # An aggregate query always yields exactly one row: (count,)
        result = cursor.fetchone()
    finally:
        # Release the cursor even when the fetch fails
        cursor.close()
    return result[0] > 0


def serology_to_alleles(connection: sqlite3.Connection, serology: str) -> List[str]:
"""
Look up Serology in the database and return corresponding list of alleles.
Expand Down Expand Up @@ -371,16 +356,16 @@ def load_dict(
return table_as_dict


def similar_alleles(connection: sqlite3.Connection, allele_name: str) -> Set[str]:
def similar_alleles(connection: sqlite3.Connection, allele_prefix: str) -> Set[str]:
"""
Find similar alleles starting with the provided allele_name.
Find similar alleles starting with the provided prefix.

:param connection: db connection of type sqlite.Connection
:param allele_name: Allele name to use as a prefix to find similar alleles
:param allele_prefix: Allele name to use as a prefix to find similar alleles
:return: list of similar alleles
"""
query = "SELECT allele FROM alleles WHERE allele LIKE ?"
cursor = connection.execute(query, (f"{allele_name}%",))
cursor = connection.execute(query, (f"{allele_prefix}%",))
result = cursor.fetchall()
# fetchall() returns a list of tuples of results
# e.g. [('C*04:09N',)]
Expand All @@ -389,6 +374,24 @@ def similar_alleles(connection: sqlite3.Connection, allele_name: str) -> Set[str
return alleles


def similar_mac(connection: sqlite3.Connection, mac_prefix: str) -> Set[str]:
    """
    Find similar MAC codes starting with the provided prefix.

    The prefix is passed to a SQL ``LIKE`` pattern with a trailing ``%``;
    any ``LIKE`` wildcard characters contained in the prefix are not escaped.

    :param connection: db connection of type sqlite.Connection
    :param mac_prefix: MAC fragment to use as a prefix to find similar MACs
    :return: set of similar MAC codes (empty set when nothing matches)
    """
    query = "SELECT code FROM mac_codes WHERE code LIKE ?"
    cursor = connection.execute(query, (f"{mac_prefix}%",))
    try:
        # fetchall() returns a list of tuples of results
        # e.g. [('DJZUP',)]
        result = cursor.fetchall()
    finally:
        # Release the cursor even when the fetch fails
        cursor.close()
    # Get out the first value of the tuple from the result list
    return {row[0] for row in result}


def find_serology_for_allele(
connection: sqlite3.Connection, allele_name: str
) -> Dict[str, str]:
Expand Down
Loading