Skip to content

Commit 4665c1d

Browse files
authored
Merge pull request #303 from pbashyal-nmdp/support-associated-serology
Support Serology Associated Antigens
2 parents 25d798d + 7b17e5a commit 4665c1d

File tree

14 files changed

+146
-65
lines changed

14 files changed

+146
-65
lines changed

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ LABEL MAINTAINER="Pradeep Bashyal"
44

55
WORKDIR /app
66

7-
ARG PY_ARD_VERSION=1.0.11
7+
ARG PY_ARD_VERSION=1.1.0
88

99
COPY requirements.txt /app
1010
RUN pip install --no-cache-dir --upgrade pip && \

Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -98,10 +98,10 @@ dist: clean ## builds source and wheel package
9898
ls -l dist
9999

100100
docker-build: ## build a docker image for the service
101-
docker build -t pyard-service:latest .
101+
docker build --platform=linux/amd64 -t nmdpbioinformatics/pyard-service:latest .
102102

103103
docker: docker-build ## build a docker image and run the service
104-
docker run --rm --name pyard-service -p 8080:8080 pyard-service:latest
104+
docker run --platform=linux/amd64 --rm --name pyard-service -p 8080:8080 nmdpbioinformatics/pyard-service:latest
105105

106106
install: clean ## install the package to the active Python's site-packages
107107
pip install --upgrade pip

api-spec.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ openapi: 3.0.3
22
info:
33
title: ARD Reduction
44
description: Reduce to ARD Level
5-
version: "1.0.11"
5+
version: "1.1.0"
66
servers:
77
- url: 'http://localhost:8080'
88
tags:

pyard/__init__.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,11 @@
2222
# > http://www.opensource.org/licenses/lgpl-license.php
2323
#
2424
from .blender import blender as dr_blender
25-
from .broad_splits import find_splits as find_broad_splits
2625
from .constants import DEFAULT_CACHE_SIZE
2726
from .misc import get_imgt_db_versions as db_versions
2827

2928
__author__ = """NMDP Bioinformatics"""
30-
__version__ = "1.0.11"
29+
__version__ = "1.1.0"
3130

3231

3332
def init(

pyard/ard.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -113,10 +113,16 @@ def __init__(
113113
)
114114

115115
# Load Serology mappings
116-
broad_splits.broad_splits_ser_mapping = (
117-
dr.generate_serology_broad_split_mapping(self.db_connection, imgt_version)
116+
broad_splits_mapping, associated_mapping = dr.generate_broad_splits_mapping(
117+
self.db_connection, imgt_version
118+
)
119+
self.serology_mapping = broad_splits.SerologyMapping(
120+
broad_splits_mapping, associated_mapping
121+
)
122+
123+
dr.generate_serology_mapping(
124+
self.db_connection, self.serology_mapping, imgt_version
118125
)
119-
dr.generate_serology_mapping(self.db_connection, imgt_version)
120126
# Load V2 to V3 mappings
121127
dr.generate_v2_to_v3_mapping(self.db_connection, imgt_version)
122128
# Save IMGT database version
@@ -608,6 +614,12 @@ def is_exp_allele(self, allele):
608614
"""
609615
return allele in self.allele_group.exp_alleles
610616

617+
def find_broad_splits(self, allele) -> tuple:
618+
return self.serology_mapping.find_splits(allele)
619+
620+
def find_associated_antigen(self, serology) -> str:
621+
return self.serology_mapping.serology_associated_map.get(serology, serology)
622+
611623
def _get_alleles(self, code, locus_antigen) -> Iterable[str]:
612624
"""
613625
Look up allele code in database and generate alleles

pyard/broad_splits.py

Lines changed: 28 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@
2020
# > http://www.fsf.org/licensing/licenses/lgpl.html
2121
# > http://www.opensource.org/licenses/lgpl-license.php
2222
#
23-
import re
23+
24+
from pyard.constants import HLA_regex
2425

2526
#
2627
# Broad, Splits and Associated Antigens
@@ -46,35 +47,35 @@
4647
"DRB1*06": ["DRB1*13", "DRB1*14"],
4748
}
4849

49-
# Loaded at runtime
50-
broad_splits_ser_mapping = None
51-
52-
HLA_regex = re.compile("^HLA-")
53-
5450

55-
def find_splits(allele: str) -> tuple:
56-
if HLA_regex.search(allele):
57-
prefix = True
58-
allele_name = allele.split("-")[1]
59-
else:
60-
prefix = False
61-
allele_name = allele
51+
class SerologyMapping:
52+
def __init__(self, broad_splits_mapping, associated_mapping):
53+
self.broad_splits_map = broad_splits_mapping
54+
self.serology_associated_map = associated_mapping
6255

63-
if "*" in allele_name:
64-
mapping = broad_splits_dna_mapping
65-
else:
66-
mapping = broad_splits_ser_mapping
56+
def find_splits(self, allele: str) -> tuple:
57+
if HLA_regex.search(allele):
58+
prefix = True
59+
allele_name = allele.split("-")[1]
60+
else:
61+
prefix = False
62+
allele_name = allele
6763

68-
if allele_name in mapping:
69-
return _get_mapping(allele_name, mapping, prefix)
64+
if "*" in allele_name:
65+
mapping = broad_splits_dna_mapping
66+
else:
67+
mapping = self.broad_splits_map
7068

71-
for broad in mapping:
72-
if allele_name in mapping[broad]:
73-
return _get_mapping(broad, mapping, prefix)
69+
if allele_name in mapping:
70+
return self._get_mapping(allele_name, mapping, prefix)
7471

72+
for broad in mapping:
73+
if allele_name in mapping[broad]:
74+
return self._get_mapping(broad, mapping, prefix)
7575

76-
def _get_mapping(broad, mapping, prefix):
77-
if prefix:
78-
return "HLA-" + broad, list(map(lambda x: "HLA-" + x, mapping[broad]))
79-
else:
80-
return broad, mapping[broad]
76+
@staticmethod
77+
def _get_mapping(broad, mapping, prefix):
78+
if prefix:
79+
return "HLA-" + broad, list(map(lambda x: "HLA-" + x, mapping[broad]))
80+
else:
81+
return broad, mapping[broad]

pyard/data_repository.py

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@
2626

2727
import pyard.load
2828
from pyard.smart_sort import smart_sort_comparator
29-
from . import db, broad_splits
29+
from . import db
30+
from .broad_splits import broad_splits_dna_mapping
3031
from .load import (
3132
load_g_group,
3233
load_p_group,
@@ -216,7 +217,7 @@ def generate_alleles_and_xx_codes_and_who(
216217
xx_codes = xx_df.groupby(["1d"]).apply(lambda x: list(x["Allele"])).to_dict()
217218

218219
# Update xx codes with broads and splits
219-
for broad, splits in broad_splits.broad_splits_dna_mapping.items():
220+
for broad, splits in broad_splits_dna_mapping.items():
220221
for split in splits:
221222
if broad in xx_codes:
222223
xx_codes[broad].extend(xx_codes[split])
@@ -354,7 +355,9 @@ def to_serological_name(locus_name: str):
354355
return sero_name
355356

356357

357-
def generate_serology_mapping(db_connection: sqlite3.Connection, imgt_version):
358+
def generate_serology_mapping(
359+
db_connection: sqlite3.Connection, serology_mapping, imgt_version
360+
):
358361
if not db.table_exists(db_connection, "serology_mapping"):
359362
df_sero = load_serology_mappings(imgt_version)
360363

@@ -396,7 +399,7 @@ def generate_serology_mapping(db_connection: sqlite3.Connection, imgt_version):
396399

397400
# map alleles for split serology to their corresponding broad
398401
# Update xx codes with broads and splits
399-
for broad, splits in broad_splits.broad_splits_ser_mapping.items():
402+
for broad, splits in serology_mapping.broad_splits_map.items():
400403
for split in splits:
401404
try:
402405
sero_mapping[broad] = "/".join(
@@ -450,15 +453,19 @@ def get_db_version(db_connection: sqlite3.Connection):
450453
return db.get_user_version(db_connection)
451454

452455

453-
def generate_serology_broad_split_mapping(
454-
db_connection: sqlite3.Connection, imgt_version
455-
):
456+
def generate_broad_splits_mapping(db_connection: sqlite3.Connection, imgt_version):
456457
if not db.table_exists(db_connection, "serology_broad_split_mapping"):
457-
sero_mapping = pyard.load.load_serology_broad_split_mapping(imgt_version)
458+
sero_mapping, associated_mapping = pyard.load.load_serology_broad_split_mapping(
459+
imgt_version
460+
)
458461
db.save_serology_broad_split_mappings(db_connection, sero_mapping)
459-
return sero_mapping
462+
db.save_serology_associated_mappings(db_connection, associated_mapping)
463+
return sero_mapping, associated_mapping
464+
465+
sero_mapping = db.load_serology_broad_split_mappings(db_connection)
466+
associated_mapping = db.load_serology_associated_mappings(db_connection)
460467

461-
return db.load_serology_broad_split_mappings(db_connection)
468+
return sero_mapping, associated_mapping
462469

463470

464471
def generate_cwd_mapping(db_connection: sqlite3.Connection):

pyard/db.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -609,20 +609,36 @@ def load_v2_v3_mappings(db_connection):
609609

610610
def load_serology_broad_split_mappings(db_connection):
611611
sero_mapping = load_dict(
612-
db_connection, "serology_broad_split_mapping", ("serology", "splits")
612+
db_connection, "serology_broad_split_mapping", ("broad", "splits")
613613
)
614614
sero_splits = {k: v.split("/") for k, v in sero_mapping.items()}
615615
return sero_splits
616616

617617

618+
def load_serology_associated_mappings(db_connection):
619+
associated_mapping = load_dict(
620+
db_connection, "serology_associated_mappings", ("associated", "antigen")
621+
)
622+
return associated_mapping
623+
624+
618625
def save_serology_broad_split_mappings(db_connection, sero_mapping):
619626
# Save the `splits` as a "/" delimited string to db
620627
sero_splits = {sero: "/".join(splits) for sero, splits in sero_mapping.items()}
621628
save_dict(
622629
db_connection,
623630
table_name="serology_broad_split_mapping",
624631
dictionary=sero_splits,
625-
columns=("serology", "splits"),
632+
columns=("broad", "splits"),
633+
)
634+
635+
636+
def save_serology_associated_mappings(db_connection, associated_mapping):
637+
save_dict(
638+
db_connection,
639+
table_name="serology_associated_mappings",
640+
dictionary=associated_mapping,
641+
columns=("associated", "antigen"),
626642
)
627643

628644

pyard/load.py

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
# > http://www.opensource.org/licenses/lgpl-license.php
2121
#
2222
import sys
23-
from typing import Dict, List
23+
from typing import Dict, List, Tuple
2424
from urllib.error import URLError
2525

2626
from pyard.misc import get_G_name, get_2field_allele, get_3field_allele, get_P_name
@@ -38,7 +38,7 @@ def add_locus_name(locus: str, splits: str) -> List:
3838
# Derived from rel_ser_ser.txt
3939
# https://raw.githubusercontent.com/ANHIG/IMGTHLA/Latest/wmda/rel_ser_ser.txt
4040
#
41-
def load_serology_broad_split_mapping(imgt_version: str) -> Dict:
41+
def load_serology_broad_split_mapping(imgt_version: str) -> Tuple[Dict, Dict]:
4242
import pandas as pd
4343

4444
ser_ser_url = f"{IMGT_HLA_URL}{imgt_version}/wmda/rel_ser_ser.txt"
@@ -47,21 +47,36 @@ def load_serology_broad_split_mapping(imgt_version: str) -> Dict:
4747
ser_ser_url,
4848
skiprows=6,
4949
names=["Locus", "A", "Splits", "Associated"],
50-
usecols=[0, 1, 2],
5150
dtype="string",
5251
sep=";",
53-
).dropna()
52+
)
5453
except URLError as e:
5554
print(f"Error downloading {ser_ser_url}", e, file=sys.stderr)
5655
sys.exit(1)
5756

58-
df_p["Sero"] = df_p["Locus"] + df_p["A"]
59-
df_p["Splits"] = df_p[["Locus", "Splits"]].apply(
57+
splits_df = df_p[["Locus", "A", "Splits"]].dropna()
58+
associated_df = df_p[["Locus", "A", "Associated"]].dropna()
59+
60+
splits_df["Sero"] = splits_df["Locus"] + splits_df["A"]
61+
splits_df["Splits"] = splits_df[["Locus", "Splits"]].apply(
6062
lambda x: add_locus_name(x["Locus"], x["Splits"]), axis=1
6163
)
64+
splits_df = splits_df.astype({"A": "int32"}).sort_values(by=["Locus", "A"])
65+
66+
associated_df["Sero"] = associated_df["Locus"] + associated_df["A"]
67+
associated_df["Associated"] = associated_df[["Locus", "Associated"]].apply(
68+
lambda x: add_locus_name(x["Locus"], x["Associated"]), axis=1
69+
)
70+
associated_df = associated_df.astype({"A": "int32"}).sort_values(by=["Locus", "A"])
71+
72+
splits_mapping = splits_df[["Sero", "Splits"]].set_index("Sero")["Splits"].to_dict()
73+
associated_mapping = (
74+
associated_df.explode("Associated")[["Associated", "Sero"]]
75+
.set_index("Associated")["Sero"]
76+
.to_dict()
77+
)
6278

63-
sero_mapping = df_p[["Sero", "Splits"]].set_index("Sero")["Splits"].to_dict()
64-
return sero_mapping
79+
return splits_mapping, associated_mapping
6580

6681

6782
def load_g_group(imgt_version):

scripts/pyard

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ from pyard.exceptions import InvalidAlleleError, InvalidTypingError, InvalidMACE
3333
from pyard.misc import get_data_dir, get_imgt_version
3434

3535

36-
def find_similar_alleles(prefix):
36+
def find_similar_alleles(ard, prefix):
3737
alleles = ard.similar_alleles(prefix)
3838
if alleles:
3939
for allele in alleles:
@@ -62,8 +62,8 @@ def expand_mac_code():
6262
sys.exit(0)
6363

6464

65-
def find_broad_splits():
66-
mapping = pyard.find_broad_splits(args.splits)
65+
def find_broad_splits(ard):
66+
mapping = ard.find_broad_splits(args.splits)
6767
if mapping:
6868
print(f"{mapping[0]} = {'/'.join(mapping[1])}")
6969
sys.exit(0)
@@ -166,7 +166,7 @@ if __name__ == "__main__":
166166

167167
# Handle --splits option
168168
if args.splits:
169-
find_broad_splits()
169+
find_broad_splits(ard)
170170

171171
# Handle --expand-mac option
172172
if args.expand_mac:
@@ -178,7 +178,7 @@ if __name__ == "__main__":
178178

179179
# Handle --similar option
180180
if args.similar_allele:
181-
find_similar_alleles(args.similar_allele)
181+
find_similar_alleles(ard, args.similar_allele)
182182

183183
try:
184184
if args.cwd:

0 commit comments

Comments
 (0)