Skip to content

Commit 7a527cc

Browse files
authored
Merge pull request #203 from pbashyal-nmdp/broad_split_api
Broads and Splits
2 parents ab2b7b2 + a65f746 commit 7a527cc

File tree

12 files changed

+252
-28
lines changed

12 files changed

+252
-28
lines changed

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ repos:
77
- id: check-yaml
88
- id: check-added-large-files
99
- repo: https://github.com/psf/black
10-
rev: 22.3.0
10+
rev: 23.1.0
1111
hooks:
1212
- id: black
1313
language_version: python3

README.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,16 @@ DRB1*08:01:01G/DRB1*08:02:01G/DRB1*08:03:02G/DRB1*08:04:01G/DRB1*08:05/ ...
192192
$ pyard -i 3290 --gl 'A1' -r lgx # For a particular version of DB
193193
A*01:01/A*01:02/A*01:03/A*01:06/A*01:07/A*01:08/A*01:09/A*01:10/A*01:12/ ...
194194
```
195+
196+
### Find Broad/Splits of an allele or serology typing
197+
```shell
198+
$ pyard --splits "A*10"
199+
A*10 = A*25/A*26/A*34/A*66
200+
201+
$ pyard --splits B14
202+
B14 = B64/B65
203+
```
204+
195205
### Batch Reduce a CSV file
196206

197207
`pyard-csv-reduce` can be used to batch process a CSV file with HLA typings. See [documentation](extras/README.md) for instructions on how to configure and run.

api-spec.yaml

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,3 +261,52 @@ paths:
261261
description: Describes what went wrong
262262
type: string
263263
example: "All of DRB1_SLUG, DRB3, DRB4, DRB5 values not provided"
264+
/splits/{allele}:
265+
get:
266+
tags:
267+
- Broad Splits
268+
operationId: api.splits_controller
269+
summary: Search Broad Split Mapping
270+
description: |
271+
Given a broad or a split, broad/split mappings will
272+
be returned if available.
273+
parameters:
274+
- name: allele
275+
in: path
276+
description: A valid Allele/Serology
277+
required: true
278+
schema:
279+
type: string
280+
example: A*10
281+
responses:
282+
200:
283+
description: Broad/Split mapping
284+
content:
285+
application/json:
286+
schema:
287+
type: object
288+
properties:
289+
broad:
290+
description: Broad Allele
291+
type: string
292+
example: "A*10"
293+
splits:
294+
description: Corresponding Splits
295+
type: array
296+
example:
297+
- "A*25"
298+
- "A*26"
299+
- "A*34"
300+
- "A*66"
301+
404:
302+
description: |
303+
Broad/Split mapping not found for the given allele
304+
content:
305+
application/json:
306+
schema:
307+
type: object
308+
properties:
309+
message:
310+
description: Mapping not found
311+
type: string
312+
example: "Broad/Split not found"

api.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,3 +84,11 @@ def drbx_blender_controller():
8484
def version_controller():
    """Return the database version reported by the module's ``ard`` object.

    :return: ``({"version": <version>}, 200)`` response tuple
    """
    return {"version": ard.get_db_version()}, 200
87+
88+
89+
def splits_controller(allele: str):
    """Handle ``GET /splits/{allele}``: look up the broad/split mapping.

    :param allele: allele or serology typing taken from the request path
    :return: ``({"broad": ..., "splits": ...}, 200)`` when a mapping is found,
        otherwise ``({"message": ...}, 404)``
    """
    found = pyard.find_broad_splits(allele)
    if not found:
        # Nothing matched: report it as "not found" rather than an empty result
        return {"message": f"No Broad/Splits matched {allele}"}, 404

    # `found` is a (broad, splits) pair
    response = {"broad": found[0], "splits": found[1]}
    return response, 200

pyard/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#
2424
from .pyard import ARD
2525
from .blender import blender as dr_blender
26+
from .broad_splits import find_splits as find_broad_splits
2627

2728
__author__ = """NMDP Bioinformatics"""
2829
__version__ = "0.9.1"

pyard/broad_splits.py

Lines changed: 34 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
# > http://www.fsf.org/licensing/licenses/lgpl.html
2121
# > http://www.opensource.org/licenses/lgpl-license.php
2222
#
23+
import re
2324

2425
#
2526
# Broad, Splits and Associated Antigens
@@ -44,24 +45,36 @@
4445
"DRB1*02": ["DRB1*15", "DRB1*16"],
4546
"DRB1*06": ["DRB1*13", "DRB1*14"],
4647
}
47-
broad_splits_ser_mapping = {
48-
"A9": ["A23", "A24"],
49-
"A10": ["A25", "A26", "A34", "A66"],
50-
"A19": ["A29", "A30", "A31", "A32", "A33", "A74"],
51-
"A28": ["A68", "A69"],
52-
"B5": ["B51", "B52"],
53-
"B12": ["B44", "B45"],
54-
"B15": ["B62", "B63", "B75", "B76", "B77"],
55-
"B16": ["B38", "B39"],
56-
"B17": ["B57", "B58"],
57-
"B21": ["B49", "B50"],
58-
"B22": ["B54", "B55", "B56"],
59-
"B40": ["B60", "B61"],
60-
"B70": ["B71", "B72"],
61-
"Cw3": ["Cw9", "Cw10"],
62-
"DQ1": ["DQ5", "DQ6"],
63-
"DR2": ["DR15", "DR16"],
64-
"DR3": ["DR17", "DR18"],
65-
"DR5": ["DR11", "DR12"],
66-
"DR6": ["DR13", "DR14"],
67-
}
48+
49+
# Loaded at runtime
50+
# Serology broad -> list of splits. Stays None until it is assigned from
# outside this module (pyard.ARD does so while initializing).
broad_splits_ser_mapping = None
51+
52+
# Matches typings that start with the "HLA-" prefix
HLA_regex = re.compile("^HLA-")
53+
54+
55+
def find_splits(allele: str) -> tuple:
    """Find the broad/split group that an allele or serology typing belongs to.

    A typing containing ``*`` is looked up in the DNA mapping; anything else is
    looked up in the serology mapping. The typing may be the broad itself or
    one of its splits. A leading ``HLA-`` prefix is accepted and, when present,
    is added back to every name in the result.

    :param allele: allele or serology typing, e.g. ``"A*10"``, ``"B14"`` or
        ``"HLA-A*10"``
    :return: ``(broad, splits)`` tuple, or ``None`` when no mapping matches.
        ``None`` is also returned when the selected mapping is not available,
        e.g. the serology mapping has not been loaded yet.
    """
    prefix = HLA_regex.search(allele) is not None
    # Strip only the leading "HLA-"; keep everything after the first hyphen.
    allele_name = allele.split("-", 1)[1] if prefix else allele

    if "*" in allele_name:
        mapping = broad_splits_dna_mapping
    else:
        mapping = broad_splits_ser_mapping

    # The serology mapping is None until it is loaded at runtime; treat a
    # missing/empty mapping as "nothing found" instead of raising TypeError.
    if not mapping:
        return None

    # The typing is itself a broad
    if allele_name in mapping:
        return _get_mapping(allele_name, mapping, prefix)

    # The typing is one of the splits of some broad
    for broad, splits in mapping.items():
        if allele_name in splits:
            return _get_mapping(broad, mapping, prefix)

    return None
74+
75+
76+
def _get_mapping(broad, mapping, prefix):
    """Build the ``(broad, splits)`` result for a broad found in ``mapping``.

    :param broad: key of ``mapping`` to report
    :param mapping: dictionary of broad -> list of splits
    :param prefix: when true, prepend ``"HLA-"`` to the broad and to each split
    :return: ``(broad, splits)`` tuple; without a prefix the splits list is the
        very list stored in ``mapping``
    """
    if not prefix:
        return broad, mapping[broad]

    prefixed_broad = "HLA-" + broad
    prefixed_splits = ["HLA-" + split for split in mapping[broad]]
    return prefixed_broad, prefixed_splits

pyard/data_repository.py

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,9 @@
2929

3030
import pandas as pd
3131

32-
from . import db
32+
from . import db, broad_splits
3333
from .broad_splits import broad_splits_dna_mapping
34-
from .broad_splits import broad_splits_ser_mapping
34+
from .load import load_serology_broad_split_mapping
3535
from .misc import (
3636
get_2field_allele,
3737
get_3field_allele,
@@ -390,7 +390,7 @@ def generate_alleles_and_xx_codes_and_who(
390390
xx_codes = xx_df.groupby(["1d"]).apply(lambda x: list(x["Allele"])).to_dict()
391391

392392
# Update xx codes with broads and splits
393-
for broad, splits in broad_splits_dna_mapping.items():
393+
for broad, splits in broad_splits.broad_splits_dna_mapping.items():
394394
for split in splits:
395395
if broad in xx_codes:
396396
xx_codes[broad].extend(xx_codes[split])
@@ -635,15 +635,15 @@ def generate_serology_mapping(db_connection: sqlite3.Connection, imgt_version):
635635

636636
# map alleles for split serology to their corresponding broad
637637
# Update xx codes with broads and splits
638-
for broad, splits in broad_splits_ser_mapping.items():
638+
for broad, splits in broad_splits.broad_splits_ser_mapping.items():
639639
for split in splits:
640640
try:
641641
sero_mapping[broad] = "/".join(
642642
[sero_mapping[broad], sero_mapping[split]]
643643
)
644-
645644
except KeyError:
646-
sero_mapping[broad] = sero_mapping[split]
645+
if split in sero_mapping:
646+
sero_mapping[broad] = sero_mapping[split]
647647

648648
# re-sort allele lists into smartsort order
649649
for sero in sero_mapping.keys():
@@ -724,3 +724,23 @@ def set_db_version(db_connection: sqlite3.Connection, imgt_version):
724724

725725
def get_db_version(db_connection: sqlite3.Connection):
    """Return the user version that ``db.get_user_version`` reports for the connection.

    :param db_connection: open SQLite connection
    :return: whatever ``db.get_user_version`` returns for that connection
    """
    user_version = db.get_user_version(db_connection)
    return user_version
727+
728+
729+
def generate_serology_broad_split_mapping(db_connection, imgt_version):
    """Return the serology broad -> splits mapping, caching it in the database.

    If the ``serology_broad_split_mapping`` table already exists, the mapping is
    read back from it. Otherwise it is loaded with
    ``load_serology_broad_split_mapping`` and saved to that table first.

    :param db_connection: database connection used to check, save and load
    :param imgt_version: version passed to the loader when the table is missing
    :return: dictionary of serology -> list of splits
    """
    table = "serology_broad_split_mapping"

    if db.table_exists(db_connection, table):
        stored = db.load_dict(db_connection, table, ("serology", "splits"))
        # Splits are stored as one "/" delimited string; expand back to lists
        return {sero: joined.split("/") for sero, joined in stored.items()}

    broad_to_splits = load_serology_broad_split_mapping(imgt_version)

    # Save the `splits` as a "/" delimited string to db
    joined_splits = {}
    for sero, splits in broad_to_splits.items():
        joined_splits[sero] = "/".join(splits)

    db.save_dict(
        db_connection,
        table_name=table,
        dictionary=joined_splits,
        columns=("serology", "splits"),
    )
    return broad_to_splits

pyard/load.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
from typing import Dict, List
2+
3+
import pandas as pd
4+
5+
# Base URL of the raw IMGTHLA GitHub content; the version and file path are appended to it
IMGT_HLA_URL = "https://raw.githubusercontent.com/ANHIG/IMGTHLA/"
6+
7+
8+
def add_locus_name(locus: str, splits: str) -> List:
    """Prefix every entry of a "/" delimited string with the locus name.

    :param locus: text to put in front of each entry, e.g. ``"A"``
    :param splits: "/" delimited entries, e.g. ``"23/24"``
    :return: list of prefixed entries, e.g. ``["A23", "A24"]``
    """
    return [locus + sero for sero in splits.split("/")]
11+
12+
13+
#
14+
# Derived from rel_ser_ser.txt
15+
# https://raw.githubusercontent.com/ANHIG/IMGTHLA/Latest/wmda/rel_ser_ser.txt
16+
#
17+
def load_serology_broad_split_mapping(imgt_version: str) -> Dict:
    """Download ``rel_ser_ser.txt`` for a version and build the broad -> splits mapping.

    The file is read as ";" separated text with the first 6 rows skipped. Only
    the first three columns (locus, serology, splits) are used, and rows with a
    missing value in any of them are dropped.

    :param imgt_version: path component placed between the base URL and
        ``/wmda/rel_ser_ser.txt``
    :return: dictionary of locus-qualified serology -> list of locus-qualified splits
    """
    url = f"{IMGT_HLA_URL}{imgt_version}/wmda/rel_ser_ser.txt"
    read_options = dict(
        skiprows=6,
        names=["Locus", "A", "Splits", "Associated"],
        usecols=[0, 1, 2],
        dtype="string",
        sep=";",
    )
    frame = pd.read_csv(url, **read_options).dropna()

    # Serology name is the locus followed by the antigen number
    frame["Sero"] = frame["Locus"] + frame["A"]
    # Turn the "/" delimited splits into a list of locus-qualified names
    frame["Splits"] = frame[["Locus", "Splits"]].apply(
        lambda row: add_locus_name(row["Locus"], row["Splits"]), axis=1
    )

    splits_by_sero = frame[["Sero", "Splits"]].set_index("Sero")["Splits"]
    return splits_by_sero.to_dict()

pyard/pyard.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828

2929
from . import db
3030
from . import data_repository as dr
31+
from . import broad_splits
3132
from .smart_sort import smart_sort_comparator
3233
from .exceptions import InvalidAlleleError, InvalidMACError, InvalidTypingError
3334
from .misc import get_n_field_allele, get_2field_allele, expression_chars
@@ -119,6 +120,9 @@ def __init__(
119120
self.shortnulls = dr.generate_short_nulls(self.db_connection, self.who_group)
120121

121122
# Load Serology mappings
123+
broad_splits.broad_splits_ser_mapping = (
124+
dr.generate_serology_broad_split_mapping(self.db_connection, imgt_version)
125+
)
122126
dr.generate_serology_mapping(self.db_connection, imgt_version)
123127
# Load V2 to V3 mappings
124128
dr.generate_v2_to_v3_mapping(self.db_connection, imgt_version)

scripts/pyard

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ if __name__ == "__main__":
6464
dest="redux_type",
6565
help="Reduction Method",
6666
)
67+
parser.add_argument("--splits", dest="splits", help="Find Broad and Splits")
6768

6869
args = parser.parse_args()
6970

@@ -78,5 +79,11 @@ if __name__ == "__main__":
7879
print(f"IPD-IMGT/HLA version:", version)
7980
sys.exit(0)
8081

82+
if args.splits:
83+
mapping = pyard.find_broad_splits(args.splits)
84+
if mapping:
85+
print(f"{mapping[0]} = {'/'.join(mapping[1])}")
86+
sys.exit(0)
87+
8188
print(ard.redux_gl(args.gl_string, args.redux_type))
8289
del ard

0 commit comments

Comments
 (0)