Skip to content

Commit 9d1792d

Browse files
authored
Merge pull request #305 from pbashyal-nmdp/290_serology_lgx_table
Serology reduction based on lgx
2 parents 4665c1d + ab99745 commit 9d1792d

File tree

10 files changed

+73
-28
lines changed

10 files changed

+73
-28
lines changed

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ LABEL MAINTAINER="Pradeep Bashyal"
44

55
WORKDIR /app
66

7-
ARG PY_ARD_VERSION=1.1.0
7+
ARG PY_ARD_VERSION=1.1.1
88

99
COPY requirements.txt /app
1010
RUN pip install --no-cache-dir --upgrade pip && \

api-spec.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ openapi: 3.0.3
22
info:
33
title: ARD Reduction
44
description: Reduce to ARD Level
5-
version: "1.1.0"
5+
version: "1.1.1"
66
servers:
77
- url: 'http://localhost:8080'
88
tags:

pyard/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
from .misc import get_imgt_db_versions as db_versions
2727

2828
__author__ = """NMDP Bioinformatics"""
29-
__version__ = "1.1.0"
29+
__version__ = "1.1.1"
3030

3131

3232
def init(

pyard/ard.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -121,8 +121,9 @@ def __init__(
121121
)
122122

123123
dr.generate_serology_mapping(
124-
self.db_connection, self.serology_mapping, imgt_version
124+
self.db_connection, imgt_version, self.serology_mapping, self._redux_allele
125125
)
126+
126127
# Load V2 to V3 mappings
127128
dr.generate_v2_to_v3_mapping(self.db_connection, imgt_version)
128129
# Save IMGT database version
@@ -271,7 +272,15 @@ def _redux_allele(
271272
return self._redux_allele(allele, "lgx")
272273
elif redux_type == "S":
273274
# find serology equivalent in serology_mapping
274-
serology_mapping = db.find_serology_for_allele(self.db_connection, allele)
275+
if is_2_field_allele(allele):
276+
allele = self._redux_allele(allele, "lgx")
277+
serology_mapping = db.find_serology_for_allele(
278+
self.db_connection, allele, "lgx_allele_list"
279+
)
280+
else:
281+
serology_mapping = db.find_serology_for_allele(
282+
self.db_connection, allele
283+
)
275284
serology_set = set()
276285
for serology, allele_list in serology_mapping.items():
277286
if allele in allele_list.split("/"):

pyard/data_repository.py

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -356,7 +356,7 @@ def to_serological_name(locus_name: str):
356356

357357

358358
def generate_serology_mapping(
359-
db_connection: sqlite3.Connection, serology_mapping, imgt_version
359+
db_connection: sqlite3.Connection, imgt_version, serology_mapping, redux_function
360360
):
361361
if not db.table_exists(db_connection, "serology_mapping"):
362362
df_sero = load_serology_mappings(imgt_version)
@@ -390,10 +390,12 @@ def generate_serology_mapping(
390390
sero_mapping_combined["Sero"] = sero_mapping_combined["Sero"].apply(
391391
to_serological_name
392392
)
393-
393+
sero_mapping_combined["lgx"] = sero_mapping_combined["Allele"].apply(
394+
lambda allele: redux_function(allele, "lgx")
395+
)
394396
sero_mapping = (
395397
sero_mapping_combined.groupby("Sero")
396-
.apply(lambda x: "/".join(sorted(x["Allele"])))
398+
.apply(lambda x: (set(x["Allele"]), set(x["lgx"])))
397399
.to_dict()
398400
)
399401

@@ -402,20 +404,29 @@ def generate_serology_mapping(
402404
for broad, splits in serology_mapping.broad_splits_map.items():
403405
for split in splits:
404406
try:
405-
sero_mapping[broad] = "/".join(
406-
[sero_mapping[broad], sero_mapping[split]]
407+
sero_mapping[broad] = (
408+
sero_mapping[broad][0].union(sero_mapping[split][0]),
409+
sero_mapping[broad][1].union(sero_mapping[split][1]),
407410
)
408411
except KeyError:
409412
if split in sero_mapping:
410413
sero_mapping[broad] = sero_mapping[split]
411414

412415
# re-sort allele lists into smartsort order
413416
for sero in sero_mapping.keys():
414-
sero_mapping[sero] = "/".join(
415-
sorted(
416-
sero_mapping[sero].split("/"),
417-
key=functools.cmp_to_key(smart_sort_comparator),
418-
)
417+
sero_mapping[sero] = (
418+
"/".join(
419+
sorted(
420+
sero_mapping[sero][0],
421+
key=functools.cmp_to_key(smart_sort_comparator),
422+
)
423+
),
424+
"/".join(
425+
sorted(
426+
sero_mapping[sero][1],
427+
key=functools.cmp_to_key(smart_sort_comparator),
428+
),
429+
),
419430
)
420431

421432
db.save_serology_mappings(db_connection, sero_mapping)

pyard/db.py

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -393,18 +393,17 @@ def similar_mac(connection: sqlite3.Connection, mac_prefix: str) -> Set[str]:
393393

394394

395395
def find_serology_for_allele(
396-
connection: sqlite3.Connection, allele_name: str
396+
connection: sqlite3.Connection, allele_name: str, column: str = "allele_list"
397397
) -> Dict[str, str]:
398398
"""
399399
Find serology mappings whose allele list contains the provided allele_name.
400400
401401
:param connection: db connection of type sqlite.Connection
402402
:param allele_name: Allele name to search for as a substring of the allele list column
403+
:param column: Column to look for allele, "allele_list" or "lgx_allele_list"
403404
:return: dict mapping each matching serology to its "/"-separated allele list
404405
"""
405-
query = (
406-
"SELECT serology, allele_list FROM serology_mapping WHERE allele_list LIKE ?"
407-
)
406+
query = f"SELECT serology, {column} FROM serology_mapping WHERE {column} LIKE ?"
408407
cursor = connection.execute(query, (f"%{allele_name}%",))
409408
results = cursor.fetchall()
410409
# fetchall() returns a list of tuples of results
@@ -574,12 +573,26 @@ def save_mac_codes(db_connection, mac, mac_table_name):
574573

575574
def save_serology_mappings(db_connection, sero_mapping):
576575
# Save the serology mapping to db
577-
save_dict(
578-
db_connection,
579-
table_name="serology_mapping",
580-
dictionary=sero_mapping,
581-
columns=("serology", "allele_list"),
582-
)
576+
cursor = db_connection.cursor()
577+
# Drop the table first
578+
cursor.execute("DROP TABLE IF EXISTS serology_mapping")
579+
# Create table
580+
create_table_sql = f"""CREATE TABLE serology_mapping (
581+
serology TEXT PRIMARY KEY,
582+
allele_list TEXT NOT NULL,
583+
lgx_allele_list TEXT NOT NULL
584+
)"""
585+
cursor.execute(create_table_sql)
586+
587+
rows = ((k, v[0], v[1]) for k, v in sero_mapping.items())
588+
589+
# insert
590+
cursor.executemany(f"INSERT INTO serology_mapping VALUES (?, ?, ?)", rows)
591+
592+
# commit transaction - writes to the db
593+
db_connection.commit()
594+
# close the cursor
595+
cursor.close()
583596

584597

585598
def load_v2_v3_mappings(db_connection):

setup.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[bumpversion]
2-
current_version = 1.1.0
2+
current_version = 1.1.1
33
commit = True
44
tag = True
55

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636

3737
setup(
3838
name="py-ard",
39-
version="1.1.0",
39+
version="1.1.1",
4040
description="ARD reduction for HLA with Python",
4141
long_description=readme,
4242
long_description_content_type="text/markdown",

tests/features/serology_redux.feature

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,3 +29,11 @@ Feature: Serology Reduction
2929
Examples: Skip Loci that don't have Serology mappings
3030
| Allele | Level | Redux Serology |
3131
| A*01:01+A*01:01^B*08:ASXJP+B*07:02^C*02:02+C*07:HTGM^DPB1*28:01:01G+DPB1*296:01 | S | A1+A1^B7+B8^Cw2+Cw7 |
32+
33+
Examples: 2 field Serology Reduction uses lgx version of serology mapping
34+
35+
| Allele | Level | Redux Serology |
36+
| DRB1*07:34 | S | DR7 |
37+
| DRB1*07:34:01 | S | DR7 |
38+
| DRB1*07:34:02 | S | DR7 |
39+
| DRB4*01:03N | S | X |

tests/steps/redux_allele.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,11 @@ def step_impl(context, level):
3939
@when("reducing on the {level} level with ping")
4040
def step_impl(context, level):
4141
context.level = level
42-
context.redux_allele = context.ard_ping.redux(context.allele, level)
42+
redux_allele = context.ard_ping.redux(context.allele, level)
43+
if not redux_allele:
44+
context.redux_allele = "X"
45+
else:
46+
context.redux_allele = redux_allele
4347

4448

4549
@when("reducing on the {level} level with ARS suffix enabled")

0 commit comments

Comments
 (0)