Skip to content

Commit 00ab2a0

Browse files
committed
Create a new column, lgx_allele_list, in the serology mapping table that holds the lgx version of the allele list
1 parent 4665c1d commit 00ab2a0

File tree

3 files changed

+52
-20
lines changed

3 files changed

+52
-20
lines changed

pyard/ard.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,9 @@ def __init__(
123123
dr.generate_serology_mapping(
124124
self.db_connection, self.serology_mapping, imgt_version
125125
)
126+
dr.generate_serology_mapping(
127+
self.db_connection, imgt_version, self.serology_mapping, self._redux_allele
128+
)
126129
# Load V2 to V3 mappings
127130
dr.generate_v2_to_v3_mapping(self.db_connection, imgt_version)
128131
# Save IMGT database version
@@ -271,6 +274,11 @@ def _redux_allele(
271274
return self._redux_allele(allele, "lgx")
272275
elif redux_type == "S":
273276
# find serology equivalent in serology_mapping
277+
if is_2_field_allele(allele):
278+
serology_mapping = db.find_serology_for_allele(
279+
self.db_connection,
280+
allele,
281+
)
274282
serology_mapping = db.find_serology_for_allele(self.db_connection, allele)
275283
serology_set = set()
276284
for serology, allele_list in serology_mapping.items():

pyard/data_repository.py

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -356,7 +356,7 @@ def to_serological_name(locus_name: str):
356356

357357

358358
def generate_serology_mapping(
359-
db_connection: sqlite3.Connection, serology_mapping, imgt_version
359+
db_connection: sqlite3.Connection, imgt_version, serology_mapping, redux_function
360360
):
361361
if not db.table_exists(db_connection, "serology_mapping"):
362362
df_sero = load_serology_mappings(imgt_version)
@@ -390,10 +390,12 @@ def generate_serology_mapping(
390390
sero_mapping_combined["Sero"] = sero_mapping_combined["Sero"].apply(
391391
to_serological_name
392392
)
393-
393+
sero_mapping_combined["lgx"] = sero_mapping_combined["Allele"].apply(
394+
lambda allele: redux_function(allele, "lgx")
395+
)
394396
sero_mapping = (
395397
sero_mapping_combined.groupby("Sero")
396-
.apply(lambda x: "/".join(sorted(x["Allele"])))
398+
.apply(lambda x: (set(x["Allele"]), set(x["lgx"])))
397399
.to_dict()
398400
)
399401

@@ -402,20 +404,29 @@ def generate_serology_mapping(
402404
for broad, splits in serology_mapping.broad_splits_map.items():
403405
for split in splits:
404406
try:
405-
sero_mapping[broad] = "/".join(
406-
[sero_mapping[broad], sero_mapping[split]]
407+
sero_mapping[broad] = (
408+
sero_mapping[broad][0].union(sero_mapping[split][0]),
409+
sero_mapping[broad][1].union(sero_mapping[split][1]),
407410
)
408411
except KeyError:
409412
if split in sero_mapping:
410413
sero_mapping[broad] = sero_mapping[split]
411414

412415
# re-sort allele lists into smartsort order
413416
for sero in sero_mapping.keys():
414-
sero_mapping[sero] = "/".join(
415-
sorted(
416-
sero_mapping[sero].split("/"),
417-
key=functools.cmp_to_key(smart_sort_comparator),
418-
)
417+
sero_mapping[sero] = (
418+
"/".join(
419+
sorted(
420+
sero_mapping[sero][0],
421+
key=functools.cmp_to_key(smart_sort_comparator),
422+
)
423+
),
424+
"/".join(
425+
sorted(
426+
sero_mapping[sero][1],
427+
key=functools.cmp_to_key(smart_sort_comparator),
428+
),
429+
),
419430
)
420431

421432
db.save_serology_mappings(db_connection, sero_mapping)

pyard/db.py

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -393,18 +393,17 @@ def similar_mac(connection: sqlite3.Connection, mac_prefix: str) -> Set[str]:
393393

394394

395395
def find_serology_for_allele(
396-
connection: sqlite3.Connection, allele_name: str
396+
connection: sqlite3.Connection, allele_name: str, column: str = "allele_list"
397397
) -> Dict[str, str]:
398398
"""
399399
Find serology mappings whose allele list contains the provided allele_name.
400400
401401
:param connection: db connection of type sqlite.Connection
402402
:param allele_name: Allele name to search for within the allele lists
403+
:param column: Column to look for allele
403404
:return: dict mapping each matching serology to its allele list
404405
"""
405-
query = (
406-
"SELECT serology, allele_list FROM serology_mapping WHERE allele_list LIKE ?"
407-
)
406+
query = f"SELECT serology, {column} FROM serology_mapping WHERE {column} LIKE ?"
408407
cursor = connection.execute(query, (f"%{allele_name}%",))
409408
results = cursor.fetchall()
410409
# fetchall() returns a list of tuples of results
@@ -574,12 +573,26 @@ def save_mac_codes(db_connection, mac, mac_table_name):
574573

575574
def save_serology_mappings(db_connection, sero_mapping):
576575
# Save the serology mapping to db
577-
save_dict(
578-
db_connection,
579-
table_name="serology_mapping",
580-
dictionary=sero_mapping,
581-
columns=("serology", "allele_list"),
582-
)
576+
cursor = db_connection.cursor()
577+
# Drop the table first
578+
cursor.execute("DROP TABLE IF EXISTS serology_mapping")
579+
# Create table
580+
create_table_sql = f"""CREATE TABLE serology_mapping (
581+
serology TEXT PRIMARY KEY,
582+
allele_list TEXT NOT NULL,
583+
lgx_allele_list TEXT NOT NULL
584+
)"""
585+
cursor.execute(create_table_sql)
586+
587+
rows = ((k, v[0], v[1]) for k, v in sero_mapping.items())
588+
589+
# insert
590+
cursor.executemany(f"INSERT INTO serology_mapping VALUES (?, ?, ?)", rows)
591+
592+
# commit transaction - writes to the db
593+
db_connection.commit()
594+
# close the cursor
595+
cursor.close()
583596

584597

585598
def load_v2_v3_mappings(db_connection):

0 commit comments

Comments
 (0)