Skip to content

Commit 8cc55e7

Browse files
committed
Create a new column, lgx_allele_list, in the serology mapping table that holds the lgx version of the allele list
1 parent d43a035 commit 8cc55e7

File tree

3 files changed

+54
-21
lines changed

3 files changed

+54
-21
lines changed

pyard/ard.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,9 @@ def __init__(
116116
broad_splits.broad_splits_ser_mapping = (
117117
dr.generate_serology_broad_split_mapping(self.db_connection, imgt_version)
118118
)
119-
dr.generate_serology_mapping(self.db_connection, imgt_version)
119+
dr.generate_serology_mapping(
120+
self.db_connection, imgt_version, self._redux_allele
121+
)
120122
# Load V2 to V3 mappings
121123
dr.generate_v2_to_v3_mapping(self.db_connection, imgt_version)
122124
# Save IMGT database version
@@ -265,6 +267,11 @@ def _redux_allele(
265267
return self._redux_allele(allele, "lgx")
266268
elif redux_type == "S":
267269
# find serology equivalent in serology_mapping
270+
if is_2_field_allele(allele):
271+
serology_mapping = db.find_serology_for_allele(
272+
self.db_connection,
273+
allele,
274+
)
268275
serology_mapping = db.find_serology_for_allele(self.db_connection, allele)
269276
serology_set = set()
270277
for serology, allele_list in serology_mapping.items():

pyard/data_repository.py

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -354,7 +354,9 @@ def to_serological_name(locus_name: str):
354354
return sero_name
355355

356356

357-
def generate_serology_mapping(db_connection: sqlite3.Connection, imgt_version):
357+
def generate_serology_mapping(
358+
db_connection: sqlite3.Connection, imgt_version, redux_function
359+
):
358360
if not db.table_exists(db_connection, "serology_mapping"):
359361
df_sero = load_serology_mappings(imgt_version)
360362

@@ -387,10 +389,12 @@ def generate_serology_mapping(db_connection: sqlite3.Connection, imgt_version):
387389
sero_mapping_combined["Sero"] = sero_mapping_combined["Sero"].apply(
388390
to_serological_name
389391
)
390-
392+
sero_mapping_combined["lgx"] = sero_mapping_combined["Allele"].apply(
393+
lambda allele: redux_function(allele, "lgx")
394+
)
391395
sero_mapping = (
392396
sero_mapping_combined.groupby("Sero")
393-
.apply(lambda x: "/".join(sorted(x["Allele"])))
397+
.apply(lambda x: (set(x["Allele"]), set(x["lgx"])))
394398
.to_dict()
395399
)
396400

@@ -399,20 +403,29 @@ def generate_serology_mapping(db_connection: sqlite3.Connection, imgt_version):
399403
for broad, splits in broad_splits.broad_splits_ser_mapping.items():
400404
for split in splits:
401405
try:
402-
sero_mapping[broad] = "/".join(
403-
[sero_mapping[broad], sero_mapping[split]]
406+
sero_mapping[broad] = (
407+
sero_mapping[broad][0].union(sero_mapping[split][0]),
408+
sero_mapping[broad][1].union(sero_mapping[split][1]),
404409
)
405410
except KeyError:
406411
if split in sero_mapping:
407412
sero_mapping[broad] = sero_mapping[split]
408413

409414
# re-sort allele lists into smartsort order
410415
for sero in sero_mapping.keys():
411-
sero_mapping[sero] = "/".join(
412-
sorted(
413-
sero_mapping[sero].split("/"),
414-
key=functools.cmp_to_key(smart_sort_comparator),
415-
)
416+
sero_mapping[sero] = (
417+
"/".join(
418+
sorted(
419+
sero_mapping[sero][0],
420+
key=functools.cmp_to_key(smart_sort_comparator),
421+
)
422+
),
423+
"/".join(
424+
sorted(
425+
sero_mapping[sero][1],
426+
key=functools.cmp_to_key(smart_sort_comparator),
427+
),
428+
),
416429
)
417430

418431
db.save_serology_mappings(db_connection, sero_mapping)

pyard/db.py

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -393,18 +393,17 @@ def similar_mac(connection: sqlite3.Connection, mac_prefix: str) -> Set[str]:
393393

394394

395395
def find_serology_for_allele(
396-
connection: sqlite3.Connection, allele_name: str
396+
connection: sqlite3.Connection, allele_name: str, column: str = "allele_list"
397397
) -> Dict[str, str]:
398398
"""
399399
Find similar alleles starting with the provided allele_name.
400400
401401
:param connection: db connection of type sqlite.Connection
402402
:param allele_name: Allele name to use as a prefix to find similar alleles
403+
:param column: Column to look for allele
403404
:return: list of similar alleles
404405
"""
405-
query = (
406-
"SELECT serology, allele_list FROM serology_mapping WHERE allele_list LIKE ?"
407-
)
406+
query = f"SELECT serology, {column} FROM serology_mapping WHERE {column} LIKE ?"
408407
cursor = connection.execute(query, (f"%{allele_name}%",))
409408
results = cursor.fetchall()
410409
# fetchall() returns a list of tuples of results
@@ -574,12 +573,26 @@ def save_mac_codes(db_connection, mac, mac_table_name):
574573

575574
def save_serology_mappings(db_connection, sero_mapping):
576575
# Save the serology mapping to db
577-
save_dict(
578-
db_connection,
579-
table_name="serology_mapping",
580-
dictionary=sero_mapping,
581-
columns=("serology", "allele_list"),
582-
)
576+
cursor = db_connection.cursor()
577+
# Drop the table first
578+
cursor.execute("DROP TABLE IF EXISTS serology_mapping")
579+
# Create table
580+
create_table_sql = f"""CREATE TABLE serology_mapping (
581+
serology TEXT PRIMARY KEY,
582+
allele_list TEXT NOT NULL,
583+
lgx_allele_list TEXT NOT NULL
584+
)"""
585+
cursor.execute(create_table_sql)
586+
587+
rows = ((k, v[0], v[1]) for k, v in sero_mapping.items())
588+
589+
# insert
590+
cursor.executemany(f"INSERT INTO serology_mapping VALUES (?, ?, ?)", rows)
591+
592+
# commit transaction - writes to the db
593+
db_connection.commit()
594+
# close the cursor
595+
cursor.close()
583596

584597

585598
def load_v2_v3_mappings(db_connection):

0 commit comments

Comments
 (0)