diff --git a/Dockerfile b/Dockerfile index 1ddede0..1eb81f8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,7 +4,7 @@ LABEL MAINTAINER="Pradeep Bashyal" WORKDIR /app -ARG PY_ARD_VERSION=1.1.0 +ARG PY_ARD_VERSION=1.1.1 COPY requirements.txt /app RUN pip install --no-cache-dir --upgrade pip && \ diff --git a/api-spec.yaml b/api-spec.yaml index 62becb7..efe216e 100644 --- a/api-spec.yaml +++ b/api-spec.yaml @@ -2,7 +2,7 @@ openapi: 3.0.3 info: title: ARD Reduction description: Reduce to ARD Level - version: "1.1.0" + version: "1.1.1" servers: - url: 'http://localhost:8080' tags: diff --git a/pyard/__init__.py b/pyard/__init__.py index 6e0591d..8a3cec8 100644 --- a/pyard/__init__.py +++ b/pyard/__init__.py @@ -26,7 +26,7 @@ from .misc import get_imgt_db_versions as db_versions __author__ = """NMDP Bioinformatics""" -__version__ = "1.1.0" +__version__ = "1.1.1" def init( diff --git a/pyard/ard.py b/pyard/ard.py index b7953ec..2ce8331 100644 --- a/pyard/ard.py +++ b/pyard/ard.py @@ -121,8 +121,9 @@ def __init__( ) dr.generate_serology_mapping( - self.db_connection, self.serology_mapping, imgt_version + self.db_connection, imgt_version, self.serology_mapping, self._redux_allele ) + # Load V2 to V3 mappings dr.generate_v2_to_v3_mapping(self.db_connection, imgt_version) # Save IMGT database version @@ -271,7 +272,15 @@ def _redux_allele( return self._redux_allele(allele, "lgx") elif redux_type == "S": # find serology equivalent in serology_mapping - serology_mapping = db.find_serology_for_allele(self.db_connection, allele) + if is_2_field_allele(allele): + allele = self._redux_allele(allele, "lgx") + serology_mapping = db.find_serology_for_allele( + self.db_connection, allele, "lgx_allele_list" + ) + else: + serology_mapping = db.find_serology_for_allele( + self.db_connection, allele + ) serology_set = set() for serology, allele_list in serology_mapping.items(): if allele in allele_list.split("/"): diff --git a/pyard/data_repository.py b/pyard/data_repository.py index 8df94d7..e915685 100644 --- a/pyard/data_repository.py +++ b/pyard/data_repository.py @@ -356,7 +356,7 @@ def to_serological_name(locus_name: str): def generate_serology_mapping( - db_connection: sqlite3.Connection, serology_mapping, imgt_version + db_connection: sqlite3.Connection, imgt_version, serology_mapping, redux_function ): if not db.table_exists(db_connection, "serology_mapping"): df_sero = load_serology_mappings(imgt_version) @@ -390,10 +390,12 @@ def generate_serology_mapping( sero_mapping_combined["Sero"] = sero_mapping_combined["Sero"].apply( to_serological_name ) - + sero_mapping_combined["lgx"] = sero_mapping_combined["Allele"].apply( + lambda allele: redux_function(allele, "lgx") + ) sero_mapping = ( sero_mapping_combined.groupby("Sero") - .apply(lambda x: "/".join(sorted(x["Allele"]))) + .apply(lambda x: (set(x["Allele"]), set(x["lgx"]))) .to_dict() ) @@ -402,8 +404,9 @@ def generate_serology_mapping( for broad, splits in serology_mapping.broad_splits_map.items(): for split in splits: try: - sero_mapping[broad] = "/".join( - [sero_mapping[broad], sero_mapping[split]] + sero_mapping[broad] = ( + sero_mapping[broad][0].union(sero_mapping[split][0]), + sero_mapping[broad][1].union(sero_mapping[split][1]), ) except KeyError: if split in sero_mapping: @@ -411,11 +414,19 @@ def generate_serology_mapping( # re-sort allele lists into smartsort order for sero in sero_mapping.keys(): - sero_mapping[sero] = "/".join( - sorted( - sero_mapping[sero].split("/"), - key=functools.cmp_to_key(smart_sort_comparator), - ) + sero_mapping[sero] = ( + "/".join( + sorted( + sero_mapping[sero][0], + key=functools.cmp_to_key(smart_sort_comparator), + ) + ), + "/".join( + sorted( + sero_mapping[sero][1], + key=functools.cmp_to_key(smart_sort_comparator), + ), + ), ) db.save_serology_mappings(db_connection, sero_mapping) diff --git a/pyard/db.py b/pyard/db.py index cd7f33f..10509b1 100644 --- a/pyard/db.py +++ b/pyard/db.py @@ -393,18 +393,17 @@ def similar_mac(connection: sqlite3.Connection, mac_prefix: str) -> Set[str]: def find_serology_for_allele( - connection: sqlite3.Connection, allele_name: str + connection: sqlite3.Connection, allele_name: str, column: str = "allele_list" ) -> Dict[str, str]: """ Find similar alleles starting with the provided allele_name. :param connection: db connection of type sqlite.Connection :param allele_name: Allele name to use as a prefix to find similar alleles + :param column: Column to look for allele, "allele_list" or "lgx_allele_list" :return: list of similar alleles """ - query = ( - "SELECT serology, allele_list FROM serology_mapping WHERE allele_list LIKE ?" - ) + query = f"SELECT serology, {column} FROM serology_mapping WHERE {column} LIKE ?" cursor = connection.execute(query, (f"%{allele_name}%",)) results = cursor.fetchall() # fetchall() returns a list of tuples of results @@ -574,12 +573,26 @@ def save_mac_codes(db_connection, mac, mac_table_name): def save_serology_mappings(db_connection, sero_mapping): # Save the serology mapping to db - save_dict( - db_connection, - table_name="serology_mapping", - dictionary=sero_mapping, - columns=("serology", "allele_list"), - ) + cursor = db_connection.cursor() + # Drop the table first + cursor.execute("DROP TABLE IF EXISTS serology_mapping") + # Create table + create_table_sql = f"""CREATE TABLE serology_mapping ( + serology TEXT PRIMARY KEY, + allele_list TEXT NOT NULL, + lgx_allele_list TEXT NOT NULL + )""" + cursor.execute(create_table_sql) + + rows = ((k, v[0], v[1]) for k, v in sero_mapping.items()) + + # insert + cursor.executemany(f"INSERT INTO serology_mapping VALUES (?, ?, ?)", rows) + + # commit transaction - writes to the db + db_connection.commit() + # close the cursor + cursor.close() def load_v2_v3_mappings(db_connection): diff --git a/setup.cfg b/setup.cfg index d1c5d9b..75a585c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 1.1.0 +current_version = 1.1.1 commit = True tag = True diff --git a/setup.py b/setup.py index 83ec1d8..0716d0f 100644 --- a/setup.py +++ b/setup.py @@ -36,7 +36,7 @@ setup( name="py-ard", - version="1.1.0", + version="1.1.1", description="ARD reduction for HLA with Python", long_description=readme, long_description_content_type="text/markdown", diff --git a/tests/features/serology_redux.feature b/tests/features/serology_redux.feature index 66af7e2..6dbb665 100644 --- a/tests/features/serology_redux.feature +++ b/tests/features/serology_redux.feature @@ -29,3 +29,11 @@ Feature: Serology Reduction Examples: Skip Loci that don't have Serology mappings | Allele | Level | Redux Serology | | A*01:01+A*01:01^B*08:ASXJP+B*07:02^C*02:02+C*07:HTGM^DPB1*28:01:01G+DPB1*296:01 | S | A1+A1^B7+B8^Cw2+Cw7 | + + Examples: 2 field Serology Reduction uses lgx version of serology mapping + + | Allele | Level | Redux Serology | + | DRB1*07:34 | S | DR7 | + | DRB1*07:34:01 | S | DR7 | + | DRB1*07:34:02 | S | DR7 | + | DRB4*01:03N | S | X | diff --git a/tests/steps/redux_allele.py b/tests/steps/redux_allele.py index 9236e44..10d6a17 100644 --- a/tests/steps/redux_allele.py +++ b/tests/steps/redux_allele.py @@ -39,7 +39,11 @@ def step_impl(context, level): @when("reducing on the {level} level with ping") def step_impl(context, level): context.level = level - context.redux_allele = context.ard_ping.redux(context.allele, level) + redux_allele = context.ard_ping.redux(context.allele, level) + if not redux_allele: + context.redux_allele = "X" + else: + context.redux_allele = redux_allele @when("reducing on the {level} level with ARS suffix enabled")