Skip to content

Commit dce480b

Browse files
authored
Merge pull request #153 from pbashyal-nmdp/feature/summary_failure_table
Show summary failure table in batch mode
2 parents a93e960 + a134df5 commit dce480b

File tree

5 files changed

+37
-8
lines changed

5 files changed

+37
-8
lines changed

pyard/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,4 +24,5 @@
2424
from .pyard import ARD
2525

2626
__author__ = """NMDP Bioinformatics"""
27-
__version__ = '0.7.3'
27+
__version__ = '0.7.4'
28+

pyard/db.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ def is_valid_mac_code(connection: sqlite3.Connection, code: str) -> bool:
138138
cursor.close()
139139
return result[0] > 0
140140

141+
141142
def serology_to_alleles(connection: sqlite3.Connection, serology: str) -> List[str]:
142143
"""
143144
Look up Serology in the database and return corresponding list of alleles.
@@ -156,6 +157,7 @@ def serology_to_alleles(connection: sqlite3.Connection, serology: str) -> List[s
156157
alleles = []
157158
return alleles
158159

160+
159161
def is_valid_serology(connection: sqlite3.Connection, serology: str) -> bool:
160162
"""
161163
Check db if the serology exists
@@ -287,3 +289,21 @@ def load_dict(connection: sqlite3.Connection, table_name: str, columns: Tuple[st
287289
table_as_dict = {k: v for k, v in cursor.fetchall()}
288290
cursor.close()
289291
return table_as_dict
292+
293+
294+
def similar_alleles(connection: sqlite3.Connection, allele_name: str) -> Set[str]:
    """
    Find similar alleles starting with the provided allele_name.

    The name is matched as a literal prefix: any ``%`` or ``_`` characters in
    ``allele_name`` are escaped so they are not interpreted as SQL ``LIKE``
    wildcards.

    :param connection: db connection of type sqlite.Connection
    :param allele_name: Allele name to use as a prefix to find similar alleles
    :return: set of similar alleles (empty set when nothing matches)
    """
    # Escape the escape character first, then the two LIKE wildcards, so the
    # user-supplied name can only ever match itself as a prefix.
    escaped_prefix = (
        allele_name.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
    )
    query = "SELECT allele FROM alleles WHERE allele LIKE ? ESCAPE '\\'"
    cursor = connection.execute(query, (escaped_prefix + "%",))
    try:
        # fetchall() returns a list of 1-tuples, e.g. [('C*04:09N',)];
        # keep only the allele name from each row.
        return {row[0] for row in cursor.fetchall()}
    finally:
        # Release the cursor even if fetching fails, matching the other
        # lookup helpers in this module that close their cursors.
        cursor.close()

scripts/pyard-reduce-csv

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ def reduce(allele, locus, column_name):
125125
print(e)
126126
message = f"Failed reducing '{locus_allele}' in column {column_name}"
127127
print(message)
128-
failure_summary_messages.append(message)
128+
failed_to_reduce_alleles.append((column_name, locus_allele))
129129
return allele
130130
# print(f"reduced to '{reduced_allele}'")
131131
if reduced_allele:
@@ -210,7 +210,7 @@ if __name__ == '__main__':
210210
header=0, dtype=str,
211211
keep_default_na=False)
212212

213-
failure_summary_messages = []
213+
failed_to_reduce_alleles = []
214214
# Reduce each of the specified columns
215215
for column in ard_config["columns_to_reduce_in_csv"]:
216216
if verbose:
@@ -245,12 +245,20 @@ if __name__ == '__main__':
245245
out_file_name = f"{ard_config['out_csv_filename'] + '.gz' if ard_config['apply_compression'] else ''}"
246246
df.to_csv(out_file_name, index=False, compression=ard_config["apply_compression"])
247247

248-
if len(failure_summary_messages) == 0:
248+
if len(failed_to_reduce_alleles) == 0:
249249
print("No Errors", file=sys.stderr)
250250
else:
251251
print("Summary", file=sys.stderr)
252252
print("-------", file=sys.stderr)
253-
for message in failure_summary_messages:
254-
print("\t", message, file=sys.stderr)
253+
print(f"{len(failed_to_reduce_alleles)} alleles failed to reduce.", file=sys.stderr)
254+
print("| Column Name | Allele | Did you mean ? ", file=sys.stderr)
255+
print("| --------------- | ---------------- | ------------------------- ", file=sys.stderr)
256+
for column_name, locus_allele in failed_to_reduce_alleles:
257+
similar_allele_names = pyard.db.similar_alleles(ard.db_connection, locus_allele)
258+
if similar_allele_names:
259+
similar_allele_names = ",".join(sorted(similar_allele_names, reverse=True))
260+
else:
261+
similar_allele_names = 'NA'
262+
print(f"| {column_name:15} | {locus_allele:16} | {similar_allele_names} ", file=sys.stderr)
255263
# Done
256264
print(f"Saved result to file:{out_file_name}")

setup.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[bumpversion]
2-
current_version = 0.7.3
2+
current_version = 0.7.4
33
commit = True
44
tag = True
55

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242

4343
setup(
4444
name='py-ard',
45-
version='0.7.3',
45+
version='0.7.4',
4646
description="ARD reduction for HLA with Python",
4747
long_description=readme + '\n\n' + history,
4848
long_description_content_type="text/markdown",

0 commit comments

Comments
 (0)