Skip to content

Commit 9952d32

Browse files
authored
Merge pull request #281 from pbashyal-nmdp/feature/similar
Find similar alleles/MACs
2 parents a2d3d8c + 6543b8d commit 9952d32

File tree

2 files changed

+119
-30
lines changed

2 files changed

+119
-30
lines changed

pyard/db.py

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -356,16 +356,16 @@ def load_dict(
356356
return table_as_dict
357357

358358

359-
def similar_alleles(connection: sqlite3.Connection, allele_name: str) -> Set[str]:
359+
def similar_alleles(connection: sqlite3.Connection, allele_prefix: str) -> Set[str]:
360360
"""
361-
Find similar alleles starting with the provided allele_name.
361+
Find similar alleles starting with the provided prefix.
362362
363363
:param connection: db connection of type sqlite.Connection
364-
:param allele_name: Allele name to use as a prefix to find similar alleles
364+
:param allele_prefix: Allele name to use as a prefix to find similar alleles
365365
:return: list of similar alleles
366366
"""
367367
query = "SELECT allele FROM alleles WHERE allele LIKE ?"
368-
cursor = connection.execute(query, (f"{allele_name}%",))
368+
cursor = connection.execute(query, (f"{allele_prefix}%",))
369369
result = cursor.fetchall()
370370
# fetchall() returns a list of tuples of results
371371
# e.g. [('C*04:09N',)]
@@ -374,6 +374,24 @@ def similar_alleles(connection: sqlite3.Connection, allele_name: str) -> Set[str
374374
return alleles
375375

376376

377+
def similar_mac(connection: sqlite3.Connection, mac_prefix: str) -> Set[str]:
    """
    Find similar MAC codes starting with the provided prefix.

    The prefix is matched literally: the SQL ``LIKE`` wildcard characters
    ``%`` and ``_`` occurring in it are escaped, so they only match
    themselves instead of arbitrary characters.

    :param connection: db connection of type sqlite.Connection
    :param mac_prefix: MAC fragment to use as a prefix to find similar MACs
    :return: set of similar MAC codes
    """
    # Escape the escape character itself first, then the two LIKE wildcards,
    # so that only the trailing "%" added below acts as a wildcard.
    escaped_prefix = (
        mac_prefix.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
    )
    query = "SELECT code FROM mac_codes WHERE code LIKE ? ESCAPE '\\'"
    cursor = connection.execute(query, (f"{escaped_prefix}%",))
    # fetchall() returns a list of 1-tuples, e.g. [('DJZUP',)];
    # keep only the code held in each tuple.
    return {row[0] for row in cursor.fetchall()}
393+
394+
377395
def find_serology_for_allele(
378396
connection: sqlite3.Connection, allele_name: str
379397
) -> Dict[str, str]:

scripts/pyard

Lines changed: 97 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,92 @@
2222
# > http://www.opensource.org/licenses/lgpl-license.php
2323
#
2424
import argparse
25+
import functools
2526
import sys
2627

28+
from pyard import smart_sort
2729
from pyard.constants import VALID_REDUCTION_TYPES
2830
import pyard.misc
31+
from pyard.db import similar_alleles, similar_mac
2932
from pyard.exceptions import InvalidAlleleError, InvalidTypingError, InvalidMACError
3033
from pyard.misc import get_data_dir, get_imgt_version
3134

35+
36+
def find_similar_alleles(ard, prefix):
    """
    Print the MAC codes and alleles that start with ``prefix``, then exit.

    The prefix must contain a locus followed by ``*`` and at least one
    character after it. When the part after ``*`` consists of exactly two
    ``:``-separated fields and the second one is purely alphabetic, that
    second field is treated as a MAC prefix and every matching code is
    printed as ``<locus>*<first field>:<code>``. Afterwards every allele
    matching the whole prefix is printed in smart-sort order.

    This function never returns; it always ends the process via ``sys.exit``:
    status 1 when the prefix has no ``*``, status 2 when nothing follows the
    ``*``, status 0 otherwise.

    :param ard: object whose ``db_connection`` attribute is handed to the
        database lookup functions
    :param prefix: allele/MAC prefix to search for, e.g. ``A*01:0``
    """
    # partition() splits on the first "*" only, so a prefix that contains
    # more than one "*" no longer fails while unpacking the split result.
    locus, star, fields = prefix.partition("*")
    if not star:
        # No *
        sys.exit(1)
    if not fields:
        # Nothing after *
        sys.exit(2)

    field_parts = fields.split(":")
    if len(field_parts) == 2:  # Check for MACs
        first_field, mac_prefix = field_parts
        if mac_prefix.isalpha():
            similar_mac_names = similar_mac(ard.db_connection, mac_prefix)
            if similar_mac_names:
                locus_prefix = f"{locus}*{first_field}"
                # TODO: validate all the mac codes with the prefix
                #  show only the valid macs
                for code in sorted(similar_mac_names):
                    print(f"{locus_prefix}:{code}")

    # find similar alleles
    similar_allele_names = similar_alleles(ard.db_connection, prefix)
    if similar_allele_names:
        for allele in sorted(
            similar_allele_names,
            key=functools.cmp_to_key(smart_sort.smart_sort_comparator),
        ):
            print(allele)
    sys.exit(0)
66+
67+
68+
def lookup_mac_codes():
    """
    Look up the MAC for the value given to ``--lookup-mac`` and print it.

    Reads the module-level ``ard`` and ``args`` objects. An
    ``InvalidMACError`` is reported by printing its message to stderr.
    The process always exits with status 0 afterwards.
    """
    # No ``global e`` here: the exception name only needs to live inside the
    # handler, and binding it globally would create and then delete a
    # module-level ``e`` on every failure.
    try:
        mac = ard.lookup_mac(args.lookup_mac)
        print(mac)
    except InvalidMACError as e:
        print(e.message, file=sys.stderr)
    sys.exit(0)
76+
77+
78+
def expand_mac_code():
    """
    Expand the MAC code given to ``--expand-mac`` and print the result.

    Reads the module-level ``ard`` and ``args`` objects. An
    ``InvalidMACError`` is reported by printing its message to stderr.
    The process always exits with status 0 afterwards.
    """
    # The expansion result and the exception are only needed locally, so they
    # are plain locals rather than ``global`` names; the function exits the
    # process right after printing, so nothing can read them later.
    try:
        allele_list = ard.expand_mac(args.expand_mac)
        print(allele_list)
    except InvalidMACError as e:
        print(e.message, file=sys.stderr)
    sys.exit(0)
86+
87+
88+
def find_broad_splits():
    """
    Print the broad/splits mapping for the value given to ``--splits``.

    Reads the module-level ``args`` object. When a mapping is found it is
    printed as ``<first item> = <second item joined by "/">``; when nothing
    is found, nothing is printed. The process exits with status 0 either way.
    """
    mapping = pyard.find_broad_splits(args.splits)
    if not mapping:
        # Nothing to report for this value; still a successful run.
        sys.exit(0)
    print(f"{mapping[0]} = {'/'.join(mapping[1])}")
    sys.exit(0)
93+
94+
95+
def show_version():
    """
    Print the database version and the py-ard version, then exit.

    Reads the module-level ``ard`` object for the database version and
    ``pyard.__version__`` for the package version. The process exits with
    status 0.
    """
    version = ard.get_db_version()
    # Plain string literals: these labels have no placeholders, so the
    # f-string prefix was unnecessary.
    print("IPD-IMGT/HLA version:", version)
    print("py-ard version:", pyard.__version__)
    sys.exit(0)
100+
101+
102+
def perform_cwd_redux():
    """
    Print the result of ``ard.cwd_redux`` for the value in ``args.cwd``.

    Reads the module-level ``ard`` and ``args`` objects. When
    ``args.validate`` is set, ``ard.validate`` is called on the same value
    first. The process exits with status 0 after printing.
    """
    if args.validate:
        ard.validate(args.cwd)
    # Kept as a local: the process exits right after printing, so there is
    # no reason to publish the result as a module-level global.
    cwd_redux = ard.cwd_redux(args.cwd)
    print(cwd_redux)
    sys.exit(0)
109+
110+
32111
if __name__ == "__main__":
33112
parser = argparse.ArgumentParser(
34113
description="""
@@ -76,6 +155,11 @@ if __name__ == "__main__":
76155
parser.add_argument(
77156
"--lookup-mac", dest="lookup_mac", help="Lookup MAC for an Allele List"
78157
)
158+
parser.add_argument(
159+
"--similar",
160+
dest="similar_allele",
161+
help="Find Similar Alleles with given prefix",
162+
)
79163
parser.add_argument(
80164
"--non-strict",
81165
dest="non_strict",
@@ -100,45 +184,32 @@ if __name__ == "__main__":
100184

101185
ard = pyard.init(imgt_version=imgt_version, data_dir=data_dir, config=new_config)
102186

187+
# Handle --version option
103188
if args.version:
104-
version = ard.get_db_version()
105-
print(f"IPD-IMGT/HLA version:", version)
106-
print(f"py-ard version:", pyard.__version__)
107-
sys.exit(0)
189+
show_version()
108190

191+
# Handle --splits option
109192
if args.splits:
110-
mapping = pyard.find_broad_splits(args.splits)
111-
if mapping:
112-
print(f"{mapping[0]} = {'/'.join(mapping[1])}")
113-
sys.exit(0)
193+
find_broad_splits()
114194

115195
# Handle --expand-mac option
116196
if args.expand_mac:
117-
try:
118-
allele_list = ard.expand_mac(args.expand_mac)
119-
print(allele_list)
120-
except InvalidMACError as e:
121-
print(e.message, file=sys.stderr)
122-
sys.exit(0)
197+
expand_mac_code()
123198

124199
# Handle --lookup-mac option
125200
if args.lookup_mac:
126-
try:
127-
mac = ard.lookup_mac(args.lookup_mac)
128-
print(mac)
129-
except InvalidMACError as e:
130-
print(e.message, file=sys.stderr)
131-
sys.exit(0)
201+
lookup_mac_codes()
202+
203+
# Handle --similar option
204+
if args.similar_allele:
205+
find_similar_alleles(ard, args.similar_allele)
132206

133207
try:
208+
if args.cwd:
209+
perform_cwd_redux()
210+
134211
if args.validate and args.gl_string:
135212
ard.validate(args.gl_string)
136-
if args.cwd:
137-
if args.validate:
138-
ard.validate(args.cwd)
139-
cwd_redux = ard.cwd_redux(args.cwd)
140-
print(cwd_redux)
141-
sys.exit(0)
142213

143214
if args.redux_type:
144215
print(ard.redux(args.gl_string, args.redux_type))

0 commit comments

Comments
 (0)