Skip to content

Commit 86d27b7

Browse files
authored
Merge pull request #218 from pbashyal-nmdp/update_for_performance
Performance Updates
2 parents 8816ad4 + 8161eaf commit 86d27b7

File tree

15 files changed

+192
-148
lines changed

15 files changed

+192
-148
lines changed

api-spec.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ openapi: 3.0.3
22
info:
33
title: ARD Reduction
44
description: Reduce to ARD Level
5-
version: "1.0.0rc2"
5+
version: "1.0.0rc3"
66
servers:
77
- url: 'http://localhost:8080'
88
tags:

api.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ def validate_controller():
1717
return {"message": "gl_string not provided"}, 404
1818
# Validate
1919
try:
20-
ard.isvalid_gl(gl_string)
20+
ard.validate(gl_string)
2121
return {"valid": True}, 200
2222
except InvalidAlleleError as e:
2323
return {

extras/reduce_conf.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,5 +54,7 @@
5454
"map_drb345_to_drbx": false,
5555
"apply_compression": "gzip",
5656
"generate_glstring": true,
57+
"redux_cache_size": 1000,
58+
"reduced_column_prefix": "reduced_",
5759
"verbose_log": true
5860
}

pyard/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
from .misc import get_imgt_db_versions as db_versions
2828

2929
__author__ = """NMDP Bioinformatics"""
30-
__version__ = "1.0.0rc2"
30+
__version__ = "1.0.0rc3"
3131

3232

3333
def init(

pyard/ard.py

Lines changed: 54 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
import sys
2727
from typing import Iterable, List
2828

29-
from . import broad_splits
29+
from . import broad_splits, smart_sort
3030
from . import data_repository as dr
3131
from . import db
3232
from .exceptions import InvalidAlleleError, InvalidMACError, InvalidTypingError
@@ -41,7 +41,6 @@
4141
expression_chars,
4242
DEFAULT_CACHE_SIZE,
4343
)
44-
from .smart_sort import smart_sort_comparator
4544

4645
default_config = {
4746
"reduce_serology": True,
@@ -56,6 +55,7 @@
5655
"verbose_log": True,
5756
}
5857

58+
5959
# Typing information
6060

6161

@@ -95,17 +95,16 @@ def __init__(
9595
self.ars_mappings = dr.generate_ars_mapping(self.db_connection, imgt_version)
9696
# Load Alleles and XX Codes
9797
(
98-
self.valid_alleles,
99-
self.who_alleles,
100-
self.xx_codes,
101-
self.who_group,
102-
self.exp_alleles,
98+
self.code_mappings,
99+
self.allele_group,
103100
) = dr.generate_alleles_and_xx_codes_and_who(
104101
self.db_connection, imgt_version, self.ars_mappings
105102
)
106103

107104
# Generate short nulls from WHO mapping
108-
self.shortnulls = dr.generate_short_nulls(self.db_connection, self.who_group)
105+
self.shortnulls = dr.generate_short_nulls(
106+
self.db_connection, self.code_mappings.who_group
107+
)
109108

110109
# Load Serology mappings
111110
broad_splits.broad_splits_ser_mapping = (
@@ -128,6 +127,12 @@ def __init__(
128127
self._redux_allele
129128
)
130129
self.redux = functools.lru_cache(maxsize=max_cache_size)(self.redux)
130+
self.is_mac = functools.lru_cache(maxsize=max_cache_size)(self.is_mac)
131+
self.smart_sort_comparator = functools.lru_cache(maxsize=max_cache_size)(
132+
smart_sort.smart_sort_comparator
133+
)
134+
else:
135+
self.smart_sort_comparator = smart_sort.smart_sort_comparator
131136

132137
# reference data is read-only and can be frozen
133138
# Works only for Python >= 3.9
@@ -213,8 +218,10 @@ def _redux_allele(
213218
# new redux_type which is full WHO expansion
214219
if self._is_who_allele(allele):
215220
return allele
216-
if allele in self.who_group:
217-
return self.redux("/".join(self.who_group[allele]), redux_type)
221+
if allele in self.code_mappings.who_group:
222+
return self.redux(
223+
"/".join(self.code_mappings.who_group[allele]), redux_type
224+
)
218225
else:
219226
return allele
220227
elif redux_type == "exon":
@@ -254,8 +261,7 @@ def _redux_allele(
254261
else:
255262
raise InvalidAlleleError(f"{allele} is an invalid allele.")
256263

257-
@staticmethod
258-
def sorted_unique_gl(gls: List[str], delim: str) -> str:
264+
def _sorted_unique_gl(self, gls: List[str], delim: str) -> str:
259265
"""
260266
Make a list of sorted unique GL Strings separated by delim.
261267
As the list may itself contain elements that are separated by the
@@ -272,7 +278,7 @@ def sorted_unique_gl(gls: List[str], delim: str) -> str:
272278
if delim == "+":
273279
# No need to make unique, e.g. homozygous cases are valid for SLUGs
274280
return delim.join(
275-
sorted(gls, key=functools.cmp_to_key(smart_sort_comparator))
281+
sorted(gls, key=functools.cmp_to_key(self.smart_sort_comparator))
276282
)
277283

278284
# generate a unique list over a delimiter
@@ -282,7 +288,7 @@ def sorted_unique_gl(gls: List[str], delim: str) -> str:
282288
all_gls += gl.split(delim)
283289
unique_gls = set(all_gls)
284290
return delim.join(
285-
sorted(unique_gls, key=functools.cmp_to_key(smart_sort_comparator))
291+
sorted(unique_gls, key=functools.cmp_to_key(self.smart_sort_comparator))
286292
)
287293

288294
@functools.lru_cache(maxsize=DEFAULT_CACHE_SIZE)
@@ -302,28 +308,28 @@ def redux(self, glstring: str, redux_type: VALID_REDUCTION_TYPES) -> str:
302308

303309
self.validate(glstring)
304310

305-
if re.search(r"\^", glstring):
306-
return self.sorted_unique_gl(
311+
if "^" in glstring:
312+
return self._sorted_unique_gl(
307313
[self.redux(a, redux_type) for a in glstring.split("^")], "^"
308314
)
309315

310-
if re.search(r"\|", glstring):
311-
return self.sorted_unique_gl(
316+
if "|" in glstring:
317+
return self._sorted_unique_gl(
312318
[self.redux(a, redux_type) for a in glstring.split("|")], "|"
313319
)
314320

315-
if re.search(r"\+", glstring):
316-
return self.sorted_unique_gl(
321+
if "+" in glstring:
322+
return self._sorted_unique_gl(
317323
[self.redux(a, redux_type) for a in glstring.split("+")], "+"
318324
)
319325

320-
if re.search("~", glstring):
321-
return self.sorted_unique_gl(
326+
if "~" in glstring:
327+
return self._sorted_unique_gl(
322328
[self.redux(a, redux_type) for a in glstring.split("~")], "~"
323329
)
324330

325-
if re.search("/", glstring):
326-
return self.sorted_unique_gl(
331+
if "/" in glstring:
332+
return self._sorted_unique_gl(
327333
[self.redux(a, redux_type) for a in glstring.split("/")], "/"
328334
)
329335

@@ -353,11 +359,13 @@ def redux(self, glstring: str, redux_type: VALID_REDUCTION_TYPES) -> str:
353359
if self.is_XX(glstring, loc_antigen, code):
354360
if is_hla_prefix:
355361
reduced_alleles = self.redux(
356-
"/".join(self.xx_codes[loc_antigen]), redux_type
362+
"/".join(self.code_mappings.xx_codes[loc_antigen]), redux_type
357363
)
358364
return "/".join(["HLA-" + a for a in reduced_alleles.split("/")])
359365
else:
360-
return self.redux("/".join(self.xx_codes[loc_antigen]), redux_type)
366+
return self.redux(
367+
"/".join(self.code_mappings.xx_codes[loc_antigen]), redux_type
368+
)
361369

362370
# Handle MAC
363371
if self._config["reduce_MAC"] and self.is_mac(glstring):
@@ -389,7 +397,7 @@ def validate(self, glstring):
389397
:return: boolean indicating success
390398
"""
391399
try:
392-
return self.isvalid_gl(glstring)
400+
return self._is_valid_gl(glstring)
393401
except InvalidAlleleError as e:
394402
raise InvalidTypingError(
395403
f"{glstring} is not valid GL String. \n {e.message}", e
@@ -402,7 +410,7 @@ def is_XX(self, glstring: str, loc_antigen: str = None, code: str = None) -> boo
402410
loc_antigen, code = loc_allele[0], loc_allele[1]
403411
else:
404412
return False
405-
return code == "XX" and loc_antigen in self.xx_codes
413+
return code == "XX" and loc_antigen in self.code_mappings.xx_codes
406414

407415
def is_serology(self, allele: str) -> bool:
408416
"""
@@ -423,6 +431,7 @@ def is_serology(self, allele: str) -> bool:
423431

424432
return db.is_valid_serology(self.db_connection, allele)
425433

434+
@functools.lru_cache(maxsize=DEFAULT_CACHE_SIZE)
426435
def is_mac(self, allele: str) -> bool:
427436
"""
428437
MAC has non-digit characters after the : character.
@@ -468,15 +477,15 @@ def _is_who_allele(self, allele):
468477
:param allele: Allele to test
469478
:return: bool to indicate if allele is valid
470479
"""
471-
return allele in self.who_alleles
480+
return allele in self.allele_group.who_alleles
472481

473482
def _is_valid_allele(self, allele):
474483
"""
475484
Test if allele is valid in the current imgt database
476485
:param allele: Allele to test
477486
:return: bool to indicate if allele is valid
478487
"""
479-
return allele in self.valid_alleles
488+
return allele in self.allele_group.alleles
480489

481490
def is_shortnull(self, allele):
482491
"""
@@ -493,7 +502,7 @@ def is_exp_allele(self, allele):
493502
:param allele: Allele to test
494503
:return: bool to indicate if allele is valid
495504
"""
496-
return allele in self.exp_alleles
505+
return allele in self.allele_group.exp_alleles
497506

498507
def _get_alleles(self, code, locus_antigen) -> Iterable[str]:
499508
"""
@@ -574,9 +583,9 @@ def _map_v2_to_v3(self, v2_allele):
574583
v3_allele = self._predict_v3(v2_allele)
575584
return v3_allele
576585

577-
def isvalid(self, allele: str) -> bool:
586+
def _is_valid(self, allele: str) -> bool:
578587
"""
579-
Determines validity of an allele
588+
Determines validity of an allele in various forms
580589
581590
:param allele: An HLA allele.
582591
:type: str
@@ -617,7 +626,7 @@ def isvalid(self, allele: str) -> bool:
617626
return self._is_valid_allele(allele)
618627
return True
619628

620-
def isvalid_gl(self, glstring: str) -> bool:
629+
def _is_valid_gl(self, glstring: str) -> bool:
621630
"""
622631
Determines validity of glstring
623632
@@ -627,62 +636,23 @@ def isvalid_gl(self, glstring: str) -> bool:
627636
:rtype: bool
628637
"""
629638

630-
if re.search(r"\^", glstring):
631-
return all(map(self.isvalid_gl, glstring.split("^")))
632-
if re.search(r"\|", glstring):
633-
return all(map(self.isvalid_gl, glstring.split("|")))
634-
if re.search(r"\+", glstring):
635-
return all(map(self.isvalid_gl, glstring.split("+")))
636-
if re.search("~", glstring):
637-
return all(map(self.isvalid_gl, glstring.split("~")))
638-
if re.search("/", glstring):
639-
return all(map(self.isvalid_gl, glstring.split("/")))
639+
if "^" in glstring:
640+
return all(map(self._is_valid_gl, glstring.split("^")))
641+
if "|" in glstring:
642+
return all(map(self._is_valid_gl, glstring.split("|")))
643+
if "+" in glstring:
644+
return all(map(self._is_valid_gl, glstring.split("+")))
645+
if "~" in glstring:
646+
return all(map(self._is_valid_gl, glstring.split("~")))
647+
if "/" in glstring:
648+
return all(map(self._is_valid_gl, glstring.split("/")))
640649

641650
# what falls through here is an allele
642-
is_valid_allele = self.isvalid(glstring)
651+
is_valid_allele = self._is_valid(glstring)
643652
if not is_valid_allele:
644653
raise InvalidAlleleError(f"{glstring} is not a valid Allele")
645654
return is_valid_allele
646655

647-
def mac_toG(self, allele: str) -> str:
648-
"""
649-
Does ARS reduction with allele and ARS type
650-
651-
:param allele: An HLA allele.
652-
:type: str
653-
:return: ARS reduced allele
654-
:rtype: str
655-
"""
656-
locus_antigen, code = allele.split(":")
657-
if HLA_regex.search(allele):
658-
locus_antigen = locus_antigen.split("-")[1] # Remove HLA- prefix
659-
if db.is_valid_mac_code(self.db_connection, code):
660-
alleles = self._get_alleles(code, locus_antigen)
661-
group = [self.toG(a) for a in alleles]
662-
if "X" in group:
663-
raise InvalidMACError(f"{allele} is an invalid MAC.")
664-
else:
665-
return "/".join(group)
666-
else:
667-
raise InvalidMACError(f"{allele} is an invalid MAC.")
668-
669-
def toG(self, allele: str) -> str:
670-
"""
671-
Does ARS reduction to the G group level
672-
673-
:param allele: An HLA allele.
674-
:type: str
675-
:return: ARS G reduced allele
676-
:rtype: str
677-
"""
678-
if allele in self.ars_mappings.g_group:
679-
if allele in self.ars_mappings.dup_g:
680-
return self.ars_mappings.dup_g[allele]
681-
else:
682-
return self.ars_mappings.g_group[allele]
683-
else:
684-
return "X"
685-
686656
def expand_mac(self, mac_code: str):
687657
"""
688658
Expands mac codes

0 commit comments

Comments
 (0)