2626import sys
2727from typing import Iterable , List
2828
29- from . import broad_splits
29+ from . import broad_splits , smart_sort
3030from . import data_repository as dr
3131from . import db
3232from .exceptions import InvalidAlleleError , InvalidMACError , InvalidTypingError
4141 expression_chars ,
4242 DEFAULT_CACHE_SIZE ,
4343)
44- from .smart_sort import smart_sort_comparator
4544
4645default_config = {
4746 "reduce_serology" : True ,
5655 "verbose_log" : True ,
5756}
5857
58+
5959# Typing information
6060
6161
@@ -95,17 +95,16 @@ def __init__(
9595 self .ars_mappings = dr .generate_ars_mapping (self .db_connection , imgt_version )
9696 # Load Alleles and XX Codes
9797 (
98- self .valid_alleles ,
99- self .who_alleles ,
100- self .xx_codes ,
101- self .who_group ,
102- self .exp_alleles ,
98+ self .code_mappings ,
99+ self .allele_group ,
103100 ) = dr .generate_alleles_and_xx_codes_and_who (
104101 self .db_connection , imgt_version , self .ars_mappings
105102 )
106103
107104 # Generate short nulls from WHO mapping
108- self .shortnulls = dr .generate_short_nulls (self .db_connection , self .who_group )
105+ self .shortnulls = dr .generate_short_nulls (
106+ self .db_connection , self .code_mappings .who_group
107+ )
109108
110109 # Load Serology mappings
111110 broad_splits .broad_splits_ser_mapping = (
@@ -128,6 +127,12 @@ def __init__(
128127 self ._redux_allele
129128 )
130129 self .redux = functools .lru_cache (maxsize = max_cache_size )(self .redux )
130+ self .is_mac = functools .lru_cache (maxsize = max_cache_size )(self .is_mac )
131+ self .smart_sort_comparator = functools .lru_cache (maxsize = max_cache_size )(
132+ smart_sort .smart_sort_comparator
133+ )
134+ else :
135+ self .smart_sort_comparator = smart_sort .smart_sort_comparator
131136
132137 # reference data is read-only and can be frozen
133138 # Works only for Python >= 3.9
@@ -213,8 +218,10 @@ def _redux_allele(
213218 # new redux_type which is full WHO expansion
214219 if self ._is_who_allele (allele ):
215220 return allele
216- if allele in self .who_group :
217- return self .redux ("/" .join (self .who_group [allele ]), redux_type )
221+ if allele in self .code_mappings .who_group :
222+ return self .redux (
223+ "/" .join (self .code_mappings .who_group [allele ]), redux_type
224+ )
218225 else :
219226 return allele
220227 elif redux_type == "exon" :
@@ -254,8 +261,7 @@ def _redux_allele(
254261 else :
255262 raise InvalidAlleleError (f"{ allele } is an invalid allele." )
256263
257- @staticmethod
258- def sorted_unique_gl (gls : List [str ], delim : str ) -> str :
264+ def _sorted_unique_gl (self , gls : List [str ], delim : str ) -> str :
259265 """
260266 Make a list of sorted unique GL Strings separated by delim.
261267 As the list may itself contains elements that are separated by the
@@ -272,7 +278,7 @@ def sorted_unique_gl(gls: List[str], delim: str) -> str:
272278 if delim == "+" :
273279 # No need to make unique. eg. homozygous cases are valid for SLUGs
274280 return delim .join (
275- sorted (gls , key = functools .cmp_to_key (smart_sort_comparator ))
281+ sorted (gls , key = functools .cmp_to_key (self . smart_sort_comparator ))
276282 )
277283
278284 # generate a unique list over a delimiter
@@ -282,7 +288,7 @@ def sorted_unique_gl(gls: List[str], delim: str) -> str:
282288 all_gls += gl .split (delim )
283289 unique_gls = set (all_gls )
284290 return delim .join (
285- sorted (unique_gls , key = functools .cmp_to_key (smart_sort_comparator ))
291+ sorted (unique_gls , key = functools .cmp_to_key (self . smart_sort_comparator ))
286292 )
287293
288294 @functools .lru_cache (maxsize = DEFAULT_CACHE_SIZE )
@@ -302,28 +308,28 @@ def redux(self, glstring: str, redux_type: VALID_REDUCTION_TYPES) -> str:
302308
303309 self .validate (glstring )
304310
305- if re . search ( r"\^" , glstring ) :
306- return self .sorted_unique_gl (
311+ if "^" in glstring :
312+ return self ._sorted_unique_gl (
307313 [self .redux (a , redux_type ) for a in glstring .split ("^" )], "^"
308314 )
309315
310- if re . search ( r"\|" , glstring ) :
311- return self .sorted_unique_gl (
316+ if "|" in glstring :
317+ return self ._sorted_unique_gl (
312318 [self .redux (a , redux_type ) for a in glstring .split ("|" )], "|"
313319 )
314320
315- if re . search ( r"\+" , glstring ) :
316- return self .sorted_unique_gl (
321+ if "+" in glstring :
322+ return self ._sorted_unique_gl (
317323 [self .redux (a , redux_type ) for a in glstring .split ("+" )], "+"
318324 )
319325
320- if re . search ( "~" , glstring ) :
321- return self .sorted_unique_gl (
326+ if "~" in glstring :
327+ return self ._sorted_unique_gl (
322328 [self .redux (a , redux_type ) for a in glstring .split ("~" )], "~"
323329 )
324330
325- if re . search ( "/" , glstring ) :
326- return self .sorted_unique_gl (
331+ if "/" in glstring :
332+ return self ._sorted_unique_gl (
327333 [self .redux (a , redux_type ) for a in glstring .split ("/" )], "/"
328334 )
329335
@@ -353,11 +359,13 @@ def redux(self, glstring: str, redux_type: VALID_REDUCTION_TYPES) -> str:
353359 if self .is_XX (glstring , loc_antigen , code ):
354360 if is_hla_prefix :
355361 reduced_alleles = self .redux (
356- "/" .join (self .xx_codes [loc_antigen ]), redux_type
362+ "/" .join (self .code_mappings . xx_codes [loc_antigen ]), redux_type
357363 )
358364 return "/" .join (["HLA-" + a for a in reduced_alleles .split ("/" )])
359365 else :
360- return self .redux ("/" .join (self .xx_codes [loc_antigen ]), redux_type )
366+ return self .redux (
367+ "/" .join (self .code_mappings .xx_codes [loc_antigen ]), redux_type
368+ )
361369
362370 # Handle MAC
363371 if self ._config ["reduce_MAC" ] and self .is_mac (glstring ):
@@ -389,7 +397,7 @@ def validate(self, glstring):
389397 :return: boolean indicating success
390398 """
391399 try :
392- return self .isvalid_gl (glstring )
400+ return self ._is_valid_gl (glstring )
393401 except InvalidAlleleError as e :
394402 raise InvalidTypingError (
395403 f"{ glstring } is not valid GL String. \n { e .message } " , e
@@ -402,7 +410,7 @@ def is_XX(self, glstring: str, loc_antigen: str = None, code: str = None) -> boo
402410 loc_antigen , code = loc_allele [0 ], loc_allele [1 ]
403411 else :
404412 return False
405- return code == "XX" and loc_antigen in self .xx_codes
413+ return code == "XX" and loc_antigen in self .code_mappings . xx_codes
406414
407415 def is_serology (self , allele : str ) -> bool :
408416 """
@@ -423,6 +431,7 @@ def is_serology(self, allele: str) -> bool:
423431
424432 return db .is_valid_serology (self .db_connection , allele )
425433
434+ @functools .lru_cache (maxsize = DEFAULT_CACHE_SIZE )
426435 def is_mac (self , allele : str ) -> bool :
427436 """
428437 MAC has non-digit characters after the : character.
@@ -468,15 +477,15 @@ def _is_who_allele(self, allele):
468477 :param allele: Allele to test
469478 :return: bool to indicate if allele is valid
470479 """
471- return allele in self .who_alleles
480+ return allele in self .allele_group . who_alleles
472481
473482 def _is_valid_allele (self , allele ):
474483 """
475484 Test if allele is valid in the current imgt database
476485 :param allele: Allele to test
477486 :return: bool to indicate if allele is valid
478487 """
479- return allele in self .valid_alleles
488+ return allele in self .allele_group . alleles
480489
481490 def is_shortnull (self , allele ):
482491 """
@@ -493,7 +502,7 @@ def is_exp_allele(self, allele):
493502 :param allele: Allele to test
494503 :return: bool to indicate if allele is valid
495504 """
496- return allele in self .exp_alleles
505+ return allele in self .allele_group . exp_alleles
497506
498507 def _get_alleles (self , code , locus_antigen ) -> Iterable [str ]:
499508 """
@@ -574,9 +583,9 @@ def _map_v2_to_v3(self, v2_allele):
574583 v3_allele = self ._predict_v3 (v2_allele )
575584 return v3_allele
576585
577- def isvalid (self , allele : str ) -> bool :
586+ def _is_valid (self , allele : str ) -> bool :
578587 """
579- Determines validity of an allele
588+ Determines validity of an allele in various forms
580589
581590 :param allele: An HLA allele.
582591 :type: str
@@ -617,7 +626,7 @@ def isvalid(self, allele: str) -> bool:
617626 return self ._is_valid_allele (allele )
618627 return True
619628
620- def isvalid_gl (self , glstring : str ) -> bool :
629+ def _is_valid_gl (self , glstring : str ) -> bool :
621630 """
622631 Determines validity of glstring
623632
@@ -627,62 +636,23 @@ def isvalid_gl(self, glstring: str) -> bool:
627636 :rtype: bool
628637 """
629638
630- if re . search ( r"\^" , glstring ) :
631- return all (map (self .isvalid_gl , glstring .split ("^" )))
632- if re . search ( r"\|" , glstring ) :
633- return all (map (self .isvalid_gl , glstring .split ("|" )))
634- if re . search ( r"\+" , glstring ) :
635- return all (map (self .isvalid_gl , glstring .split ("+" )))
636- if re . search ( "~" , glstring ) :
637- return all (map (self .isvalid_gl , glstring .split ("~" )))
638- if re . search ( "/" , glstring ) :
639- return all (map (self .isvalid_gl , glstring .split ("/" )))
639+ if "^" in glstring :
640+ return all (map (self ._is_valid_gl , glstring .split ("^" )))
641+ if "|" in glstring :
642+ return all (map (self ._is_valid_gl , glstring .split ("|" )))
643+ if "+" in glstring :
644+ return all (map (self ._is_valid_gl , glstring .split ("+" )))
645+ if "~" in glstring :
646+ return all (map (self ._is_valid_gl , glstring .split ("~" )))
647+ if "/" in glstring :
648+ return all (map (self ._is_valid_gl , glstring .split ("/" )))
640649
641650 # what falls through here is an allele
642- is_valid_allele = self .isvalid (glstring )
651+ is_valid_allele = self ._is_valid (glstring )
643652 if not is_valid_allele :
644653 raise InvalidAlleleError (f"{ glstring } is not a valid Allele" )
645654 return is_valid_allele
646655
647- def mac_toG (self , allele : str ) -> str :
648- """
649- Does ARS reduction with allele and ARS type
650-
651- :param allele: An HLA allele.
652- :type: str
653- :return: ARS reduced allele
654- :rtype: str
655- """
656- locus_antigen , code = allele .split (":" )
657- if HLA_regex .search (allele ):
658- locus_antigen = locus_antigen .split ("-" )[1 ] # Remove HLA- prefix
659- if db .is_valid_mac_code (self .db_connection , code ):
660- alleles = self ._get_alleles (code , locus_antigen )
661- group = [self .toG (a ) for a in alleles ]
662- if "X" in group :
663- raise InvalidMACError (f"{ allele } is an invalid MAC." )
664- else :
665- return "/" .join (group )
666- else :
667- raise InvalidMACError (f"{ allele } is an invalid MAC." )
668-
669- def toG (self , allele : str ) -> str :
670- """
671- Does ARS reduction to the G group level
672-
673- :param allele: An HLA allele.
674- :type: str
675- :return: ARS G reduced allele
676- :rtype: str
677- """
678- if allele in self .ars_mappings .g_group :
679- if allele in self .ars_mappings .dup_g :
680- return self .ars_mappings .dup_g [allele ]
681- else :
682- return self .ars_mappings .g_group [allele ]
683- else :
684- return "X"
685-
686656 def expand_mac (self , mac_code : str ):
687657 """
688658 Expands mac codes
0 commit comments