1010from grma .donorsgraph .create_lol import LolBuilder
1111from grma .match .graph_wrapper import Graph
1212from grma .utilities .geno_representation import HashableArray
13- from grma .utilities .utils import gl_string_to_integers , tuple_geno_to_int , print_time
14-
15- CLASS_I_END = 6
13+ from grma .utilities .utils import print_time , geno_to_int , gl_string_to_hash
14+ from bidict import bidict
1615
1716
1817class BuildMatchingGraph :
@@ -21,7 +20,7 @@ class BuildMatchingGraph:
2120 It gets a path to directory with the donors' file, builds the graph and saved it as LOL graph using Cython.
2221 """
2322
24- __slots__ = "_verbose" , "_graph" , "_edges"
23+ __slots__ = "_verbose" , "_graph" , "_edges" , "bidirectional_dict"
2524
2625 def __init__ (self , path_to_donors_directory : str , verbose : bool = False ):
2726 """
@@ -33,19 +32,20 @@ def __init__(self, path_to_donors_directory: str, verbose: bool = False):
3332 self ._verbose = verbose
3433 self ._graph = None # LOL dict-representation
3534 self ._edges : List [Edge ] = [] # edge-list
35+ self .bidirectional_dict = bidict ()
3636 self ._save_graph_as_edges (path_to_donors_directory )
3737
3838 def _create_classes_edges (self , geno , class_ , layers ):
39- int_class = tuple_geno_to_int ( class_ )
39+ hash_class = gl_string_to_hash ( str ( class_ )) % 1000000000 + 1000000000
4040
41- self ._edges .append (Edge (int_class , geno , 0 ))
41+ self ._edges .append (Edge (hash_class , geno , 0 ))
4242
4343 # check if the class node was created
44- if int_class not in layers ["CLASS" ]:
45- layers ["CLASS" ].add (int_class )
46- self ._create_subclass_edges (class_ , int_class , layers )
44+ if hash_class not in layers ["CLASS" ]:
45+ layers ["CLASS" ].add (hash_class )
46+ self ._create_subclass_edges (class_ , hash_class , layers )
4747
48- def _create_subclass_edges (self , class_ , int_class , layers ):
48+ def _create_subclass_edges (self , class_ , hash_class , layers ):
4949 """
5050 subclasses edges are created by dropping an allele from a class.
5151 each allele we drop, will be replaced with zero,
@@ -58,19 +58,18 @@ def _create_subclass_edges(self, class_, int_class, layers):
5858 # set the missing allele to always be the second allele in the locus
5959 for i in range (num_of_alleles ):
6060 if i % 2 == 0 :
61- subclass_alleles .add (
62- tuple_geno_to_int (tuple (class_ [0 :i ] + (0 ,) + class_ [i + 1 :]))
63- )
61+ subclass = tuple (class_ [0 :i ] + (0 ,) + class_ [i + 1 :])
6462 else :
65- subclass_alleles .add (
66- tuple_geno_to_int (
67- tuple (class_ [0 : i - 1 ] + (0 , class_ [i - 1 ]) + class_ [i + 1 :])
68- )
63+ subclass = tuple (
64+ class_ [0 : i - 1 ] + (0 , class_ [i - 1 ]) + class_ [i + 1 :]
6965 )
7066
67+ hash_subclass = gl_string_to_hash (str (subclass )) % 1000000000 + 2000000000
68+ subclass_alleles .add (hash_subclass )
69+
7170 # add subclass->class edges
7271 for sub in subclass_alleles :
73- self ._edges .append (Edge (sub , int_class , 0 ))
72+ self ._edges .append (Edge (sub , hash_class , 0 ))
7473 if sub not in layers ["SUBCLASS" ]:
7574 layers ["SUBCLASS" ].add (sub )
7675
@@ -103,16 +102,27 @@ def _save_graph_as_edges(self, path_to_donors_directory: str | os.PathLike):
103102 ):
104103 # retrieve all line's parameters
105104 donor_id , geno , probability , index = line .strip ().split ("," )
106- donor_id = int (donor_id )
105+ donor_id = - 1 * int (donor_id )
107106 index = int (index )
108107 probability = float (probability )
109108
110109 # convert geno to list of integers
111- geno = gl_string_to_integers (geno )
112-
113- # sort alleles for each HLA-X
114- for x in range (0 , 10 , 2 ):
115- geno [x : x + 2 ] = sorted (geno [x : x + 2 ])
110+ alleles = [
111+ allele
112+ for locus in geno .split ("^" )
113+ for allele in locus .split ("+" )
114+ ]
115+ geno = []
116+ for allele in alleles :
117+ if "N" in allele :
118+ geno .append (0 )
119+ elif allele in self .bidirectional_dict :
120+ geno .append (self .bidirectional_dict [allele ])
121+ else :
122+ self .bidirectional_dict [allele ] = (
123+ len (self .bidirectional_dict ) + 3
124+ )
125+ geno .append (self .bidirectional_dict [allele ])
116126 geno = HashableArray (geno )
117127
118128 # handle new donor appearance in file
@@ -137,6 +147,7 @@ def _save_graph_as_edges(self, path_to_donors_directory: str | os.PathLike):
137147 # continue creation of classes and subclasses
138148 if geno not in layers ["GENOTYPE" ]:
139149 layers ["GENOTYPE" ].add (geno )
150+ CLASS_I_END = - 2 * int (- len (geno ) / 4 - 0.5 )
140151 geno_class1 = tuple (geno [:CLASS_I_END ])
141152 geno_class2 = tuple (geno [CLASS_I_END :])
142153 self ._create_classes_edges (geno , geno_class1 , layers )
@@ -177,4 +188,5 @@ def to_pickle(self, path: Union[str, os.PathLike]):
177188
178189 :param path: A path to save the pickled object
179190 """
180- pickle .dump (self ._graph , open (path , "wb" ))
191+ graph_bdict = [self ._graph , self .bidirectional_dict ]
192+ pickle .dump (graph_bdict , open (path , "wb" ))
0 commit comments