2020
2121DONORS_DB : pd .DataFrame = pd .DataFrame ()
2222ZEROS : HashableArray = HashableArray ([0 ])
23- ALLELES_IN_CLASS_I : int = 6
24- ALLELES_IN_CLASS_II : int = 4
2523
2624
2725def set_database (donors_db : pd .DataFrame = pd .DataFrame ()):
@@ -39,24 +37,13 @@ def _init_results_df(donors_info):
3937 fields_in_results = {
4038 "Patient_ID" : [],
4139 "Donor_ID" : [],
40+ "GvH_Mismatches" : [],
41+ "HvG_Mismatches" : [],
4242 "Number_Of_Mismatches" : [],
4343 "Matching_Probability" : [],
44- "Match_Probability_A_1" : [],
45- "Match_Probability_A_2" : [],
46- "Match_Probability_B_1" : [],
47- "Match_Probability_B_2" : [],
48- "Match_Probability_C_1" : [],
49- "Match_Probability_C_2" : [],
50- "Match_Probability_DQB1_1" : [],
51- "Match_Probability_DQB1_2" : [],
52- "Match_Probability_DRB1_1" : [],
53- "Match_Probability_DRB1_2" : [],
44+ "Match_Probability" : [],
5445 "Permissive/Non-Permissive" : [],
55- "Match_Between_Most_Commons_A" : [],
56- "Match_Between_Most_Commons_B" : [],
57- "Match_Between_Most_Commons_C" : [],
58- "Match_Between_Most_Commons_DQB" : [],
59- "Match_Between_Most_Commons_DRB" : [],
46+ "Match_Between_Most_Commons" : [],
6047 }
6148
6249 donors_db_fields = DONORS_DB .columns .values .tolist ()
@@ -66,17 +53,26 @@ def _init_results_df(donors_info):
6653 return pd .DataFrame (fields_in_results )
6754
6855
69- def locuses_match_between_genos (geno1 , geno2 ):
56+ def locuses_match_between_genos (geno_pat , geno_don ):
7057 matches = []
71- for i in range (5 ):
72- a1 , b1 = geno1 [2 * i ], geno1 [2 * i + 1 ]
73- a2 , b2 = geno2 [2 * i ], geno2 [2 * i + 1 ]
58+ total_gvh = 0
59+ total_hvg = 0
60+
61+ for i in range (0 , len (geno_pat ), 2 ):
62+ a1 , b1 = geno_pat [i ], geno_pat [i + 1 ]
63+ a2 , b2 = geno_don [i ], geno_don [i + 1 ]
7464
7565 s1 = int (a1 == a2 ) + int (b1 == b2 )
7666 s2 = int (a1 == b2 ) + int (b1 == a2 )
7767 matches .append (max (s1 , s2 ))
7868
79- return matches
69+ p_set = {x for x in (a1 , b1 ) if x not in (None , 0 )}
70+ d_set = {x for x in (a2 , b2 ) if x not in (None , 0 )}
71+
72+ total_gvh += len (p_set - d_set ) # patient has, donor lacks
73+ total_hvg += len (d_set - p_set ) # donor has, patient lacks
74+
75+ return matches , total_gvh , total_hvg
8076
8177
8278class DonorsMatching (object ):
@@ -127,7 +123,7 @@ def probability_to_allele(
127123 ) -> List [float ]:
128124 """Takes a donor ID and a genotype.
129125 Returns the probability of match for each allele"""
130- probs = [0 for _ in range (10 )]
126+ probs = [0 for _ in range (len ( pat_geno ) )]
131127
132128 for i , allele in enumerate (pat_geno ):
133129 p = 0
@@ -148,7 +144,7 @@ def __find_genotype_candidates_from_class(
148144 ) -> Tuple [np .ndarray , np .ndarray ]:
149145 """Takes an integer subclass.
150146 Returns the genotypes (ids and values) which are connected to it in the graph"""
151- return self ._graph .class_neighbors (clss )
147+ return self ._graph .class_neighbors (clss , Len = len ( list ( self . patients . values ())[ 0 ]) )
152148
153149 def __find_donor_from_geno (self , geno_id : int ) -> Sequence [int ]:
154150 """Gets the LOL ID of a genotype.
@@ -216,12 +212,14 @@ def __add_matched_genos_to_graph(
216212
217213 def __classes_and_subclasses_from_genotype (self , genotype : HashableArray ):
218214 subclasses = []
219- classes = [genotype [:ALLELES_IN_CLASS_I ], genotype [ALLELES_IN_CLASS_I :]]
215+ ALLELES_IN_CLASS_I = - 2 * int (- len (genotype )/ 4 - 0.5 )
216+ ALLELES_IN_CLASS_II = len (genotype ) - ALLELES_IN_CLASS_I
217+ classes = [(genotype [:ALLELES_IN_CLASS_I ], 0 ), (genotype [ALLELES_IN_CLASS_I :], 1 )]
220218 num_of_alleles_in_class = [ALLELES_IN_CLASS_I , ALLELES_IN_CLASS_II ]
221219
222- int_classes = [tuple_geno_to_int (tuple (clss ) ) for clss in classes ]
220+ int_classes = [( tuple_geno_to_int (tuple (clss [ 0 ])), clss [ 1 ] ) for clss in classes ]
223221 for clss in int_classes :
224- self ._patients_graph .add_edge (clss , genotype )
222+ self ._patients_graph .add_edge (clss [ 0 ] , genotype )
225223
226224 # class one is considered as 0.
227225 # class two is considered as 1.
@@ -231,14 +229,14 @@ def __classes_and_subclasses_from_genotype(self, genotype: HashableArray):
231229 # set the missing allele to always be the second allele in the locus
232230 if k % 2 == 0 :
233231 sub = tuple_geno_to_int (
234- classes [class_num ][0 :k ] + ZEROS + classes [class_num ][k + 1 :]
232+ classes [class_num ][0 ][ 0 :k ] + ZEROS + classes [class_num ][ 0 ][k + 1 :]
235233 )
236234 else :
237235 sub = tuple_geno_to_int (
238- classes [class_num ][0 : k - 1 ]
236+ classes [class_num ][0 ][ 0 : k - 1 ]
239237 + ZEROS
240- + classes [class_num ][k - 1 : k ]
241- + classes [class_num ][k + 1 :]
238+ + classes [class_num ][0 ][ k - 1 : k ]
239+ + classes [class_num ][0 ][ k + 1 :]
242240 )
243241
244242 # missing allele number is the index of the first allele of the locus the missing allele belongs to.
@@ -255,6 +253,8 @@ def __classes_and_subclasses_from_genotype(self, genotype: HashableArray):
255253
256254 return int_classes , subclasses
257255
256+
257+
258258 def create_patients_graph (self , f_patients : str ):
259259 """
260260 create patients graph. \n
@@ -300,7 +300,8 @@ def create_patients_graph(self, f_patients: str):
300300 classes_by_patient [patient_id ] = set ()
301301
302302 # sort alleles for each HLA-X
303- for x in range (0 , 10 , 2 ):
303+ l = len (geno )
304+ for x in range (0 , l , 2 ):
304305 geno [x : x + 2 ] = sorted (geno [x : x + 2 ])
305306
306307 geno = HashableArray (geno )
@@ -351,15 +352,14 @@ def find_geno_candidates_by_subclasses(self, subclasses):
351352 genotypes_value ,
352353 ) = self .__find_genotype_candidates_from_subclass (subclass .subclass )
353354
355+ geno = genotypes_value [0 ]
356+ ALLELES_IN_CLASS_I = - 2 * int (- len (geno )/ 4 - 0.5 )
357+ ALLELES_IN_CLASS_II = len (geno ) - ALLELES_IN_CLASS_I
354358 # Checks only the locuses that are not certain to match
355359 if subclass .class_num == 0 :
356- allele_range_to_check = np .array (
357- [6 , 8 , subclass .allele_num ], dtype = np .uint8
358- )
360+ allele_range_to_check = np .array ([x for x in range (len (geno )// 2 + (len (geno )// 2 & 1 ), len (geno ), 2 )] + [subclass .allele_num ], dtype = np .uint8 )
359361 else :
360- allele_range_to_check = np .array (
361- [0 , 2 , 4 , subclass .allele_num ], dtype = np .uint8
362- )
362+ allele_range_to_check = np .array ([x for x in range (0 , len (geno )// 2 + (len (geno )// 2 & 1 ), 2 )] + [subclass .allele_num ], dtype = np .uint8 )
363363
364364 # number of alleles that already match due to match in subclass
365365 matched_alleles : int = (
@@ -383,9 +383,9 @@ def find_geno_candidates_by_classes(self, classes):
383383 desc = "finding classes matching candidates" ,
384384 disable = not self .verbose ,
385385 ):
386- if self ._graph .in_nodes (clss ):
386+ if self ._graph .in_nodes (clss [ 0 ] ):
387387 patient_genos = self ._patients_graph .neighbors (
388- clss
388+ clss [ 0 ]
389389 ) # The patient's genotypes which might be match
390390 (
391391 genotypes_ids ,
@@ -395,12 +395,14 @@ def find_geno_candidates_by_classes(self, classes):
395395 # Checks only the locuses that are not certain to match (the locuses of the other class)
396396 # Class I appearances: 3 locuses = 6 alleles = 23/24 digits
397397 # Class II appearances: 2 locuses = 4 alleles = 15/16 digits
398- if len (str (clss )) > 20 :
399- allele_range_to_check = np .array ([6 , 8 ], dtype = np .uint8 )
400- matched_alleles : int = 6
398+ geno = list (self .patients .values ())[0 ]
399+ if clss [1 ] == 0 :
400+ allele_range_to_check = np .array ([x for x in range (len (geno )// 2 + (len (geno )// 2 & 1 ), len (geno ), 2 )], dtype = np .uint8 )
401+ matched_alleles : int = len (geno )// 2 + (len (geno )// 2 & 1 )
402+
401403 else :
402- allele_range_to_check = np .array ([0 , 2 , 4 ], dtype = np .uint8 )
403- matched_alleles : int = 4
404+ allele_range_to_check = np .array ([x for x in range ( 0 , len ( geno ) // 2 + ( len ( geno ) // 2 & 1 ), 2 ) ], dtype = np .uint8 )
405+ matched_alleles : int = len ( geno ) // 2 - ( len ( geno ) // 2 & 1 )
404406
405407 # Compares the candidate to the patient's genotypes, and adds the match geno candidates to the graph.
406408 self .__add_matched_genos_to_graph (
@@ -441,7 +443,7 @@ def find_geno_candidates_by_genotypes(self, patient_id: int):
441443 # and each patient connects only to their own genos, so we wouldn't override the weight dict.
442444 # self._patients_graph.add_edge(patient_id, geno_id, weight={geno_num: [probability, 10]}) # AMIT DELETE
443445 self ._genotype_candidates [patient_id ][geno_id ] = {
444- geno_num : (probability , 10 )
446+ geno_num : (probability , len ( geno ) )
445447 } # AMIT ADD
446448 # else:
447449 # print(f"Missing 'geno_num' for patient_id: {patient_id}")
@@ -507,7 +509,7 @@ def score_matches(
507509 ].items (): # AMIT ADD
508510 for prob , matches in genotype_matches .values (): # AMIT CHANGE
509511 # match_info = (probability of patient's genotype, number of matches to patient's genotype)
510- if matches != 10 - mismatch :
512+ if matches != len ( list ( self . patients . values ())[ 0 ]) - mismatch :
511513 continue
512514
513515 # add the probabilities multiplication of the patient and all the donors that has this genotype
@@ -568,38 +570,32 @@ def __append_matching_donor(
568570 mm_number : int ,
569571 ) -> None :
570572 """add a donor to the matches dictionary"""
571-
572- compare_commons = locuses_match_between_genos (
573- self .patients [patient ], self .get_most_common_genotype (donor )
573+ pat = self .patients [patient ]
574+ don = self .get_most_common_genotype (donor )
575+ compare_commons , gvh , hvg = locuses_match_between_genos (
576+ pat , don
574577 )
575578
576579 add_donors ["Patient_ID" ].append (patient )
577580 add_donors ["Donor_ID" ].append (donor )
578581 allele_prob = self .probability_to_allele (
579582 don_id = donor , pat_geno = self .patients [patient ]
580583 )
581- add_donors ["Match_Probability_A_1" ].append (allele_prob [0 ])
582- add_donors ["Match_Probability_A_2" ].append (allele_prob [1 ])
583- add_donors ["Match_Probability_B_1" ].append (allele_prob [2 ])
584- add_donors ["Match_Probability_B_2" ].append (allele_prob [3 ])
585- add_donors ["Match_Probability_C_1" ].append (allele_prob [4 ])
586- add_donors ["Match_Probability_C_2" ].append (allele_prob [5 ])
587- add_donors ["Match_Probability_DQB1_1" ].append (allele_prob [6 ])
588- add_donors ["Match_Probability_DQB1_2" ].append (allele_prob [7 ])
589- add_donors ["Match_Probability_DRB1_1" ].append (allele_prob [8 ])
590- add_donors ["Match_Probability_DRB1_2" ].append (allele_prob [9 ])
591-
592- add_donors ["Match_Between_Most_Commons_A" ].append (compare_commons [0 ])
593- add_donors ["Match_Between_Most_Commons_B" ].append (compare_commons [1 ])
594- add_donors ["Match_Between_Most_Commons_C" ].append (compare_commons [2 ])
595- add_donors ["Match_Between_Most_Commons_DQB" ].append (compare_commons [3 ])
596- add_donors ["Match_Between_Most_Commons_DRB" ].append (compare_commons [4 ])
584+ add_donors ["Match_Probability" ].append (allele_prob )
585+ add_donors ["Match_Between_Most_Commons" ].append (compare_commons )
597586
598587 add_donors ["Matching_Probability" ].append (match_prob )
599- add_donors ["Number_Of_Mismatches" ].append (mm_number )
600- add_donors ["Permissive/Non-Permissive" ].append (
601- "-"
602- ) # TODO: add permissiveness algorithm
588+ actual_mismatches = 0
589+ for match_score in compare_commons :
590+ if match_score != 2 :
591+ actual_mismatches += (2 - match_score )
592+
593+ add_donors ["Number_Of_Mismatches" ].append (actual_mismatches )
594+ add_donors ["GvH_Mismatches" ].append (gvh )
595+ add_donors ["HvG_Mismatches" ].append (hvg )
596+
597+ add_donors ["Permissive/Non-Permissive" ].append ("-" )
598+ # TODO: add permissiveness algorithm
603599
604600 # add the other given fields to the results
605601 for field in donors_info :
0 commit comments