2020
2121DONORS_DB : pd .DataFrame = pd .DataFrame ()
2222ZEROS : HashableArray = HashableArray ([0 ])
23- ALLELES_IN_CLASS_I : int = 6
24- ALLELES_IN_CLASS_II : int = 4
2523
2624
2725def set_database (donors_db : pd .DataFrame = pd .DataFrame ()):
@@ -43,22 +41,9 @@ def _init_results_df(donors_info):
4341 "HvG_Mismatches" : [],
4442 "Number_Of_Mismatches" : [],
4543 "Matching_Probability" : [],
46- "Match_Probability_A_1" : [],
47- "Match_Probability_A_2" : [],
48- "Match_Probability_B_1" : [],
49- "Match_Probability_B_2" : [],
50- "Match_Probability_C_1" : [],
51- "Match_Probability_C_2" : [],
52- "Match_Probability_DQB1_1" : [],
53- "Match_Probability_DQB1_2" : [],
54- "Match_Probability_DRB1_1" : [],
55- "Match_Probability_DRB1_2" : [],
44+ "Match_Probability" : [],
5645 "Permissive/Non-Permissive" : [],
57- "Match_Between_Most_Commons_A" : [],
58- "Match_Between_Most_Commons_B" : [],
59- "Match_Between_Most_Commons_C" : [],
60- "Match_Between_Most_Commons_DQB" : [],
61- "Match_Between_Most_Commons_DRB" : [],
46+ "Match_Between_Most_Commons" : [],
6247 }
6348
6449 donors_db_fields = DONORS_DB .columns .values .tolist ()
@@ -68,17 +53,26 @@ def _init_results_df(donors_info):
6853 return pd .DataFrame (fields_in_results )
6954
7055
def locuses_match_between_genos(geno_pat, geno_don):
    """Compare a patient genotype with a donor genotype, locus by locus.

    Both genotypes are flat sequences in which each consecutive pair of
    entries (indices ``i`` and ``i + 1`` for even ``i``) holds the two
    alleles of one locus. The number of loci compared is taken from the
    length of ``geno_pat``.

    Args:
        geno_pat: Patient genotype; must contain an even number of alleles.
        geno_don: Donor genotype; must be at least as long as ``geno_pat``.
            Any extra trailing entries are ignored.

    Returns:
        A tuple ``(matches, total_gvh, total_hvg)`` where

        * ``matches`` is a list with one entry per locus: the number of
          matching alleles (0, 1 or 2) under the better of the two possible
          pairings (straight or crossed);
        * ``total_gvh`` is the number of distinct patient alleles that the
          donor lacks at the same locus, summed over all loci;
        * ``total_hvg`` is the number of distinct donor alleles that the
          patient lacks at the same locus, summed over all loci.

        Alleles equal to ``None`` or ``0`` are ignored when computing the
        two mismatch totals; they are compared as ordinary values when
        computing ``matches``.

    Raises:
        ValueError: If ``geno_pat`` has an odd number of alleles or
            ``geno_don`` is shorter than ``geno_pat``.
    """
    n_alleles = len(geno_pat)

    # Fail early with a clear message instead of an IndexError raised
    # part-way through the loop below.
    if n_alleles % 2:
        raise ValueError(
            f"patient genotype must hold two alleles per locus, "
            f"got {n_alleles} alleles"
        )
    if len(geno_don) < n_alleles:
        raise ValueError(
            f"donor genotype has {len(geno_don)} alleles, "
            f"expected at least {n_alleles}"
        )

    matches = []
    total_gvh = 0
    total_hvg = 0

    for i in range(0, n_alleles, 2):
        a1, b1 = geno_pat[i], geno_pat[i + 1]
        a2, b2 = geno_don[i], geno_don[i + 1]

        # Alleles within a locus are unordered, so score both the straight
        # and the crossed pairing and keep the better one.
        straight = int(a1 == a2) + int(b1 == b2)
        crossed = int(a1 == b2) + int(b1 == a2)
        matches.append(max(straight, crossed))

        # Distinct alleles per side, dropping the None / 0 placeholders.
        p_set = {x for x in (a1, b1) if x not in (None, 0)}
        d_set = {x for x in (a2, b2) if x not in (None, 0)}

        total_gvh += len(p_set - d_set)  # patient has, donor lacks
        total_hvg += len(d_set - p_set)  # donor has, patient lacks

    return matches, total_gvh, total_hvg
8276
8377
8478class DonorsMatching (object ):
@@ -129,7 +123,7 @@ def probability_to_allele(
129123 ) -> List [float ]:
130124 """Takes a donor ID and a genotype.
131125 Returns the probability of match for each allele"""
132- probs = [0 for _ in range (10 )]
126+ probs = [0 for _ in range (len ( pat_geno ) )]
133127
134128 for i , allele in enumerate (pat_geno ):
135129 p = 0
@@ -150,7 +144,7 @@ def __find_genotype_candidates_from_class(
150144 ) -> Tuple [np .ndarray , np .ndarray ]:
151145 """Takes an integer subclass.
152146 Returns the genotypes (ids and values) which are connected to it in the graph"""
153- return self ._graph .class_neighbors (clss )
147+ return self ._graph .class_neighbors (clss , Len = len ( self . patients [ 0 ]) )
154148
155149 def __find_donor_from_geno (self , geno_id : int ) -> Sequence [int ]:
156150 """Gets the LOL ID of a genotype.
@@ -218,6 +212,8 @@ def __add_matched_genos_to_graph(
218212
219213 def __classes_and_subclasses_from_genotype (self , genotype : HashableArray ):
220214 subclasses = []
215+ ALLELES_IN_CLASS_I = - 2 * int (- len (genotype )/ 4 - 0.5 )
216+ ALLELES_IN_CLASS_II = len (genotype ) - ALLELES_IN_CLASS_I
221217 classes = [genotype [:ALLELES_IN_CLASS_I ], genotype [ALLELES_IN_CLASS_I :]]
222218 num_of_alleles_in_class = [ALLELES_IN_CLASS_I , ALLELES_IN_CLASS_II ]
223219
@@ -257,34 +253,7 @@ def __classes_and_subclasses_from_genotype(self, genotype: HashableArray):
257253
258254 return int_classes , subclasses
259255
260- def count_GvH_HvG (
261- self ,
262- pat_geno : Sequence [int ],
263- don_geno : Sequence [int ],
264- ) -> Tuple [int , int ]:
265- """
266- Count GvH and HvG mismatches locus by locus by set‐difference.
267- Each locus is two slots in the genotype lists:
268- A: indices [0,1], B: [2,3], C: [4,5], DQB1: [6,7], DRB1: [8,9]
269- We drop any “N” (here encoded as 0 or None), then:
270- GvH = | patient_set – donor_set |
271- HvG = | donor_set – patient_set |
272- Sum over all five loci.
273- """
274- total_gvh = 0
275- total_hvg = 0
276-
277- for i in range (0 , 10 , 2 ):
278- # build the allele sets, filtering out N/None/0
279- p_set = {a for a in (pat_geno [i ], pat_geno [i + 1 ]) if a not in (None , 0 )}
280- d_set = {a for a in (don_geno [i ], don_geno [i + 1 ]) if a not in (None , 0 )}
281-
282- # how many the patient has that the donor doesn’t:
283- total_gvh += len (p_set - d_set )
284- # how many the donor has that the patient doesn’t:
285- total_hvg += len (d_set - p_set )
286256
287- return total_gvh , total_hvg
288257
289258 def create_patients_graph (self , f_patients : str ):
290259 """
@@ -331,7 +300,8 @@ def create_patients_graph(self, f_patients: str):
331300 classes_by_patient [patient_id ] = set ()
332301
333302 # sort alleles for each HLA-X
334- for x in range (0 , 10 , 2 ):
303+ l = len (geno )
304+ for x in range (0 , l , 2 ):
335305 geno [x : x + 2 ] = sorted (geno [x : x + 2 ])
336306
337307 geno = HashableArray (geno )
@@ -382,14 +352,18 @@ def find_geno_candidates_by_subclasses(self, subclasses):
382352 genotypes_value ,
383353 ) = self .__find_genotype_candidates_from_subclass (subclass .subclass )
384354
355+ geno = genotypes_value [0 ]
356+ ALLELES_IN_CLASS_I = - 2 * int (- len (geno )/ 4 - 0.5 )
357+ ALLELES_IN_CLASS_II = len (geno ) - ALLELES_IN_CLASS_I
385358 # Checks only the locuses that are not certain to match
386359 if subclass .class_num == 0 :
387360 allele_range_to_check = np .array (
388- [6 , 8 , subclass .allele_num ], dtype = np .uint8
361+ [c for c in range (ALLELES_IN_CLASS_I , ALLELES_IN_CLASS_I + ALLELES_IN_CLASS_I - 2 , 2 )] + [subclass .allele_num ],
362+ dtype = np .uint8
389363 )
390364 else :
391365 allele_range_to_check = np .array (
392- [0 , 2 , 4 , subclass .allele_num ], dtype = np .uint8
366+ [c for c in range ( 0 , ALLELES_IN_CLASS_I , 2 )] + [ subclass .allele_num ], dtype = np .uint8
393367 )
394368
395369 # number of alleles that already match due to match in subclass
@@ -472,7 +446,7 @@ def find_geno_candidates_by_genotypes(self, patient_id: int):
472446 # and each patient connects only to their own genos, so we wouldn't override the weight dict.
473447 # self._patients_graph.add_edge(patient_id, geno_id, weight={geno_num: [probability, 10]}) # AMIT DELETE
474448 self ._genotype_candidates [patient_id ][geno_id ] = {
475- geno_num : (probability , 10 )
449+ geno_num : (probability , len ( geno ) )
476450 } # AMIT ADD
477451 # else:
478452 # print(f"Missing 'geno_num' for patient_id: {patient_id}")
@@ -538,7 +512,7 @@ def score_matches(
538512 ].items (): # AMIT ADD
539513 for prob , matches in genotype_matches .values (): # AMIT CHANGE
540514 # match_info = (probability of patient's genotype, number of matches to patient's genotype)
541- if matches != 10 - mismatch :
515+ if matches != len ( self . patients [ 1 ]) - mismatch :
542516 continue
543517
544518 # add the probabilities multiplication of the patient and all the donors that has this genotype
@@ -599,46 +573,27 @@ def __append_matching_donor(
599573 mm_number : int ,
600574 ) -> None :
601575 """add a donor to the matches dictionary"""
602-
603- compare_commons = locuses_match_between_genos (
604- self .patients [patient ], self .get_most_common_genotype (donor )
576+ pat = self .patients [patient ]
577+ don = self .get_most_common_genotype (donor )
578+ compare_commons , gvh , hvg = locuses_match_between_genos (
579+ pat , don
605580 )
606581
607582 add_donors ["Patient_ID" ].append (patient )
608583 add_donors ["Donor_ID" ].append (donor )
609584 allele_prob = self .probability_to_allele (
610585 don_id = donor , pat_geno = self .patients [patient ]
611586 )
612- add_donors ["Match_Probability_A_1" ].append (allele_prob [0 ])
613- add_donors ["Match_Probability_A_2" ].append (allele_prob [1 ])
614- add_donors ["Match_Probability_B_1" ].append (allele_prob [2 ])
615- add_donors ["Match_Probability_B_2" ].append (allele_prob [3 ])
616- add_donors ["Match_Probability_C_1" ].append (allele_prob [4 ])
617- add_donors ["Match_Probability_C_2" ].append (allele_prob [5 ])
618- add_donors ["Match_Probability_DQB1_1" ].append (allele_prob [6 ])
619- add_donors ["Match_Probability_DQB1_2" ].append (allele_prob [7 ])
620- add_donors ["Match_Probability_DRB1_1" ].append (allele_prob [8 ])
621- add_donors ["Match_Probability_DRB1_2" ].append (allele_prob [9 ])
622-
623- add_donors ["Match_Between_Most_Commons_A" ].append (compare_commons [0 ])
624- add_donors ["Match_Between_Most_Commons_B" ].append (compare_commons [1 ])
625- add_donors ["Match_Between_Most_Commons_C" ].append (compare_commons [2 ])
626- add_donors ["Match_Between_Most_Commons_DQB" ].append (compare_commons [3 ])
627- add_donors ["Match_Between_Most_Commons_DRB" ].append (compare_commons [4 ])
587+ add_donors ["Match_Probability" ].append (allele_prob )
588+ add_donors ["Match_Between_Most_Commons" ].append (compare_commons )
628589
629590 add_donors ["Matching_Probability" ].append (match_prob )
630-
631591 actual_mismatches = 0
632592 for match_score in compare_commons :
633593 if match_score != 2 :
634594 actual_mismatches += (2 - match_score )
635595
636596 add_donors ["Number_Of_Mismatches" ].append (actual_mismatches )
637-
638- # compute GvH / HvG counts
639- pat = self .patients [patient ]
640- don = self .get_most_common_genotype (donor )
641- gvh , hvg = self .count_GvH_HvG (pat , don )
642597 add_donors ["GvH_Mismatches" ].append (gvh )
643598 add_donors ["HvG_Mismatches" ].append (hvg )
644599
0 commit comments