@@ -26,8 +26,13 @@ def filter_results(res_haps, extra_gl):
2626
2727 split_extra_gl_into_locus = extra_gl .split ("^" )
2828
29- dct = {locus .split ("*" )[0 ]: [set (locus .split ("+" )[0 ].split ("/" )), set (locus .split ("+" )[1 ].split ("/" ))]
30- for locus in split_extra_gl_into_locus }
29+ dct = {
30+ locus .split ("*" )[0 ]: [
31+ set (locus .split ("+" )[0 ].split ("/" )),
32+ set (locus .split ("+" )[1 ].split ("/" )),
33+ ]
34+ for locus in split_extra_gl_into_locus
35+ }
3136
3237 haps = res_haps ["Haps" ]
3338 filter_idx = []
@@ -37,8 +42,10 @@ def filter_results(res_haps, extra_gl):
3742 for allele1 , allele2 in zip (hap1 .split ("~" ), hap2 .split ("~" )):
3843 loc = allele1 .split ("*" )[0 ]
3944 if loc in dct :
40- if not ((allele1 in dct [loc ][0 ] and allele2 in dct [loc ][1 ]) or (
41- allele1 in dct [loc ][1 ] and allele2 in dct [loc ][0 ])):
45+ if not (
46+ (allele1 in dct [loc ][0 ] and allele2 in dct [loc ][1 ])
47+ or (allele1 in dct [loc ][1 ] and allele2 in dct [loc ][0 ])
48+ ):
4249 check = False
4350 break
4451 if check :
@@ -56,13 +63,13 @@ def create_subject_dict(file_path):
5663 subject_dict = {}
5764
5865 # Open and read the file
59- with open (file_path , 'r' ) as file :
66+ with open (file_path , "r" ) as file :
6067 for line in file :
6168 line = line .strip ()
6269 if not line :
6370 continue
6471
65- subject_id = line .split (',' , 1 )[0 ]
72+ subject_id = line .split ("," , 1 )[0 ]
6673
6774 if subject_id not in subject_dict :
6875 subject_dict [subject_id ] = []
@@ -71,6 +78,7 @@ def create_subject_dict(file_path):
7178
7279 return subject_dict
7380
81+
7482def create_haps (path_pmug ):
7583 subject_dict = create_subject_dict (path_pmug )
7684 all_haps = {"subject_id" : [], "res_haps" : []}
@@ -79,10 +87,10 @@ def create_haps(path_pmug):
7987 res_haps = {"Haps" : [], "Probs" : [], "Pops" : []}
8088 rows = subject_dict [id ]
8189 for row in rows :
82- row = row .split (',' )
83- pair1 = str (row [1 ]).split (';' )
90+ row = row .split ("," )
91+ pair1 = str (row [1 ]).split (";" )
8492 haps1 , pops1 = pair1 [0 ], pair1 [1 ]
85- pair2 = str (row [2 ]).split (';' )
93+ pair2 = str (row [2 ]).split (";" )
8694 haps2 , pops2 = pair2 [0 ], pair2 [1 ]
8795 prob = float (row [3 ])
8896
@@ -95,6 +103,7 @@ def create_haps(path_pmug):
95103
96104 return all_haps
97105
106+
98107def is_subarray_unordered (large_array , small_array ):
99108 # Convert arrays to sets
100109 set_large = set (large_array )
@@ -103,6 +112,7 @@ def is_subarray_unordered(large_array, small_array):
103112 # Check if all elements of small_array are in large_array
104113 return set_small .issubset (set_large )
105114
115+
106116def write_best_hap_race_pairs (name_gl , haps , pops , probs , fout , numOfReasults ):
107117 all_res = []
108118
@@ -111,7 +121,7 @@ def write_best_hap_race_pairs(name_gl, haps, pops, probs, fout, numOfReasults):
111121 all_res .append ([probs [i ], pair ])
112122 all_res .sort (key = lambda x : x [0 ], reverse = True )
113123 # write the output to file
114- minBestResult = min (numOfReasults ,len (all_res ))
124+ minBestResult = min (numOfReasults , len (all_res ))
115125 for k in range (minBestResult ):
116126 fout .write (
117127 name_gl
@@ -123,7 +133,9 @@ def write_best_hap_race_pairs(name_gl, haps, pops, probs, fout, numOfReasults):
123133 + str (k )
124134 + "\n "
125135 )
126- def write_best_prob (name_gl , res , probs , fout ,number_of_pop_results ,sign = "," ):
136+
137+
138+ def write_best_prob (name_gl , res , probs , fout , number_of_pop_results , sign = "," ):
127139 sumProbsDict = defaultdict (list )
128140 # loop over the result and sum the prob by populations/haplotype
129141 for k in range (len (res )):
@@ -139,15 +151,14 @@ def write_best_prob(name_gl, res, probs, fout,number_of_pop_results ,sign=","):
139151 else :
140152 sumProbsDict [key ] = probs [k ]
141153
142-
143154 multProbs = []
144155 for k in sumProbsDict :
145156 multProbs .append ([sumProbsDict [k ], [k , sumProbsDict [k ]]])
146157
147158 multProbs .sort (key = lambda x : x [0 ], reverse = True )
148159
149160 # write the output to file
150- minBestResult = min (len (multProbs ),number_of_pop_results )
161+ minBestResult = min (len (multProbs ), number_of_pop_results )
151162 for k in range (minBestResult ):
152163 fout .write (
153164 name_gl
@@ -160,15 +171,15 @@ def write_best_prob(name_gl, res, probs, fout,number_of_pop_results ,sign=","):
160171 + "\n "
161172 )
162173
163- def write_umug (id ,res_haps ,fout ,numOfResults ):
164174
175+ def write_umug (id , res_haps , fout , numOfResults ):
165176 res_muugs = {}
166- for idx , hap in enumerate (res_haps ["Haps" ]):
167- hap1 ,hap2 = res_haps ["Haps" ][idx ][0 ], res_haps ["Haps" ][idx ][1 ]
177+ for idx , hap in enumerate (res_haps ["Haps" ]):
178+ hap1 , hap2 = res_haps ["Haps" ][idx ][0 ], res_haps ["Haps" ][idx ][1 ]
168179 prob = res_haps ["Probs" ][idx ]
169180 haps = []
170- haps .append (hap1 .split ('~' ))
171- haps .append (hap2 .split ('~' ))
181+ haps .append (hap1 .split ("~" ))
182+ haps .append (hap2 .split ("~" ))
172183 muug = ""
173184 for i in range (len (haps [0 ])):
174185 sort_hap = sorted ([haps [0 ][i ], haps [1 ][i ]])
@@ -182,27 +193,21 @@ def write_umug(id,res_haps,fout,numOfResults):
182193 for key in res_muugs .keys ():
183194 pairs .append ((key , res_muugs [key ]))
184195 pairs = sorted (pairs , key = lambda x : x [1 ], reverse = True )
185- minResults = min (numOfResults ,len (pairs ))
196+ minResults = min (numOfResults , len (pairs ))
186197 for k in range (minResults ):
187198 fout .write (
188- id
189- + ","
190- + str (pairs [k ][0 ])
191- + ","
192- + str (pairs [k ][1 ])
193- + ","
194- + str (k )
195- + "\n "
199+ id + "," + str (pairs [k ][0 ]) + "," + str (pairs [k ][1 ]) + "," + str (k ) + "\n "
196200 )
197201
198- def write_umug_pops (id ,res_haps ,fout ,numOfResults ):
202+
203+ def write_umug_pops (id , res_haps , fout , numOfResults ):
199204 res_muugs = {}
200- for idx ,pop in enumerate (res_haps ["Haps" ]):
201- pop1 ,pop2 = res_haps ["Pops" ][idx ][0 ], res_haps ["Pops" ][idx ][1 ]
205+ for idx , pop in enumerate (res_haps ["Haps" ]):
206+ pop1 , pop2 = res_haps ["Pops" ][idx ][0 ], res_haps ["Pops" ][idx ][1 ]
202207 prob = res_haps ["Probs" ][idx ]
203- pops = [pop1 ,pop2 ]
208+ pops = [pop1 , pop2 ]
204209 pops = sorted (pops )
205- muug = pops [0 ]+ ',' + pops [1 ]
210+ muug = pops [0 ] + "," + pops [1 ]
206211 if muug in res_muugs .keys ():
207212 res_muugs [muug ] += prob
208213 else :
@@ -211,48 +216,50 @@ def write_umug_pops(id,res_haps,fout,numOfResults):
211216 for key in res_muugs .keys ():
212217 pairs .append ((key , res_muugs [key ]))
213218 pairs = sorted (pairs , key = lambda x : x [1 ], reverse = True )
214- minResults = min (numOfResults ,len (pairs ))
219+ minResults = min (numOfResults , len (pairs ))
215220 for k in range (minResults ):
216221 fout .write (
217- id
218- + ","
219- + str (pairs [k ][0 ])
220- + ","
221- + str (pairs [k ][1 ])
222- + ","
223- + str (k )
224- + "\n "
222+ id + "," + str (pairs [k ][0 ]) + "," + str (pairs [k ][1 ]) + "," + str (k ) + "\n "
225223 )
226224
227- def write_filter (subject_id ,res_haps ,fout_hap_haplo ,fout_pop_haplo ,fout_hap_muug ,fout_pop_muug ,number_of_results ,number_of_pop_results ,MUUG_output ,haps_output ):
225+
226+ def write_filter (
227+ subject_id ,
228+ res_haps ,
229+ fout_hap_haplo ,
230+ fout_pop_haplo ,
231+ fout_hap_muug ,
232+ fout_pop_muug ,
233+ number_of_results ,
234+ number_of_pop_results ,
235+ MUUG_output ,
236+ haps_output ,
237+ ):
228238 haps = res_haps ["Haps" ]
229239 probs = res_haps ["Probs" ]
230240 pops = res_haps ["Pops" ]
231241 if haps_output :
232242 write_best_hap_race_pairs (
233- subject_id ,
234- haps ,
235- pops ,
236- probs ,
237- fout_hap_haplo ,
238- number_of_results
243+ subject_id , haps , pops , probs , fout_hap_haplo , number_of_results
239244 )
240- write_best_prob (subject_id , pops , probs , fout_pop_haplo ,1 )
245+ write_best_prob (subject_id , pops , probs , fout_pop_haplo , 1 )
241246 if MUUG_output :
242- write_umug (subject_id ,res_haps ,fout_hap_muug ,number_of_results )
243- write_umug_pops (subject_id ,res_haps ,fout_pop_muug ,number_of_pop_results )
247+ write_umug (subject_id , res_haps , fout_hap_muug , number_of_results )
248+ write_umug_pops (subject_id , res_haps , fout_pop_muug , number_of_pop_results )
244249
245250
246- def change_output_by_extra_gl (config ,gls ,path_pmug ,path_umug ,path_umug_pops ,path_pmug_pops ,path_miss ):
251+ def change_output_by_extra_gl (
252+ config , gls , path_pmug , path_umug , path_umug_pops , path_pmug_pops , path_miss
253+ ):
247254 res_haps = create_haps (path_pmug )
248255 all_data = {"subject_id" : [], "res_haps" : [], "extra_gl" : [], "short_gl" : []}
249256
250- if is_subarray_unordered (gls ["subject_id" ],res_haps ["subject_id" ]):
251- ids = []
257+ if is_subarray_unordered (gls ["subject_id" ], res_haps ["subject_id" ]):
258+ ids = []
252259 haps = []
253260 extras = []
254261 shorts = []
255- for idx ,id in enumerate (res_haps ["subject_id" ]):
262+ for idx , id in enumerate (res_haps ["subject_id" ]):
256263 ids .append (id )
257264 haps .append (res_haps ["res_haps" ][idx ])
258265 gl_idx = gls ["subject_id" ].index (id )
@@ -270,29 +277,40 @@ def change_output_by_extra_gl(config,gls,path_pmug,path_umug,path_umug_pops,path
270277 number_of_results = config ["number_of_results" ]
271278 number_of_pop_results = config ["number_of_pop_results" ]
272279
273- fout_hap_haplo ,fout_pop_haplo ,fout_hap_muug ,fout_pop_muug = "" ,"" ,"" ,""
280+ fout_hap_haplo , fout_pop_haplo , fout_hap_muug , fout_pop_muug = "" , "" , "" , ""
274281
275282 if haps_output :
276283 fout_hap_haplo = open (path_pmug , "w" )
277- fout_pop_haplo = open (path_pmug_pops ,"w" )
284+ fout_pop_haplo = open (path_pmug_pops , "w" )
278285 if MUUG_output :
279- fout_hap_muug = open (path_umug ,"w" )
280- fout_pop_muug = open (path_umug_pops ,"w" )
281- miss = open (path_miss ,"a" )
286+ fout_hap_muug = open (path_umug , "w" )
287+ fout_pop_muug = open (path_umug_pops , "w" )
288+ miss = open (path_miss , "a" )
282289
283- for idx ,id in enumerate (all_data ["subject_id" ]):
284- subject_id = id
290+ for idx , id in enumerate (all_data ["subject_id" ]):
291+ subject_id = id
285292 res_haps = all_data ["res_haps" ][idx ]
286293 extra_gl = all_data ["extra_gl" ][idx ]
287294
288295 if len (extra_gl ) > 0 :
289296 res_haps = filter_results (res_haps , extra_gl )
290297
291- if len (res_haps ["Haps" ]) == 0 :
298+ if len (res_haps ["Haps" ]) == 0 :
292299 gl_idx = gls ["subject_id" ].index (subject_id )
293300 miss .write (str (gl_idx ) + "," + str (subject_id ) + "\n " )
294301 else :
295- write_filter (subject_id , res_haps , fout_hap_haplo , fout_pop_haplo , fout_hap_muug , fout_pop_muug ,number_of_results ,number_of_pop_results ,MUUG_output ,haps_output )
302+ write_filter (
303+ subject_id ,
304+ res_haps ,
305+ fout_hap_haplo ,
306+ fout_pop_haplo ,
307+ fout_hap_muug ,
308+ fout_pop_muug ,
309+ number_of_results ,
310+ number_of_pop_results ,
311+ MUUG_output ,
312+ haps_output ,
313+ )
296314
297315 if MUUG_output :
298316 fout_hap_muug .close ()
0 commit comments