Skip to content

Commit aad6608

Browse files
committed
black update
1 parent e1e2113 commit aad6608

File tree

3 files changed

+134
-93
lines changed

3 files changed

+134
-93
lines changed

RunGrim.py

Lines changed: 38 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -8,55 +8,70 @@
88
from filter_top_3 import change_donor_file
99
from filter_by_rest import change_output_by_extra_gl
1010

11+
1112
def remove_empty_rows(file_path):
    """Rewrite the CSV at *file_path* in place without its empty rows.

    A row is dropped when every one of its fields is empty. Completely blank
    lines are skipped by ``pd.read_csv`` itself, so they are never written
    back either.

    Every field is read as text and only the empty string is treated as
    missing. Surviving values are therefore written back as they were read:
    tokens such as ``NA`` are not blanked out and numeric fields are not
    re-parsed and re-formatted.

    Args:
        file_path: Path of the CSV file to clean. Its first line is treated
            as the header row and is written back as such.
    """
    df = pd.read_csv(file_path, dtype=str, keep_default_na=False, na_values=[""])
    df.dropna(how="all").to_csv(file_path, index=False)
1718

18-
def run_original_grim(
    path_configuration, hap_pop_pair=True, Producehpf=False, dominant3=True
):
    """Run the py-graph-imputation pipeline described by a JSON configuration.

    Steps, in order: optionally produce the haplotype frequency file, strip
    empty rows from it, build the frequency graph, run imputation and, when
    ``dominant3`` is set, post-filter the imputation output.

    Args:
        path_configuration: Path to the JSON configuration file. It is read
            here and also forwarded to every pipeline step as ``conf_file``.
        hap_pop_pair: Forwarded unchanged to ``impute``.
        Producehpf: When true, ``produce_hpf`` is run as the first step.
        dominant3: When true, the donor file named by
            ``config["imputation_in_file"]`` is rewritten by
            ``change_donor_file`` before imputation, the imputation output is
            filtered with ``change_output_by_extra_gl``, and the donor file's
            original lines are written back afterwards, even if imputation
            or filtering raises.
    """
    with open(path_configuration, "r") as f:
        config = json.load(f)

    # First step in py-graph-imputation.
    if Producehpf:
        produce_hpf(conf_file=path_configuration)

    # Remove empty rows from the hpf file, otherwise the graph step fails.
    # NOTE(review): assumed to run whether or not Producehpf is set - confirm.
    remove_empty_rows(config["freq_file"])

    # Second step in py-graph-imputation.
    graph_freqs(conf_file=path_configuration)

    if not dominant3:
        impute(conf_file=path_configuration, hap_pop_pair=hap_pop_pair)
        return

    # Reduce the donor file to the 3 most important gls. `gls` carries the
    # short_gl / extra_gl of each donor row and `lines` the original content
    # of the donor file, which is written back below.
    path_donor = config["imputation_in_file"]
    gls, lines = change_donor_file(path_donor)

    try:
        # Imputation.
        impute(conf_file=path_configuration, hap_pop_pair=hap_pop_pair)

        # Filter the imputation output by the extra_gl of every subject.
        out_dir = config["imputation_out_path"]
        path_pmug = os.path.join(out_dir, config["imputation_out_hap_freq_filename"])
        path_umug = os.path.join(out_dir, config["imputation_out_umug_freq_filename"])
        path_umug_pops = os.path.join(
            out_dir, config["imputation_out_umug_pops_filename"]
        )
        path_pmug_pops = os.path.join(
            out_dir, config["imputation_out_hap_pops_filename"]
        )
        path_miss = os.path.join(out_dir, config["imputation_out_miss_filename"])

        # Filters the results in the original output files and appends new
        # misses to the existing miss file.
        change_output_by_extra_gl(
            config, gls, path_pmug, path_umug, path_umug_pops, path_pmug_pops, path_miss
        )
    finally:
        # Always restore the original donor file, so a failure above does not
        # leave the user's input file in its reduced form.
        with open(path_donor, "w") as file:
            file.writelines(lines)
5973

74+
6075
if __name__ == "__main__":
    # Script entry point: run the full pipeline, including hpf production
    # and the dominant-3 filtering, on the minimal example configuration.
    run_original_grim(
        "conf/minimal-configuration.json",
        hap_pop_pair=True,
        Producehpf=True,
        dominant3=True,
    )

filter_by_rest.py

Lines changed: 82 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,13 @@ def filter_results(res_haps, extra_gl):
2626

2727
split_extra_gl_into_locus = extra_gl.split("^")
2828

29-
dct = {locus.split("*")[0]: [set(locus.split("+")[0].split("/")), set(locus.split("+")[1].split("/"))]
30-
for locus in split_extra_gl_into_locus}
29+
dct = {
30+
locus.split("*")[0]: [
31+
set(locus.split("+")[0].split("/")),
32+
set(locus.split("+")[1].split("/")),
33+
]
34+
for locus in split_extra_gl_into_locus
35+
}
3136

3237
haps = res_haps["Haps"]
3338
filter_idx = []
@@ -37,8 +42,10 @@ def filter_results(res_haps, extra_gl):
3742
for allele1, allele2 in zip(hap1.split("~"), hap2.split("~")):
3843
loc = allele1.split("*")[0]
3944
if loc in dct:
40-
if not ((allele1 in dct[loc][0] and allele2 in dct[loc][1]) or (
41-
allele1 in dct[loc][1] and allele2 in dct[loc][0])):
45+
if not (
46+
(allele1 in dct[loc][0] and allele2 in dct[loc][1])
47+
or (allele1 in dct[loc][1] and allele2 in dct[loc][0])
48+
):
4249
check = False
4350
break
4451
if check:
@@ -56,13 +63,13 @@ def create_subject_dict(file_path):
5663
subject_dict = {}
5764

5865
# Open and read the file
59-
with open(file_path, 'r') as file:
66+
with open(file_path, "r") as file:
6067
for line in file:
6168
line = line.strip()
6269
if not line:
6370
continue
6471

65-
subject_id = line.split(',', 1)[0]
72+
subject_id = line.split(",", 1)[0]
6673

6774
if subject_id not in subject_dict:
6875
subject_dict[subject_id] = []
@@ -71,6 +78,7 @@ def create_subject_dict(file_path):
7178

7279
return subject_dict
7380

81+
7482
def create_haps(path_pmug):
7583
subject_dict = create_subject_dict(path_pmug)
7684
all_haps = {"subject_id": [], "res_haps": []}
@@ -79,10 +87,10 @@ def create_haps(path_pmug):
7987
res_haps = {"Haps": [], "Probs": [], "Pops": []}
8088
rows = subject_dict[id]
8189
for row in rows:
82-
row = row.split(',')
83-
pair1 = str(row[1]).split(';')
90+
row = row.split(",")
91+
pair1 = str(row[1]).split(";")
8492
haps1, pops1 = pair1[0], pair1[1]
85-
pair2 = str(row[2]).split(';')
93+
pair2 = str(row[2]).split(";")
8694
haps2, pops2 = pair2[0], pair2[1]
8795
prob = float(row[3])
8896

@@ -95,6 +103,7 @@ def create_haps(path_pmug):
95103

96104
return all_haps
97105

106+
98107
def is_subarray_unordered(large_array, small_array):
99108
# Convert arrays to sets
100109
set_large = set(large_array)
@@ -103,6 +112,7 @@ def is_subarray_unordered(large_array, small_array):
103112
# Check if all elements of small_array are in large_array
104113
return set_small.issubset(set_large)
105114

115+
106116
def write_best_hap_race_pairs(name_gl, haps, pops, probs, fout, numOfReasults):
107117
all_res = []
108118

@@ -111,7 +121,7 @@ def write_best_hap_race_pairs(name_gl, haps, pops, probs, fout, numOfReasults):
111121
all_res.append([probs[i], pair])
112122
all_res.sort(key=lambda x: x[0], reverse=True)
113123
# write the output to file
114-
minBestResult = min(numOfReasults,len(all_res))
124+
minBestResult = min(numOfReasults, len(all_res))
115125
for k in range(minBestResult):
116126
fout.write(
117127
name_gl
@@ -123,7 +133,9 @@ def write_best_hap_race_pairs(name_gl, haps, pops, probs, fout, numOfReasults):
123133
+ str(k)
124134
+ "\n"
125135
)
126-
def write_best_prob(name_gl, res, probs, fout,number_of_pop_results ,sign=","):
136+
137+
138+
def write_best_prob(name_gl, res, probs, fout, number_of_pop_results, sign=","):
127139
sumProbsDict = defaultdict(list)
128140
# loop over the result and sum the prob by populations/haplotype
129141
for k in range(len(res)):
@@ -139,15 +151,14 @@ def write_best_prob(name_gl, res, probs, fout,number_of_pop_results ,sign=","):
139151
else:
140152
sumProbsDict[key] = probs[k]
141153

142-
143154
multProbs = []
144155
for k in sumProbsDict:
145156
multProbs.append([sumProbsDict[k], [k, sumProbsDict[k]]])
146157

147158
multProbs.sort(key=lambda x: x[0], reverse=True)
148159

149160
# write the output to file
150-
minBestResult =min(len(multProbs),number_of_pop_results)
161+
minBestResult = min(len(multProbs), number_of_pop_results)
151162
for k in range(minBestResult):
152163
fout.write(
153164
name_gl
@@ -160,15 +171,15 @@ def write_best_prob(name_gl, res, probs, fout,number_of_pop_results ,sign=","):
160171
+ "\n"
161172
)
162173

163-
def write_umug(id,res_haps,fout,numOfResults):
164174

175+
def write_umug(id, res_haps, fout, numOfResults):
165176
res_muugs = {}
166-
for idx ,hap in enumerate(res_haps["Haps"]):
167-
hap1,hap2 = res_haps["Haps"][idx][0], res_haps["Haps"][idx][1]
177+
for idx, hap in enumerate(res_haps["Haps"]):
178+
hap1, hap2 = res_haps["Haps"][idx][0], res_haps["Haps"][idx][1]
168179
prob = res_haps["Probs"][idx]
169180
haps = []
170-
haps.append(hap1.split('~'))
171-
haps.append(hap2.split('~'))
181+
haps.append(hap1.split("~"))
182+
haps.append(hap2.split("~"))
172183
muug = ""
173184
for i in range(len(haps[0])):
174185
sort_hap = sorted([haps[0][i], haps[1][i]])
@@ -182,27 +193,21 @@ def write_umug(id,res_haps,fout,numOfResults):
182193
for key in res_muugs.keys():
183194
pairs.append((key, res_muugs[key]))
184195
pairs = sorted(pairs, key=lambda x: x[1], reverse=True)
185-
minResults = min(numOfResults,len(pairs))
196+
minResults = min(numOfResults, len(pairs))
186197
for k in range(minResults):
187198
fout.write(
188-
id
189-
+ ","
190-
+ str(pairs[k][0])
191-
+ ","
192-
+ str(pairs[k][1])
193-
+ ","
194-
+ str(k)
195-
+ "\n"
199+
id + "," + str(pairs[k][0]) + "," + str(pairs[k][1]) + "," + str(k) + "\n"
196200
)
197201

198-
def write_umug_pops(id,res_haps,fout,numOfResults):
202+
203+
def write_umug_pops(id, res_haps, fout, numOfResults):
199204
res_muugs = {}
200-
for idx,pop in enumerate(res_haps["Haps"]):
201-
pop1,pop2 = res_haps["Pops"][idx][0], res_haps["Pops"][idx][1]
205+
for idx, pop in enumerate(res_haps["Haps"]):
206+
pop1, pop2 = res_haps["Pops"][idx][0], res_haps["Pops"][idx][1]
202207
prob = res_haps["Probs"][idx]
203-
pops = [pop1,pop2]
208+
pops = [pop1, pop2]
204209
pops = sorted(pops)
205-
muug = pops[0]+','+pops[1]
210+
muug = pops[0] + "," + pops[1]
206211
if muug in res_muugs.keys():
207212
res_muugs[muug] += prob
208213
else:
@@ -211,48 +216,50 @@ def write_umug_pops(id,res_haps,fout,numOfResults):
211216
for key in res_muugs.keys():
212217
pairs.append((key, res_muugs[key]))
213218
pairs = sorted(pairs, key=lambda x: x[1], reverse=True)
214-
minResults = min(numOfResults,len(pairs))
219+
minResults = min(numOfResults, len(pairs))
215220
for k in range(minResults):
216221
fout.write(
217-
id
218-
+ ","
219-
+ str(pairs[k][0])
220-
+ ","
221-
+ str(pairs[k][1])
222-
+ ","
223-
+ str(k)
224-
+ "\n"
222+
id + "," + str(pairs[k][0]) + "," + str(pairs[k][1]) + "," + str(k) + "\n"
225223
)
226224

227-
def write_filter(
    subject_id,
    res_haps,
    fout_hap_haplo,
    fout_pop_haplo,
    fout_hap_muug,
    fout_pop_muug,
    number_of_results,
    number_of_pop_results,
    MUUG_output,
    haps_output,
):
    """Write one subject's results to every enabled output file.

    Args:
        subject_id: Identifier written at the start of each output row.
        res_haps: Mapping with the keys ``"Haps"``, ``"Probs"`` and ``"Pops"``.
        fout_hap_haplo: Writable file for haplotype pairs (used when
            ``haps_output`` is true).
        fout_pop_haplo: Writable file for population results (used when
            ``haps_output`` is true).
        fout_hap_muug: Writable file for MUUG results (used when
            ``MUUG_output`` is true).
        fout_pop_muug: Writable file for MUUG population results (used when
            ``MUUG_output`` is true).
        number_of_results: Maximum number of haplotype / MUUG rows per subject.
        number_of_pop_results: Maximum number of MUUG population rows per
            subject.
        MUUG_output: Enables the two MUUG writers.
        haps_output: Enables the two haplotype writers.
    """
    hap_pairs = res_haps["Haps"]
    pair_probs = res_haps["Probs"]
    pop_pairs = res_haps["Pops"]

    if haps_output:
        write_best_hap_race_pairs(
            subject_id,
            hap_pairs,
            pop_pairs,
            pair_probs,
            fout_hap_haplo,
            number_of_results,
        )
        # NOTE(review): the result count is hard-coded to 1 here rather than
        # taken from number_of_pop_results - confirm this is intentional.
        write_best_prob(subject_id, pop_pairs, pair_probs, fout_pop_haplo, 1)

    if MUUG_output:
        write_umug(subject_id, res_haps, fout_hap_muug, number_of_results)
        write_umug_pops(subject_id, res_haps, fout_pop_muug, number_of_pop_results)
244249

245250

246-
def change_output_by_extra_gl(config,gls,path_pmug,path_umug,path_umug_pops,path_pmug_pops,path_miss):
251+
def change_output_by_extra_gl(
252+
config, gls, path_pmug, path_umug, path_umug_pops, path_pmug_pops, path_miss
253+
):
247254
res_haps = create_haps(path_pmug)
248255
all_data = {"subject_id": [], "res_haps": [], "extra_gl": [], "short_gl": []}
249256

250-
if is_subarray_unordered(gls["subject_id"],res_haps["subject_id"]):
251-
ids= []
257+
if is_subarray_unordered(gls["subject_id"], res_haps["subject_id"]):
258+
ids = []
252259
haps = []
253260
extras = []
254261
shorts = []
255-
for idx,id in enumerate(res_haps["subject_id"]):
262+
for idx, id in enumerate(res_haps["subject_id"]):
256263
ids.append(id)
257264
haps.append(res_haps["res_haps"][idx])
258265
gl_idx = gls["subject_id"].index(id)
@@ -270,29 +277,40 @@ def change_output_by_extra_gl(config,gls,path_pmug,path_umug,path_umug_pops,path
270277
number_of_results = config["number_of_results"]
271278
number_of_pop_results = config["number_of_pop_results"]
272279

273-
fout_hap_haplo,fout_pop_haplo,fout_hap_muug,fout_pop_muug ="","","",""
280+
fout_hap_haplo, fout_pop_haplo, fout_hap_muug, fout_pop_muug = "", "", "", ""
274281

275282
if haps_output:
276283
fout_hap_haplo = open(path_pmug, "w")
277-
fout_pop_haplo = open(path_pmug_pops,"w")
284+
fout_pop_haplo = open(path_pmug_pops, "w")
278285
if MUUG_output:
279-
fout_hap_muug = open(path_umug,"w")
280-
fout_pop_muug = open(path_umug_pops,"w")
281-
miss = open(path_miss,"a")
286+
fout_hap_muug = open(path_umug, "w")
287+
fout_pop_muug = open(path_umug_pops, "w")
288+
miss = open(path_miss, "a")
282289

283-
for idx,id in enumerate(all_data["subject_id"]):
284-
subject_id = id
290+
for idx, id in enumerate(all_data["subject_id"]):
291+
subject_id = id
285292
res_haps = all_data["res_haps"][idx]
286293
extra_gl = all_data["extra_gl"][idx]
287294

288295
if len(extra_gl) > 0:
289296
res_haps = filter_results(res_haps, extra_gl)
290297

291-
if len(res_haps["Haps"]) == 0 :
298+
if len(res_haps["Haps"]) == 0:
292299
gl_idx = gls["subject_id"].index(subject_id)
293300
miss.write(str(gl_idx) + "," + str(subject_id) + "\n")
294301
else:
295-
write_filter(subject_id, res_haps, fout_hap_haplo, fout_pop_haplo, fout_hap_muug, fout_pop_muug,number_of_results,number_of_pop_results,MUUG_output,haps_output)
302+
write_filter(
303+
subject_id,
304+
res_haps,
305+
fout_hap_haplo,
306+
fout_pop_haplo,
307+
fout_hap_muug,
308+
fout_pop_muug,
309+
number_of_results,
310+
number_of_pop_results,
311+
MUUG_output,
312+
haps_output,
313+
)
296314

297315
if MUUG_output:
298316
fout_hap_muug.close()

0 commit comments

Comments
 (0)