Skip to content

Commit 9dbf1d2

Browse files
committed
Started to implement normalized results writer.
1 parent b4178d0 commit 9dbf1d2

File tree

1 file changed

+102
-20
lines changed

1 file changed

+102
-20
lines changed

structure_threader/wrappers/maverick_wrapper.py

Lines changed: 102 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ def mav_ti_in_use(parameter_filename):
6363
parsed_data = mav_params_parser(parameter_filename, ("thermodynamic_on",))
6464

6565
use_ti = True
66-
if parsed_data["thermodynamic_on"] in ("f", "false", "0"):
66+
if parsed_data["thermodynamic_on"].lower() in ("f", "false", "0"):
6767
use_ti = False
6868
logging.error("Thermodynamic integration is turned OFF. "
6969
"Using STRUCTURE criteria for bestK estimation.")
@@ -133,18 +133,14 @@ def mav_alpha_failsafe(parameter_filename, k_list):
133133
return sorted_data
134134

135135

136-
def maverick_merger(outdir, k_list, params, no_tests):
136+
def maverick_merger(outdir, k_list, params_file, no_tests):
137137
"""
138138
Grabs the split outputs from MavericK and merges them in a single directory.
139139
Also uses the data from these files to generate an
140140
"outputEvidenceNormalised.csv" file.
141141
"""
142-
files_list = ["outputEvidence.csv", "outputEvidenceDetails.csv"]
143-
mrg_res_dir = os.path.join(outdir, "merged")
144-
os.makedirs(mrg_res_dir, exist_ok=True)
145-
log_evidence_mv = {}
146142

147-
def _mav_output_parser(filename, get_header):
143+
def _mav_output_parser(filename):
148144
"""
149145
Parse MavericK output files that need to be merged for TI calculations.
150146
Returns the contents of the parsed files as a single string, with or
@@ -154,8 +150,8 @@ def _mav_output_parser(filename, get_header):
154150
header = infile.readline()
155151
data = "".join(infile.readlines())
156152
infile.close()
157-
if get_header is True:
158-
data = header + data
153+
154+
data = header + data
159155

160156
return data
161157

@@ -173,24 +169,110 @@ def _ti_test(outdir, log_evidence_mv):
173169
bestk_file.close()
174170
return [int(bestk)]
175171

176-
for filename in files_list:
177-
header = True
178-
if mav_ti_in_use(params) is True:
179-
column_num = -2
172+
def _gen_files_list(output_params, no_tests):
    """
    Works out which MavericK output files have to be read and merged,
    using the entries of the parameter file named in "output_params".

    Returns a tuple "(files_list, no_tests)". The evidence file name always
    comes first in the list; the evidence details file name is added unless
    its "_on" switch is present and not set to a true value. "no_tests" is
    returned as True when the "outputEvidence_on" switch is set to a false
    value, and is passed through untouched otherwise.
    """
    off_values = ("f", "false", "0")
    on_values = ("t", "true", "1")

    settings = mav_params_parser(params_file, output_params)

    # A disabled evidence output means the tests cannot be performed
    try:
        evidence_switch = settings["outputEvidence_on"]
    except KeyError:
        pass
    else:
        if evidence_switch.lower() in off_values:
            no_tests = True
            logging.error("'outputEvidence' is set to false. Tests will be "
                          "skipped.")

    # File names fall back to the defaults when absent from the parameters
    try:
        summary_name = settings["outputEvidence"]
    except KeyError:
        summary_name = "outputEvidence.csv"

    try:
        details_name = settings["outputEvidenceDetails"]
    except KeyError:
        details_name = "outputEvidenceDetails.csv"

    # A missing switch counts as "on" for the details file
    try:
        want_details = settings["outputEvidenceDetails_on"].lower() in on_values
    except KeyError:
        want_details = True

    selected = [summary_name]
    if want_details:
        selected.append(details_name)

    return selected, no_tests
210+
211+
def _write_normalized_output(evidence, k_list):
    """
    Writes the normalized output file.

    "evidence" is indexed by the column names of the evidence file
    (presumably each entry holds one value per K -- confirm against the
    merging loop that builds it). "k_list" is handed over unchanged to
    "maverick_normalization".

    NOTE: this writer is still incomplete. It resolves the output path and
    computes the normalized values, but nothing is written to disk yet and
    the "posterior" column names are not used so far.
    """
    default_name = "outputEvidenceNormalised.csv"

    # The parser receives a tuple of parameter names at every other call
    # site, so a one-element tuple is passed here instead of a bare string.
    param_entry = mav_params_parser(params_file,
                                    ("outputEvidenceNormalised",))

    # Fall back to the default name both when the parser hands back nothing
    # usable (TypeError on subscripting None) and when the entry is simply
    # absent from the parameter file (KeyError).
    try:
        filename = param_entry["outputEvidenceNormalised"]
    except (KeyError, TypeError):
        filename = default_name
    filepath = os.path.join(mrg_res_dir, filename)

    categories = ("harmonic_grand", "structure_grand", "TI")

    # One [mean column, SE column] pair of input headers per category
    indep = [["logEvidence_" + x + "Mean",
              "logEvidence_" + x + "SE"] for x in categories]

    p_format = "posterior_{}{}"

    # Output column names, grouped per category (not consumed yet)
    posterior = [[[p_format.format(x.replace("_grand", ""), i)]
                  for i in ["_mean", "_LL", "_UL"]]
                 for x in categories]

    # The [mean, SE] pairs are lists and therefore unhashable: they cannot
    # be used as dictionary keys nor to index "evidence". Each pair is
    # unpacked instead and the results are stored per category name.
    # NOTE(review): assumes "maverick_normalization" takes the mean column,
    # the SE column and the K list, in this order -- confirm.
    normalized = {}
    for category, (mean_col, se_col) in zip(categories, indep):
        normalized[category] = maverick_normalization(evidence[mean_col],
                                                      evidence[se_col],
                                                      k_list)
238+
239+
240+
241+
output_params = ("outputEvidence", "outputEvidence_on",
242+
"outputEvidenceDetails_on", "outputEvidenceDetails")
243+
244+
files_list, no_tests = _gen_files_list(output_params, no_tests)
245+
246+
# Handle a new directory for merged data
247+
mrg_res_dir = os.path.join(outdir, "merged")
248+
os.makedirs(mrg_res_dir, exist_ok=True)
249+
250+
for filename in files_list:
182251
outfile = open(os.path.join(mrg_res_dir, filename), "w")
252+
first_k = True
253+
if filename == files_list[0]:
254+
evidence = {}
255+
else:
256+
evidence = None
183257
for i in k_list:
184258
data_dir = os.path.join(outdir, "mav_K" + str(i))
185-
data = _mav_output_parser(os.path.join(data_dir, filename), header)
186-
header = False
187-
if filename == "outputEvidence.csv":
188-
log_evidence_mv[data.split(",")[0]] = float(
189-
data.split(",")[column_num])
190-
outfile.write(data)
259+
data = _mav_output_parser(os.path.join(data_dir, filename))
260+
diff = data.split("\n")
261+
if evidence == {}:
262+
evidence = {head: [val] for head, val in
263+
zip(diff[0].split(","), diff[1].split(","))}
264+
elif evidence is not None:
265+
for j, k in zip(diff[0].split(","), diff[1].split(",")):
266+
evidence[j].append(k)
267+
if first_k:
268+
outfile.write(data)
269+
first_k = False
270+
else:
271+
outfile.write(diff[1])
191272

192273
outfile.close()
193274

275+
194276
if no_tests is False:
195277
bestk = _ti_test(outdir, log_evidence_mv)
196278
return bestk

0 commit comments

Comments
 (0)