Skip to content

Commit 3214041

Browse files
committed
Normalization is working.
1 parent 9dbf1d2 commit 3214041

File tree

1 file changed

+84
-76
lines changed

1 file changed

+84
-76
lines changed

structure_threader/wrappers/maverick_wrapper.py

Lines changed: 84 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
import structure_threader.colorer.colorer as colorer
3030

3131

32-
def mav_cli_generator(arg, k_val):
32+
def mav_cli_generator(arg, k_val, mav_params):
3333
"""
3434
Generates and returns the command line to run MavericK.
3535
"""
@@ -48,61 +48,51 @@ def mav_cli_generator(arg, k_val):
4848
root_dir, "-parameters", arg.params]
4949
if arg.notests is True:
5050
cli += ["-thermodynamic_on", "f"]
51-
failsafe = mav_alpha_failsafe(arg.params, arg.k_list)
51+
failsafe = mav_alpha_failsafe(mav_params, arg.k_list)
5252
for param in failsafe:
5353
if failsafe[param] is not False:
5454
cli += ["-" + param, failsafe[param][k_val]]
5555

5656
return cli, output_dir
5757

5858

59-
def mav_ti_in_use(parameter_filename):
59+
def mav_ti_in_use(parameters):
6060
"""
6161
Checks if TI is in use. Returns True or False.
6262
"""
63-
parsed_data = mav_params_parser(parameter_filename, ("thermodynamic_on",))
63+
ti_param = "thermodynamic_on"
6464

6565
use_ti = True
66-
if parsed_data["thermodynamic_on"].lower() in ("f", "false", "0"):
67-
use_ti = False
68-
logging.error("Thermodynamic integration is turned OFF. "
69-
"Using STRUCTURE criteria for bestK estimation.")
70-
elif not parsed_data:
66+
try:
67+
if parameters[ti_param].lower() in ("f", "false", "0"):
68+
use_ti = False
69+
logging.error("Thermodynamic integration is turned OFF. "
70+
"Using STRUCTURE criteria for bestK estimation.")
71+
except KeyError:
7172
logging.error("The parameter setting Thermodynamic integration was not "
7273
"found. Assuming the default 'on' value.")
7374

7475
return use_ti
7576

7677

77-
def mav_params_parser(parameter_filename, query):
78+
def mav_params_parser(parameter_filename):
7879
"""
7980
Parses MavericK's parameter file and returns the results in a dict.
80-
Returns "None" if no matches are found.
8181
"""
82-
# Add a "\t" at the end of each string to avoid finding partial strings
83-
# such as "alpha" and "alphaPropSD".
84-
sane_query = tuple((x + '\t' for x in query))
85-
print(sane_query)
86-
8782
param_file = open(parameter_filename, "r")
88-
result = {}
83+
parameters = {}
8984

9085
for lines in param_file:
91-
if lines.startswith(sane_query):
86+
if not lines.startswith(("#", "\n")):
9287
lines = lines.split()
93-
result[lines[0]] = lines[1]
88+
parameters[lines[0]] = lines[1]
9489

9590
param_file.close()
9691

97-
if result == {}:
98-
logging.error("Failed to find the parameter(s) '%s'. Please verify the "
99-
"parameter file, or the run options.", query)
100-
result = None
101-
else:
102-
return result
92+
return parameters
10393

10494

105-
def mav_alpha_failsafe(parameter_filename, k_list):
95+
def mav_alpha_failsafe(mav_params, k_list):
10696
"""
10797
Implements a failsafe for discrepancies with multiple alpha values.
10898
Returns the following dict:
@@ -112,12 +102,12 @@ def mav_alpha_failsafe(parameter_filename, k_list):
112102
"""
113103
parameters = ("alpha", "alphaPropSD")
114104

105+
parsed_data = {x: mav_params[x] if x in mav_params else False for x in
106+
parameters}
115107
sorted_data = {x: False for x in parameters}
116108

117-
parsed_data = mav_params_parser(parameter_filename, parameters)
118-
119-
if parsed_data is not None:
120-
for param, val in parsed_data.items():
109+
for param, val in parsed_data.items():
110+
if val:
121111
val = val.split(",")
122112
if len(val) > 1:
123113
if len(val) != len(k_list):
@@ -133,7 +123,7 @@ def mav_alpha_failsafe(parameter_filename, k_list):
133123
return sorted_data
134124

135125

136-
def maverick_merger(outdir, k_list, params_file, no_tests):
126+
def maverick_merger(outdir, k_list, mav_params, no_tests):
137127
"""
138128
Grabs the split outputs from MavericK and merges them in a single directory.
139129
Also uses the data from these files to generate an
@@ -155,19 +145,19 @@ def _mav_output_parser(filename):
155145

156146
return data
157147

158-
def _ti_test(outdir, log_evidence_mv):
159-
"""
160-
Write a bestK result based in TI results.
161-
"""
162-
bestk_dir = os.path.join(outdir, "bestK")
163-
os.makedirs(bestk_dir, exist_ok=True)
164-
bestk = max(log_evidence_mv, key=log_evidence_mv.get).replace("K", "1")
165-
bestk_file = open(os.path.join(bestk_dir, "TI_integration.txt"), "w")
166-
output_text = ("MavericK's estimation test revealed "
167-
"that the best value of 'K' is: {}\n".format(bestk))
168-
bestk_file.write(output_text)
169-
bestk_file.close()
170-
return [int(bestk)]
148+
# def _ti_test(outdir, log_evidence_mv):
149+
# """
150+
# Write a bestK result based on TI results.
151+
# """
152+
# bestk_dir = os.path.join(outdir, "bestK")
153+
# os.makedirs(bestk_dir, exist_ok=True)
154+
# bestk = max(log_evidence_mv, key=log_evidence_mv.get).replace("K", "1")
155+
# bestk_file = open(os.path.join(bestk_dir, "TI_integration.txt"), "w")
156+
# output_text = ("MavericK's estimation test revealed "
157+
# "that the best value of 'K' is: {}\n".format(bestk))
158+
# bestk_file.write(output_text)
159+
# bestk_file.close()
160+
# return [int(bestk)]
171161

172162
def _gen_files_list(output_params, no_tests):
173163
"""
@@ -176,34 +166,30 @@ def _gen_files_list(output_params, no_tests):
176166
"""
177167
files_list = []
178168

179-
parsed_params = mav_params_parser(params_file, output_params)
169+
parsed_params = {x: mav_params[x] if x in mav_params else False for x in
170+
output_params}
180171

181172
# Generate a list with the files to parse and merge
182-
try:
183-
if parsed_params["outputEvidence_on"].lower() in ("f",
184-
"false", "0"):
185-
no_tests = True
186-
logging.error("'outputEvidence' is set to false. Tests will be "
187-
"skipped.")
188-
except KeyError:
189-
pass
190173

191-
try:
174+
if parsed_params["outputEvidence_on"].lower() in ("f", "false", "0"):
175+
no_tests = True
176+
logging.error("'outputEvidence' is set to false. Tests will be "
177+
"skipped.")
178+
if parsed_params["outputEvidence"]:
192179
files_list.append(parsed_params["outputEvidence"])
193-
except KeyError:
180+
else:
194181
files_list.append("outputEvidence.csv")
195182

196-
try:
183+
if parsed_params["outputEvidenceDetails"]:
197184
evidence_filename = parsed_params["outputEvidenceDetails"]
198-
except KeyError:
185+
else:
199186
evidence_filename = "outputEvidenceDetails.csv"
200187

201-
try:
202-
if parsed_params["outputEvidenceDetails_on"].lower() in ("t",
203-
"true",
204-
"1"):
205-
files_list.append(evidence_filename)
206-
except KeyError:
188+
if parsed_params["outputEvidenceDetails_on"].lower() in ("f",
189+
"false",
190+
"0"):
191+
pass
192+
else:
207193
files_list.append(evidence_filename)
208194

209195
return files_list, no_tests
@@ -212,29 +198,50 @@ def _write_normalized_output(evidence, k_list):
212198
"""
213199
Writes the normalized output file.
214200
"""
215-
param_entry = mav_params_parser(params_file, "outputEvidenceNormalised")
201+
from itertools import chain
202+
param_entry = "outputEvidenceNormalised"
216203

217-
if param_entry is not None:
218-
filename = param_entry["outputEvidenceNormalised"]
204+
if param_entry in mav_params:
205+
filename = mav_params["outputEvidenceNormalised"]
219206
else:
220207
filename = "outputEvidenceNormalised.csv"
221208
filepath = os.path.join(mrg_res_dir, filename)
222209

223210
categories = ("harmonic_grand", "structure_grand", "TI")
224211

225-
indep = [["logEvidence_" + x + "Mean",
226-
"logEvidence_" + x + "SE"] for x in categories]
212+
indep = [["logEvidence_" + x + "Mean" if x != "TI" else "logEvidence_"
213+
+ x,
214+
"logEvidence_" + x + "SE" if x != "TI" else "logEvidence_"
215+
+ x + "_SE"] for x in categories]
227216

228217
p_format = "posterior_{}{}"
229218

230219
posterior = [[[p_format.format(x.replace("_grand", ""), i)]
231220
for i in ["_mean", "_LL", "_UL"]]
232221
for x in categories]
222+
flat_posterior = list(chain.from_iterable(
223+
list(chain.from_iterable(posterior))))
233224

234-
normalized = {}
225+
normalized = []
235226
for cat in indep:
236-
normalized[cat] = maverick_normalization(evidence[cat][0],
237-
evidence[cat][1], k_list)
227+
for i in cat:
228+
evidence[i] = [float(x) for x in evidence[i]]
229+
normalized.append(maverick_normalization(evidence[cat[0]],
230+
evidence[cat[1]], k_list))
231+
232+
dtypes = ("norm_mean", "lower_limit", "upper_limit")
233+
234+
outfile = open(filepath, 'w')
235+
236+
outfile.write(",".join(["K", "posterior_exhaustive"] + flat_posterior))
237+
outfile.write("\n")
238+
for k in k_list:
239+
line = str(k) + ",N/A"
240+
for i in normalized:
241+
line += "," + ",".join([str(i[k][x]) for x in dtypes])
242+
243+
outfile.write(line)
244+
outfile.write("\n")
238245

239246

240247

@@ -269,21 +276,22 @@ def _write_normalized_output(evidence, k_list):
269276
first_k = False
270277
else:
271278
outfile.write(diff[1])
272-
279+
if evidence is not None:
280+
_write_normalized_output(evidence, k_list)
273281
outfile.close()
274282

275283

276-
if no_tests is False:
277-
bestk = _ti_test(outdir, log_evidence_mv)
278-
return bestk
284+
285+
# if no_tests is False:
286+
# bestk = _ti_test(outdir, log_evidence_mv)
287+
# return bestk
279288

280289

281290
def maverick_normalization(x_mean, x_sd, klist, draws=int(1e6), limit=95):
282291
"""
283292
Performs TI normalization as in the original implementation from MavericK.
284293
This is essentially a port from the C++ code written by Bob Verity.
285294
"""
286-
287295
# subtract maximum value from x_mean (this has no effect on final outcome
288296
# but prevents under/overflow)
289297
# Just like in the original implementation (even though it should not be

0 commit comments

Comments
 (0)