Skip to content

Commit 5f3378d

Browse files
authored
Merge pull request #2430 from NNPDF/update_hyperoptplot
Hyperoptplot for new hyperopt
2 parents 2371942 + 3cb2372 commit 5f3378d

File tree

3 files changed

+117
-32
lines changed

3 files changed

+117
-32
lines changed

validphys2/src/validphys/hyperoptplot.py

Lines changed: 94 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import numpy as np
1919
import pandas as pd
2020
import seaborn as sns
21+
import matplotlib.pyplot as plt
2122

2223
from reportengine.figure import figure
2324
from reportengine.table import table
@@ -301,24 +302,18 @@ def parse_statistics(trial):
301302
testing loss
302303
status of the run
303304
"""
305+
hypr_keys = ["hyper_losses_chi2", "hyper_losses_phi2", "hyper_losses_logp", "trvl_losses_chi2exp"]
304306
dict_out = {}
305307
results = trial["result"]
306308
validation_loss = results[KEYWORDS["vl"]]
307309
testing_loss = results[KEYWORDS["tl"]]
308-
# was this a ok run?
309-
ok = bool(results["status"] == "ok")
310310

311-
dict_out[KEYWORDS["good"]] = ok
311+
dict_out[KEYWORDS["good"]] = bool(results["status"] == "ok")
312312
dict_out[KEYWORDS["vl"]] = validation_loss
313313
dict_out[KEYWORDS["tl"]] = testing_loss
314314

315-
# Kfolding information
316-
# average = results["kfold_meta"]["hyper_avg"]
317-
# std = results["kfold_meta"]["hyper_std"]
318-
# dict_out["avg"] = average
319-
# dict_out["std"] = std
320-
dict_out["hlosses"] = results["kfold_meta"]["hyper_losses"]
321-
dict_out["vlosses"] = results["kfold_meta"]["validation_losses"]
315+
for key in hypr_keys: # K-folding information
316+
dict_out[key] = results["kfold_meta"][key]
322317
return dict_out
323318

324319

@@ -344,27 +339,34 @@ def evaluate_trial(trial_dict, validation_multiplier, fail_threshold, loss_targe
344339
"""
345340
Read a trial dictionary and compute the true loss and decide whether the run passes or not
346341
"""
347-
test_f = 1.0 - validation_multiplier
348-
val_loss = float(trial_dict[KEYWORDS["vl"]])
342+
hypr_metric_keys = ["chi2", "phi2", "logp"]
349343
if loss_target == "average":
350-
test_loss = np.array(trial_dict["hlosses"]).mean()
344+
for hypr_key in hypr_metric_keys:
345+
trial_dict[f"hyper_loss_{hypr_key}"] = np.array(
346+
trial_dict[f"hyper_losses_{hypr_key}"]
347+
).mean()
351348
elif loss_target == "best_worst":
352-
test_loss = np.array(trial_dict["hlosses"]).max()
349+
for hypr_key in hypr_metric_keys:
350+
trial_dict[f"hyper_loss_{hypr_key}"] = np.array(
351+
trial_dict[f"hyper_losses_{hypr_key}"]
352+
).max()
353353
elif loss_target == "std":
354-
test_loss = np.array(trial_dict["hlosses"]).std()
355-
loss = val_loss * validation_multiplier + test_loss * test_f
354+
for hypr_key in hypr_metric_keys:
355+
trial_dict[f"hyper_loss_{hypr_key}"] = np.array(
356+
trial_dict[f"hyper_losses_{hypr_key}"]
357+
).std()
358+
else:
359+
raise ValueError(f"Loss target {loss_target} is not valid.")
356360

357-
if (
358-
loss > fail_threshold
359-
or val_loss > fail_threshold
360-
or test_loss > fail_threshold
361-
or np.isnan(loss)
362-
):
363-
trial_dict["good"] = False
364-
# Set the loss an order of magnitude above the result so it shows obviously on the plots
365-
loss *= 10
361+
for hypr_key in hypr_metric_keys:
362+
if np.isnan(trial_dict[f"hyper_loss_{hypr_key}"]):
363+
trial_dict[f"hyper_loss_{hypr_key}"] *= 100
364+
365+
trial_dict["trvl_loss_chi2exp"] = np.array(
366+
trial_dict["trvl_losses_chi2exp"]
367+
).mean()
366368

367-
trial_dict["loss"] = loss
369+
return
368370

369371

370372
def generate_dictionary(
@@ -543,10 +545,6 @@ def hyperopt_dataframe(commandline_args):
543545
# Make into a dataframe and transpose or the plotting code will complain
544546
best_trial = best_trial_series.to_frame().T
545547

546-
log.info("Best setup:")
547-
with pd.option_context("display.max_rows", None, "display.max_columns", None):
548-
log.info(best_trial)
549-
550548
return dataframe, best_trial
551549

552550

@@ -585,9 +583,11 @@ def hyperopt_table(hyperopt_dataframe):
585583
Generates a table containing complete information on all the tested setups that passed the
586584
filters set in the commandline arguments.
587585
"""
586+
drop_keys = ["hyper_losses_chi2", "hyper_losses_phi2", "hyper_losses_logp"]
588587
dataframe, _ = hyperopt_dataframe
589-
dataframe = dataframe.sort_values(by=["loss"])
590-
return dataframe
588+
n_layers = dataframe['number_of_layers'].max()
589+
drop_keys += [f"layer_{idx}" for idx in range(1, n_layers)]
590+
return dataframe.drop(columns=drop_keys, inplace=False).sort_values(by=["hyper_loss_chi2"], inplace=False)
591591

592592

593593
@figure
@@ -683,6 +683,68 @@ def plot_activation_per_layer(hyperopt_dataframe):
683683
fig = plot_scans(dataframe, best_trial, "activation_per_layer")
684684
return fig
685685

686+
@figure
687+
def plot_cumulative_logp_chi2(hyperopt_dataframe, commandline_args):
688+
"""
689+
Generate a plot of the running average of the log-likelihood (chi2)
690+
on the left (right) axis as a function of the trial index
691+
"""
692+
693+
args = SimpleNamespace(**commandline_args)
694+
chi2max = args.chi2_threshold
695+
results, _ = hyperopt_dataframe
696+
mlogp_ = results['hyper_loss_logp'].to_numpy()
697+
chi2_ = results['hyper_loss_chi2'].to_numpy()
698+
699+
# don't look at samples with -logp or chi2 too big
700+
idx_ok = np.where(chi2_<chi2max)
701+
fig, ax1 = plt.subplots()
702+
color = 'tab:blue'
703+
mlogp = mlogp_[idx_ok]
704+
xlabels = np.arange(len(mlogp))
705+
cum_average = np.cumsum(mlogp)/np.arange(1,len(mlogp)+1)
706+
ax1.scatter(xlabels, cum_average, color=color, s=50, label="cum avg")
707+
ax1.set_ylabel(r"$- \text{E}\left[\log p(\theta)\right]_{\text{trials}}$", color=color)
708+
ax1.set_xlabel('number of trials')
709+
ax1.tick_params(axis='y', labelcolor=color)
710+
711+
ax2 = ax1.twinx()
712+
color = 'tab:red'
713+
chi2 = chi2_[idx_ok]
714+
xlabels = np.arange(len(chi2))
715+
cum_average_chi2 = np.cumsum(chi2)/np.arange(1,len(chi2)+1)
716+
ax2.scatter(xlabels, cum_average_chi2, marker='*', color=color, s=50, label=r"$\chi^2(\theta)$", alpha=0.3)
717+
ax2.set_ylabel(r"$\text{E}\left[\chi^2(\theta)\right]_{\text{trials}}$", color=color)
718+
ax2.tick_params(axis='y', labelcolor=color)
719+
return fig
720+
721+
@figure
722+
def plot_cumulative_loss(hyperopt_dataframe, commandline_args):
723+
"""
724+
Generate a plot of the running average of the log-likelihood
725+
as a function of the trial index
726+
"""
727+
728+
args = SimpleNamespace(**commandline_args)
729+
chi2exp_max = args.chi2exp_threshold
730+
results, _ = hyperopt_dataframe
731+
732+
mloss_ = results['loss'].to_numpy()
733+
chi2_ = results['hyper_loss_chi2'].to_numpy()
734+
chi2exp = results['trvl_loss_chi2exp'].to_numpy()
735+
736+
idx_ok = np.where(chi2exp<chi2exp_max)
737+
fig, ax = plt.subplots()
738+
mloss = mloss_[idx_ok]
739+
xlabels = np.arange(len(mloss))
740+
cum_average = np.cumsum(mloss)/np.arange(1,len(mloss)+1)
741+
ax.scatter(xlabels, cum_average, s=50, label="cum avg")
742+
ax.set_ylabel(r"loss")
743+
ax.set_xlabel('number of trials')
744+
ax.tick_params(axis='y')
745+
746+
return fig
747+
686748

687749
def order_axis(df, bestdf, key):
688750
"""

validphys2/src/validphys/hyperplottemplates/report.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,5 +18,12 @@
1818
## Activation function
1919
{@ plot_activation_per_layer @}
2020

21+
## Loss (including penalty term)
22+
{@ plot_cumulative_loss @}
23+
24+
## -logp and chi2
25+
{@ plot_cumulative_logp_chi2 @}
26+
27+
2128
## Results Table
2229
[Detailed hyperopt results]({@ results_table report @})

validphys2/src/validphys/scripts/vp_hyperoptplot.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,20 @@ def add_positional_arguments(self, parser):
4444
type=float,
4545
default=1e3,
4646
)
47+
parser.add_argument(
48+
"-ct",
49+
"--chi2_threshold",
50+
help="Value of the chi2 threshold for logp plots",
51+
type=float,
52+
default=5.,
53+
)
54+
parser.add_argument(
55+
"-cet",
56+
"--chi2exp_threshold",
57+
help="Value of the exp chi2 threshold for loss plots",
58+
type=float,
59+
default=1.35,
60+
)
4761
parser.add_argument(
4862
"-f",
4963
"--filter",
@@ -123,6 +137,8 @@ def complete_mapping(self):
123137
"autofilter": args["autofilter"],
124138
"debug": args["debug"],
125139
"loss_target": args["loss_target"],
140+
"chi2exp_threshold": args["chi2exp_threshold"],
141+
"chi2_threshold": args["chi2_threshold"],
126142
}
127143

128144
try:

0 commit comments

Comments
 (0)