Skip to content

Commit 5d69b5d

Browse files
authored
Merge pull request #2391 from NNPDF/hyperopt_penalty
Penalty term in hyperopt figure of merit
2 parents f67898d + 1ddaafc commit 5d69b5d

File tree

3 files changed

+52
-1
lines changed

3 files changed

+52
-1
lines changed

n3fit/src/n3fit/layers/losses.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,3 +183,40 @@ def apply_loss(self, y_pred):
183183
y = y_pred * y_pred
184184
# Sum over the batch and the datapoints
185185
return op.sum(y, axis=[0, -1])
186+
187+
class LossHyperopt:
    """
    Penalty term for the hyperopt figure of merit: L = c * elu(chi2 - chi2ref).

    The input experimental chi2 is shifted by the reference value ``chi2ref``
    and passed through the ELU function

        f(x) = x                      if x > 0
        f(x) = alpha * (e^{x} - 1)    otherwise

    The ELU is used (instead of a hard cut at zero) to avoid a big
    discontinuity in the derivative at 0 when the multiplier ``c`` is very big.
    The ELU term itself lies in (-alpha, inf); the returned value is that term
    multiplied by ``c``, so for positive ``c`` it lies in (-c * alpha, inf).

    Parameters
    ----------
    c:
        multiplier applied to the ELU term (the Lagrange multiplier)
    alpha:
        scale of the negative branch of the ELU
    chi2ref:
        reference chi2 subtracted from the input before applying the ELU

    Example
    -------
    >>> import numpy as np
    >>> from n3fit.layers import losses
    >>> chi2 = np.asarray(2)
    >>> alpha = 1e-7
    >>> c = 1e2
    >>> chi2ref = np.asarray(1.25)
    >>> loss_h = losses.LossHyperopt(c=c, alpha=alpha, chi2ref=chi2ref)
    >>> loss_h(chi2) == np.asarray(c * (chi2-chi2ref))
    True
    """

    def __init__(self, c=1e2, alpha=1e-10, chi2ref=1.2):
        self.c, self.alpha, self.chi2ref = c, alpha, chi2ref

    def __call__(self, chi2):
        # Distance of the measured chi2 from the reference value
        excess = chi2 - self.chi2ref
        activated = op.elu(excess, alpha=self.alpha)
        # NOTE(review): assumes the backend result exposes ``.numpy()`` -- confirm
        return self.c * activated.numpy()
221+
222+

n3fit/src/n3fit/model_trainer.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from n3fit import model_gen
1919
from n3fit.backends import NN_LAYER_ALL_REPLICAS, MetaModel, callbacks, clear_backend_state
2020
from n3fit.backends import operations as op
21+
from n3fit.layers import losses
2122
from n3fit.hyper_optimization.hyper_scan import HYPEROPT_STATUSES
2223
import n3fit.hyper_optimization.penalties
2324
from n3fit.hyper_optimization.rewards import HyperLoss
@@ -891,6 +892,7 @@ def hyperparametrizable(self, params):
891892
trvl_chi2_per_fold = []
892893
trvl_phi2_per_fold = []
893894
trvl_logp_per_fold = []
895+
trvl_chi2exp_per_fold = []
894896

895897
# Generate the grid in x, note this is the same for all partitions
896898
xinput = self._xgrid_generation()
@@ -1044,6 +1046,7 @@ def hyperparametrizable(self, params):
10441046
l_valid.append(validation_loss)
10451047
l_exper.append(experimental_loss)
10461048
trvl_chi2_per_fold.append(hyper_metrics.chi2)
1049+
trvl_chi2exp_per_fold.append(hyper_metrics.chi2exp)
10471050
trvl_phi2_per_fold.append(hyper_metrics.phi2)
10481051
trvl_logp_per_fold.append(hyper_metrics.logp)
10491052
pdfs_per_fold.append(pdf_model)
@@ -1074,6 +1077,11 @@ def hyperparametrizable(self, params):
10741077

10751078
# Compute the loss over all folds for hyperopt
10761079
final_hyper_loss = self._hyper_loss.reduce_over_folds(l_hyper)
1080+
1081+
# Add penalty term to ensure convergence
1082+
exp_chi2_fitted_data = np.average(trvl_chi2exp_per_fold)
1083+
expchi2_penalty = losses.LossHyperopt()
1084+
final_hyper_loss += expchi2_penalty(exp_chi2_fitted_data)
10771085

10781086
# Hyperopt needs a dictionary with information about the losses
10791087
# it is possible to store arbitrary information in the trial file
@@ -1086,6 +1094,7 @@ def hyperparametrizable(self, params):
10861094
"kfold_meta": {
10871095
"validation_losses": l_valid,
10881096
"trvl_losses_chi2": np.array(trvl_chi2_per_fold),
1097+
"trvl_losses_chi2exp": np.array(trvl_chi2exp_per_fold),
10891098
"trvl_losses_phi2": np.array(trvl_phi2_per_fold),
10901099
"trvl_losses_logp": np.array(trvl_logp_per_fold),
10911100
"experimental_losses": l_exper,

n3fit/src/n3fit/vpinterface.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ class HyperoptMetrics:
6262
chi2: float
6363
phi2: float
6464
logp: float
65+
chi2exp: float
6566

6667

6768
class N3Stats(MCStats):
@@ -443,11 +444,15 @@ def compute_hyperopt_metrics(n3pdf, experimental_data) -> HyperoptMetrics:
443444
# Compute the chi2
444445
total_covmat_chol = la.cholesky(total_covmat, lower=True)
445446
chi2 = calc_chi2(sqrtcov=total_covmat_chol, diffs=diffs)
447+
448+
# Compute the experimental chi2
449+
exp_covmat_chol = la.cholesky(exp_cov, lower=True)
450+
chi2exp = calc_chi2(sqrtcov=exp_covmat_chol, diffs=diffs)
446451

447452
# Compute phi2
448453
phi2 = calc_phi(sqrtcov=exp_covmat_col, diffs=diffs_reps)
449454

450455
ndat = len(diffs)
451456
logp = -0.5 * (len(diffs) * np.log(2 * np.pi) + log_det_total_cov + chi2)
452457

453-
return HyperoptMetrics(chi2=chi2 / ndat, phi2=phi2, logp=-logp / ndat)
458+
return HyperoptMetrics(chi2=chi2 / ndat, phi2=phi2, logp=-logp / ndat, chi2exp=chi2exp / ndat)

0 commit comments

Comments
 (0)