Merge pull request #2391 from NNPDF/hyperopt_penalty

scarlehoff · web-flow · commit 5d69b5dd6dd5 · 2025-12-18T11:50:08.000+01:00
Penalty term in hyperopt figure of merit
diff --git a/n3fit/src/n3fit/layers/losses.py b/n3fit/src/n3fit/layers/losses.py
@@ -183,3 +183,40 @@ def apply_loss(self, y_pred):
         y = y_pred * y_pred
         # Sum over the batch and the datapoints
         return op.sum(y, axis=[0, -1])
+
+class LossHyperopt:
+    """
+    Returns the hyperopt penalty L = c * elu(chi2 - chi2ref)
+
+    The hyperopt loss is computed by taking the difference
+    between the input experimental chi2 and a chi2 reference value chi2ref,
+    and then applying the elu function, defined by
+        f(x) = x if x > 0
+        f(x) = alpha * (e^{x} - 1) if x <= 0
+    This is done to avoid a big discontinuity in the derivative at 0 when
+    the lagrange multiplier c is very big.
+    In practice elu lies in (-alpha, inf), so for c > 0 L lies in (-c * alpha, inf)
+
+    Example
+    -------
+    >>> import numpy as np
+    >>> from n3fit.layers import losses
+    >>> chi2 = np.asarray(2)
+    >>> alpha = 1e-7
+    >>> c = 1e2
+    >>> chi2ref = np.asarray(1.25)
+    >>> loss_h = losses.LossHyperopt(c=c, alpha=alpha, chi2ref=chi2ref)
+    >>> loss_h(chi2) == np.asarray(c * (chi2-chi2ref))
+    True
+    """
+
+    def __init__(self, c=1e2, alpha=1e-10, chi2ref=1.2):
+        self.c = c  # multiplier applied to the elu output in __call__
+        self.alpha = alpha  # forwarded as the ``alpha`` argument of op.elu
+        self.chi2ref = chi2ref  # reference value subtracted from the input chi2
+
+    def __call__(self, chi2):
+        loss = op.elu(chi2-self.chi2ref, alpha=self.alpha)  # elu of the distance to chi2ref
+        return self.c * loss.numpy()  # NOTE(review): presumably a backend tensor -> numpy; confirm
+
+    
diff --git a/n3fit/src/n3fit/model_trainer.py b/n3fit/src/n3fit/model_trainer.py
@@ -18,6 +18,7 @@
 from n3fit import model_gen
 from n3fit.backends import NN_LAYER_ALL_REPLICAS, MetaModel, callbacks, clear_backend_state
 from n3fit.backends import operations as op
+from n3fit.layers import losses
 from n3fit.hyper_optimization.hyper_scan import HYPEROPT_STATUSES
 import n3fit.hyper_optimization.penalties
 from n3fit.hyper_optimization.rewards import HyperLoss
@@ -891,6 +892,7 @@ def hyperparametrizable(self, params):
         trvl_chi2_per_fold = []
         trvl_phi2_per_fold = []
         trvl_logp_per_fold = []
+        trvl_chi2exp_per_fold = []
 
         # Generate the grid in x, note this is the same for all partitions
         xinput = self._xgrid_generation()
@@ -1044,6 +1046,7 @@ def hyperparametrizable(self, params):
                 l_valid.append(validation_loss)
                 l_exper.append(experimental_loss)
                 trvl_chi2_per_fold.append(hyper_metrics.chi2)
+                trvl_chi2exp_per_fold.append(hyper_metrics.chi2exp)
                 trvl_phi2_per_fold.append(hyper_metrics.phi2)
                 trvl_logp_per_fold.append(hyper_metrics.logp)
                 pdfs_per_fold.append(pdf_model)
@@ -1074,6 +1077,11 @@ def hyperparametrizable(self, params):
 
             # Compute the loss over all folds for hyperopt
             final_hyper_loss = self._hyper_loss.reduce_over_folds(l_hyper)
+            
+            # Add penalty term to ensure convergence
+            exp_chi2_fitted_data = np.average(trvl_chi2exp_per_fold)
+            expchi2_penalty = losses.LossHyperopt()
+            final_hyper_loss += expchi2_penalty(exp_chi2_fitted_data) 
 
             # Hyperopt needs a dictionary with information about the losses
             # it is possible to store arbitrary information in the trial file
@@ -1086,6 +1094,7 @@ def hyperparametrizable(self, params):
                 "kfold_meta": {
                     "validation_losses": l_valid,
                     "trvl_losses_chi2": np.array(trvl_chi2_per_fold),
+                    "trvl_losses_chi2exp": np.array(trvl_chi2exp_per_fold),
                     "trvl_losses_phi2": np.array(trvl_phi2_per_fold),
                     "trvl_losses_logp": np.array(trvl_logp_per_fold),
                     "experimental_losses": l_exper,
diff --git a/n3fit/src/n3fit/vpinterface.py b/n3fit/src/n3fit/vpinterface.py
@@ -62,6 +62,7 @@ class HyperoptMetrics:
     chi2: float
     phi2: float
     logp: float
+    chi2exp: float
 
 
 class N3Stats(MCStats):
@@ -443,11 +444,15 @@ def compute_hyperopt_metrics(n3pdf, experimental_data) -> HyperoptMetrics:
     # Compute the chi2
     total_covmat_chol = la.cholesky(total_covmat, lower=True)
     chi2 = calc_chi2(sqrtcov=total_covmat_chol, diffs=diffs)
+    
+    # Compute the experimental chi2
+    exp_covmat_chol = la.cholesky(exp_cov, lower=True)
+    chi2exp = calc_chi2(sqrtcov=exp_covmat_chol, diffs=diffs)
 
     # Compute phi2
     phi2 = calc_phi(sqrtcov=exp_covmat_col, diffs=diffs_reps)
 
     ndat = len(diffs)
     logp = -0.5 * (len(diffs) * np.log(2 * np.pi) + log_det_total_cov + chi2)
 
-    return HyperoptMetrics(chi2=chi2 / ndat, phi2=phi2, logp=-logp / ndat)
+    return HyperoptMetrics(chi2=chi2 / ndat, phi2=phi2, logp=-logp / ndat, chi2exp=chi2exp / ndat)