Skip to content

Commit 355b4f3

Browse files
authored
Merge pull request #2401 from NNPDF/make_feature_scaling_clearer
Make the usage of feature scaling + large x a bit clearer
2 parents a14befa + ab37c79 commit 355b4f3

File tree

7 files changed

+73
-41
lines changed

7 files changed

+73
-41
lines changed

doc/sphinx/source/n3fit/methodology.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -323,7 +323,7 @@ incapable of distinguishing features across many orders of magnitude of ``x``, t
323323
scaling means that the algorithm is limited to learning features on a logarithmic and linear scale.
324324

325325
To solve this problem there is the possibility to apply a different feature scaling to the input by
326-
adding a ``interpolation_points: [number of points]`` flag to the ``n3fit`` runcard. By adding this
326+
adding a ``feature_scaling_points: [number of points]`` flag to the ``n3fit`` runcard. By adding this
327327
flag the ``(x,log(x))`` scaling is replaced by a scaling in such a way that all input ``x`` values
328328
are evenly distributed on the domain ``[-1,1]``, and the input node is no longer split in two.
329329

@@ -333,7 +333,7 @@ increasing cubic spline is used to interpolate after the scaling has been applie
333333
function from the scipy library is used. However, this way the neural network will be agnostic to
334334
the existence of this interpolation function meaning it can no longer learn the true underlying law.
335335
To fix this, the interpolation function has to be probed as well. This is done by only using
336-
``[number of points]`` set by the ``interpolation_points`` flag to define the interpolation function
336+
``[number of points]`` set by the ``feature_scaling_points`` flag to define the interpolation function
337337
after the scaling has been applied. Using this methodology the points used in the interpolation are
338338
again evenly distributed.
339339

extra_tests/regression_fits/feature_scaling.yml

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -50,20 +50,20 @@ parameters: # This defines the parameter dictionary that is passed to the Model
5050
layer_type: 'dense'
5151
dropout: 0.0
5252
threshold_chi2: 10.0
53-
interpolation_points: 15
53+
feature_scaling_points: 15
5454

5555
fitting:
5656
savepseudodata: False
5757
fitbasis: EVOL
5858
basis:
59-
- {fl: sng, trainable: false, smallx: [1.094, 1.118], largex: [1.46, 3.003]}
60-
- {fl: g, trainable: false, smallx: [0.8189, 1.044], largex: [2.791, 5.697]}
61-
- {fl: v, trainable: false, smallx: [0.457, 0.7326], largex: [1.56, 3.431]}
62-
- {fl: v3, trainable: false, smallx: [0.1462, 0.4061], largex: [1.745, 3.452]}
63-
- {fl: v8, trainable: false, smallx: [0.5401, 0.7665], largex: [1.539, 3.393]}
64-
- {fl: t3, trainable: false, smallx: [-0.4401, 0.9163], largex: [1.773, 3.333]}
65-
- {fl: t8, trainable: false, smallx: [0.5852, 0.8537], largex: [1.533, 3.436]}
66-
- {fl: t15, trainable: false, smallx: [1.082, 1.142], largex: [1.461, 3.1]}
59+
- {fl: sng, trainable: false, smallx: [1.094, 1.118]}
60+
- {fl: g, trainable: false, smallx: [0.8189, 1.044]}
61+
- {fl: v, trainable: false, smallx: [0.457, 0.7326]}
62+
- {fl: v3, trainable: false, smallx: [0.1462, 0.4061]}
63+
- {fl: v8, trainable: false, smallx: [0.5401, 0.7665]}
64+
- {fl: t3, trainable: false, smallx: [-0.4401, 0.9163]}
65+
- {fl: t8, trainable: false, smallx: [0.5852, 0.8537]}
66+
- {fl: t15, trainable: false, smallx: [1.082, 1.142]}
6767

6868
############################################################
6969
positivity:

n3fit/runcards/example-nnpdf41.yml

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -127,21 +127,22 @@ parameters: # This defines the parameter dictionary that is passed to the Model
127127
layer_type: dense
128128
dropout: 0.0
129129
threshold_chi2: 3.5
130-
interpolation_points: 5
130+
feature_scaling_points: 5
131131

132132
fitting:
133133
fitbasis: CCBAR_ASYMM # EVOL (7), EVOLQED (8), etc.
134134
savepseudodata: true
135135
basis:
136-
- {fl : sng, trainable : false, smallx : [1.095, 1.146], largex : [0., 0.]}
137-
- {fl : g, trainable : false, smallx : [0.7978, 1.087], largex : [0., 0.]}
138-
- {fl : v, trainable : false, smallx : [0.4775, 0.6577], largex : [0., 0.]}
139-
- {fl : v3, trainable : false, smallx : [0.1068, 0.493], largex : [0., 0.]}
140-
- {fl : v8, trainable : false, smallx : [0.5914, 0.7776], largex : [0., 0.]}
141-
- {fl : t3, trainable : false, smallx : [-0.3737, 1.0], largex : [0., 0.]}
142-
- {fl : t8, trainable : false, smallx : [0.5771, 0.9486], largex : [0., 0.]}
143-
- {fl : t15, trainable : false, smallx : [1.062, 1.153], largex : [0., 0.]}
144-
- {fl : v15, trainable : false, smallx : [0.4515, 0.7648], largex : [0., 0.]}
136+
- {fl : sng , trainable : false , smallx : [1.095 , 1.146] }
137+
- {fl : g , trainable : false , smallx : [0.7978 , 1.087] }
138+
- {fl : v , trainable : false , smallx : [0.4775 , 0.6577] }
139+
- {fl : v3 , trainable : false , smallx : [0.1068 , 0.493] }
140+
- {fl : v8 , trainable : false , smallx : [0.5914 , 0.7776] }
141+
- {fl : t3 , trainable : false , smallx : [-0.3737 , 1.0] }
142+
- {fl : t8 , trainable : false , smallx : [0.5771 , 0.9486] }
143+
- {fl : t15 , trainable : false , smallx : [1.062 , 1.153] }
144+
- {fl : v15 , trainable : false , smallx : [0.4515 , 0.7648] }
145+
145146

146147
################################################################################
147148
positivity:

n3fit/runcards/examples/Basic_feature_scaling.yml

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -49,21 +49,21 @@ parameters: # This defines the parameter dictionary that is passed to the Model
4949
stopping_patience: 0.30 # percentage of the number of epochs
5050
layer_type: 'dense'
5151
dropout: 0.0
52-
interpolation_points: 40
52+
feature_scaling_points: 40
5353
threshold_chi2: 5.0
5454

5555
fitting:
5656
fitbasis: NN31IC # EVOL (7), EVOLQED (8), etc.
5757
basis:
5858
# remember to change the name of the PDF in accordance with fitbasis
59-
- { fl: sng, smallx: [1.05,1.19], largex: [1.47,2.70], trainable: False }
60-
- { fl: g, smallx: [0.94,1.25], largex: [0.11,5.87], trainable: False }
61-
- { fl: v, smallx: [0.54,0.75], largex: [1.15,2.76], trainable: False }
62-
- { fl: v3, smallx: [0.21,0.57], largex: [1.35,3.08] }
63-
- { fl: v8, smallx: [0.52,0.76], largex: [0.77,3.56], trainable: True }
64-
- { fl: t3, smallx: [-0.37,1.52], largex: [1.74,3.39] }
65-
- { fl: t8, smallx: [0.56,1.29], largex: [1.45,3.03] }
66-
- { fl: cp, smallx: [0.12,1.19], largex: [1.83,6.70] }
59+
- { fl: sng, smallx: [1.05,1.19], trainable: False }
60+
- { fl: g, smallx: [0.94,1.25], trainable: False }
61+
- { fl: v, smallx: [0.54,0.75], trainable: False }
62+
- { fl: v3, smallx: [0.21,0.57] }
63+
- { fl: v8, smallx: [0.52,0.76], trainable: True }
64+
- { fl: t3, smallx: [-0.37,1.52] }
65+
- { fl: t8, smallx: [0.56,1.29] }
66+
- { fl: cp, smallx: [0.12,1.19] }
6767

6868
############################################################
6969
positivity:

n3fit/src/n3fit/checks.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -396,6 +396,7 @@ def check_consistent_basis(sum_rules, fitbasis, basis, theoryid, parameters):
396396
- Checks the sum rules can be imposed
397397
- Correct flavours for the selected basis
398398
- Correct ranges (min < max) for the small and large-x exponents
399+
- When feature scaling is active, the large_x interpolation is not set
399400
"""
400401
check_sumrules(sum_rules)
401402
# Check that there are no duplicate flavours and that parameters are sane
@@ -405,12 +406,19 @@ def check_consistent_basis(sum_rules, fitbasis, basis, theoryid, parameters):
405406
smallx = flavour_dict["smallx"]
406407
if smallx[0] > smallx[1]:
407408
raise CheckError(f"Wrong smallx range for flavour {name}: {smallx}")
408-
largex = flavour_dict.get("largex")
409-
if largex is not None and largex[0] > largex[1]:
410-
raise CheckError(f"Wrong largex range for flavour {name}: {largex}")
411409
if name in flavs:
412410
raise CheckError(f"Repeated flavour name: {name}. Check basis dictionary")
413411
flavs.append(name)
412+
413+
# The large-x exponent may be omitted when feature scaling is enabled
414+
if parameters.get("feature_scaling_points") is not None:
415+
if "largex" in flavour_dict and not flavour_dict["largex"] == [0.0, 0.0]:
416+
raise CheckError("No largex exponent allowed when feature_scaling_points is set")
417+
else:
418+
largex = flavour_dict["largex"]
419+
if largex[0] > largex[1]:
420+
raise CheckError(f"Wrong largex range for flavour {name}: {largex}")
421+
414422
# Finally check whether the basis considers or not charm
415423
# Check that the basis given in the runcard is one of those defined in validphys.pdfbases
416424
vp_basis = check_basis(fitbasis, flavs)["basis"]
@@ -438,7 +446,7 @@ def check_consistent_parallel(parameters, parallel_models):
438446

439447

440448
@make_argcheck
441-
def check_deprecated_options(fitting):
449+
def check_deprecated_options(fitting, parameters):
442450
"""Checks whether the runcard is using deprecated options"""
443451
options_outside = ["trvlseed", "nnseed", "mcseed", "save", "load", "genrep", "parameters"]
444452
for option in options_outside:
@@ -452,6 +460,10 @@ def check_deprecated_options(fitting):
452460
for option in nnfit_options:
453461
if option in fitting:
454462
log.warning("'fitting::%s' is an nnfit-only key, it will be ignored", option)
463+
if "interpolation_points" in parameters:
464+
raise CheckError(
465+
"`interpolation_points` no longer accepted, please change to `feature_scaling_points`"
466+
)
455467

456468

457469
@make_argcheck

n3fit/src/n3fit/model_trainer.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,10 @@
1818
from n3fit import model_gen
1919
from n3fit.backends import NN_LAYER_ALL_REPLICAS, MetaModel, callbacks, clear_backend_state
2020
from n3fit.backends import operations as op
21-
from n3fit.layers import losses
2221
from n3fit.hyper_optimization.hyper_scan import HYPEROPT_STATUSES
2322
import n3fit.hyper_optimization.penalties
2423
from n3fit.hyper_optimization.rewards import HyperLoss
24+
from n3fit.layers import losses
2525
from n3fit.scaler import generate_scaler
2626
from n3fit.stopping import Stopping
2727
from n3fit.vpinterface import N3PDF, compute_hyperopt_metrics
@@ -876,7 +876,7 @@ def hyperparametrizable(self, params):
876876
integrability_dict.get("multiplier"),
877877
integrability_dict.get("initial"),
878878
epochs,
879-
params.get("interpolation_points"),
879+
params.get("feature_scaling_points"),
880880
)
881881
threshold_pos = positivity_dict.get("threshold", 1e-6)
882882
threshold_chi2 = params.get("threshold_chi2", CHI2_THRESHOLD)
@@ -1077,11 +1077,11 @@ def hyperparametrizable(self, params):
10771077

10781078
# Compute the loss over all folds for hyperopt
10791079
final_hyper_loss = self._hyper_loss.reduce_over_folds(l_hyper)
1080-
1080+
10811081
# Add penalty term to ensure convergence
10821082
exp_chi2_fitted_data = np.average(trvl_chi2exp_per_fold)
10831083
expchi2_penalty = losses.LossHyperopt()
1084-
final_hyper_loss += expchi2_penalty(exp_chi2_fitted_data)
1084+
final_hyper_loss += expchi2_penalty(exp_chi2_fitted_data)
10851085

10861086
# Hyperopt needs a dictionary with information about the losses
10871087
# it is possible to store arbitrary information in the trial file

validphys2/src/validphys/eff_exponents.py

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -331,8 +331,11 @@ def previous_effective_exponents_table(fit: FitSpec):
331331
)
332332
basis = checked["basis"]
333333
flavours = checked["flavours"]
334-
prev_a_bounds = [runcard_fl['smallx'] for runcard_fl in fitting["basis"]]
335-
prev_b_bounds = [runcard_fl['largex'] for runcard_fl in fitting["basis"]]
334+
prev_a_bounds = []
335+
prev_b_bounds = []
336+
for runcard_fl in fitting["basis"]:
337+
prev_a_bounds.append(runcard_fl.get("smallx", (0.0, 0.0)))
338+
prev_b_bounds.append(runcard_fl.get("largex", (0.0, 0.0)))
336339
# make single list alternating alpha and beta bounds
337340
data = [vals for pair in zip(prev_a_bounds, prev_b_bounds) for vals in pair]
338341
flavours_label = [f"${basis.elementlabel(fl)}$" for fl in flavours]
@@ -449,6 +452,10 @@ def effective_exponents_table_internal(next_effective_exponents_table, *, fit=No
449452
# have to call action here in case fit is None
450453
previous_table = previous_effective_exponents_table(fit)
451454
df = pd.concat((previous_table, next_effective_exponents_table), axis=1)
455+
456+
if "feature_scaling_points" in fit.as_input()["parameters"]:
457+
# Drop the beta if feature scaling points is enabled
458+
df.loc[df.index.get_level_values(1) == r'$\beta$', :] = None
452459
else:
453460
df = next_effective_exponents_table
454461
return df
@@ -509,6 +516,12 @@ def iterate_preprocessing_yaml(fit, next_fit_eff_exps_table, _flmap_np_clip_arg=
509516
checked = check_basis(basis, None)
510517
basis = checked["basis"]
511518

519+
# If the runcard still has the old option `interpolation_points` change it to `feature_scaling_points`:
520+
if "interpolation_points" in filtermap["parameters"]:
521+
filtermap["parameters"]["feature_scaling_points"] = filtermap["parameters"].pop(
522+
"interpolation_points"
523+
)
524+
512525
# use order defined in runcard.
513526
runcard_flavours = [f"{basis.elementlabel(ref_fl['fl'])}" for ref_fl in previous_exponents]
514527
for i, fl in enumerate(runcard_flavours):
@@ -523,7 +536,13 @@ def iterate_preprocessing_yaml(fit, next_fit_eff_exps_table, _flmap_np_clip_arg=
523536
if largex_args is not None:
524537
betas = np.clip(betas, **largex_args)
525538
previous_exponents[i]["smallx"] = [fmt(alpha) for alpha in alphas]
526-
previous_exponents[i]["largex"] = [fmt(beta) for beta in betas]
539+
# Regardless of whether there was a large x in the original runcard
540+
# drop it if feature scaling is set, to avoid future mistakes
541+
if filtermap["parameters"].get("feature_scaling_points") is None:
542+
previous_exponents[i]["largex"] = [fmt(beta) for beta in betas]
543+
else:
544+
# NB: previous_exponents aliases filtermap (see above); a failure here means the same failure in the actual runcard
545+
previous_exponents[i].pop("largex", None)
527546
with tempfile.NamedTemporaryFile() as fp:
528547
path = Path(fp.name)
529548
yaml_rt.dump(filtermap, path)

0 commit comments

Comments
 (0)