Skip to content

Commit 47bd8d6

Browse files
committed
Rename `interpolation_points` to `feature_scaling_points`. The n3fit checks are now much stricter: no large-x exponent is allowed when feature scaling is on. Previously such exponents were silently dropped, so this PR introduces no change of functionality — only a change of checks. `vp-nextfitruncard` automatically renames `interpolation_points` to `feature_scaling_points` and drops `largex` if it is found together with `feature_scaling_points`.
1 parent 1e81366 commit 47bd8d6

File tree

7 files changed

+61
-40
lines changed

7 files changed

+61
-40
lines changed

doc/sphinx/source/n3fit/methodology.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -323,7 +323,7 @@ incapable of distinguishing features across many orders of magnitude of ``x``, t
323323
scaling means that the algorithm is limited to learning features on a logarithmic and linear scale.
324324

325325
To solve this problem there is the possibility to apply a different feature scaling to the input by
326-
adding a ``interpolation_points: [number of points]`` flag to the ``n3fit`` runcard. By adding this
326+
adding a ``feature_scaling_points: [number of points]`` flag to the ``n3fit`` runcard. By adding this
327327
flag the ``(x,log(x))`` scaling is replaced by a scaling in such a way that all input ``x`` values
328328
are evenly distributed on the domain ``[-1,1]``, and the input node is no longer split in two.
329329

@@ -333,7 +333,7 @@ increasing cubic spline is used to interpolate after the scaling has been applie
333333
function from the scipy library is used. However, this way the neural network will be agnostic to
334334
the existence of this interpolation function meaning it can no longer learn the true underlying law.
335335
To fix this, the interpolation function has to be probed as well. This is done by only using
336-
``[number of points]`` set by the ``interpolation_points`` flag to define the interpolation function
336+
``[number of points]`` set by the ``feature_scaling_points`` flag to define the interpolation function
337337
after the scaling has been applied. Using this methodology the points used in the interpolation are
338338
again evenly distributed.
339339

extra_tests/regression_fits/feature_scaling.yml

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -50,20 +50,20 @@ parameters: # This defines the parameter dictionary that is passed to the Model
5050
layer_type: 'dense'
5151
dropout: 0.0
5252
threshold_chi2: 10.0
53-
interpolation_points: 15
53+
feature_scaling_points: 15
5454

5555
fitting:
5656
savepseudodata: False
5757
fitbasis: EVOL
5858
basis:
59-
- {fl: sng, trainable: false, smallx: [1.094, 1.118], largex: [1.46, 3.003]}
60-
- {fl: g, trainable: false, smallx: [0.8189, 1.044], largex: [2.791, 5.697]}
61-
- {fl: v, trainable: false, smallx: [0.457, 0.7326], largex: [1.56, 3.431]}
62-
- {fl: v3, trainable: false, smallx: [0.1462, 0.4061], largex: [1.745, 3.452]}
63-
- {fl: v8, trainable: false, smallx: [0.5401, 0.7665], largex: [1.539, 3.393]}
64-
- {fl: t3, trainable: false, smallx: [-0.4401, 0.9163], largex: [1.773, 3.333]}
65-
- {fl: t8, trainable: false, smallx: [0.5852, 0.8537], largex: [1.533, 3.436]}
66-
- {fl: t15, trainable: false, smallx: [1.082, 1.142], largex: [1.461, 3.1]}
59+
- {fl: sng, trainable: false, smallx: [1.094, 1.118]}
60+
- {fl: g, trainable: false, smallx: [0.8189, 1.044]}
61+
- {fl: v, trainable: false, smallx: [0.457, 0.7326]}
62+
- {fl: v3, trainable: false, smallx: [0.1462, 0.4061]}
63+
- {fl: v8, trainable: false, smallx: [0.5401, 0.7665]}
64+
- {fl: t3, trainable: false, smallx: [-0.4401, 0.9163]}
65+
- {fl: t8, trainable: false, smallx: [0.5852, 0.8537]}
66+
- {fl: t15, trainable: false, smallx: [1.082, 1.142]}
6767

6868
############################################################
6969
positivity:

n3fit/runcards/example-nnpdf41.yml

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -123,20 +123,21 @@ parameters: # This defines the parameter dictionary that is passed to the Model
123123
layer_type: dense
124124
dropout: 0.0
125125
threshold_chi2: 3.5
126-
interpolation_points: 5
126+
feature_scaling_points: 5
127127
fitting:
128128
fitbasis: CCBAR_ASYMM # EVOL (7), EVOLQED (8), etc.
129129
savepseudodata: true
130130
basis:
131-
- {fl : sng, trainable : false, smallx : [1.095, 1.146], largex : [0., 0.]}
132-
- {fl : g, trainable : false, smallx : [0.7978, 1.087], largex : [0., 0.]}
133-
- {fl : v, trainable : false, smallx : [0.4775, 0.6577], largex : [0., 0.]}
134-
- {fl : v3, trainable : false, smallx : [0.1068, 0.493], largex : [0., 0.]}
135-
- {fl : v8, trainable : false, smallx : [0.5914, 0.7776], largex : [0., 0.]}
136-
- {fl : t3, trainable : false, smallx : [-0.3737, 1.0], largex : [0., 0.]}
137-
- {fl : t8, trainable : false, smallx : [0.5771, 0.9486], largex : [0., 0.]}
138-
- {fl : t15, trainable : false, smallx : [1.062, 1.153], largex : [0., 0.]}
139-
- {fl : v15, trainable : false, smallx : [0.4515, 0.7648], largex : [0., 0.]}
131+
- {fl : sng , trainable : false , smallx : [1.095 , 1.146] }
132+
- {fl : g , trainable : false , smallx : [0.7978 , 1.087] }
133+
- {fl : v , trainable : false , smallx : [0.4775 , 0.6577] }
134+
- {fl : v3 , trainable : false , smallx : [0.1068 , 0.493] }
135+
- {fl : v8 , trainable : false , smallx : [0.5914 , 0.7776] }
136+
- {fl : t3 , trainable : false , smallx : [-0.3737 , 1.0] }
137+
- {fl : t8 , trainable : false , smallx : [0.5771 , 0.9486] }
138+
- {fl : t15 , trainable : false , smallx : [1.062 , 1.153] }
139+
- {fl : v15 , trainable : false , smallx : [0.4515 , 0.7648] }
140+
140141

141142
################################################################################
142143
positivity:

n3fit/runcards/examples/Basic_feature_scaling.yml

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -51,25 +51,21 @@ parameters: # This defines the parameter dictionary that is passed to the Model
5151
stopping_patience: 0.30 # percentage of the number of epochs
5252
layer_type: 'dense'
5353
dropout: 0.0
54-
interpolation_points: 40
54+
feature_scaling_points: 40
5555
threshold_chi2: 5.0
5656

5757
fitting:
58-
# NN23(QED) = sng=0,g=1,v=2,t3=3,ds=4,sp=5,sm=6,(pht=7)
59-
# EVOL(QED) = sng=0,g=1,v=2,v3=3,v8=4,t3=5,t8=6,(pht=7)
60-
# EVOLS(QED)= sng=0,g=1,v=2,v8=4,t3=4,t8=5,ds=6,(pht=7)
61-
# FLVR(QED) = g=0, u=1, ubar=2, d=3, dbar=4, s=5, sbar=6, (pht=7)
6258
fitbasis: NN31IC # EVOL (7), EVOLQED (8), etc.
6359
basis:
6460
# remember to change the name of the PDF according to the fitbasis
65-
- { fl: sng, smallx: [1.05,1.19], largex: [1.47,2.70], trainable: False }
66-
- { fl: g, smallx: [0.94,1.25], largex: [0.11,5.87], trainable: False }
67-
- { fl: v, smallx: [0.54,0.75], largex: [1.15,2.76], trainable: False }
68-
- { fl: v3, smallx: [0.21,0.57], largex: [1.35,3.08] }
69-
- { fl: v8, smallx: [0.52,0.76], largex: [0.77,3.56], trainable: True }
70-
- { fl: t3, smallx: [-0.37,1.52], largex: [1.74,3.39] }
71-
- { fl: t8, smallx: [0.56,1.29], largex: [1.45,3.03] }
72-
- { fl: cp, smallx: [0.12,1.19], largex: [1.83,6.70] }
61+
- { fl: sng, smallx: [1.05,1.19], trainable: False }
62+
- { fl: g, smallx: [0.94,1.25], trainable: False }
63+
- { fl: v, smallx: [0.54,0.75], trainable: False }
64+
- { fl: v3, smallx: [0.21,0.57] }
65+
- { fl: v8, smallx: [0.52,0.76], trainable: True }
66+
- { fl: t3, smallx: [-0.37,1.52] }
67+
- { fl: t8, smallx: [0.56,1.29] }
68+
- { fl: cp, smallx: [0.12,1.19] }
7369

7470
############################################################
7571
positivity:

n3fit/src/n3fit/checks.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -396,6 +396,7 @@ def check_consistent_basis(sum_rules, fitbasis, basis, theoryid, parameters):
396396
- Checks the sum rules can be imposed
397397
- Correct flavours for the selected basis
398398
- Correct ranges (min < max) for the small and large-x exponents
399+
- When feature scaling is active, the large_x interpolation is not set
399400
"""
400401
check_sumrules(sum_rules)
401402
# Check that there are no duplicate flavours and that parameters are sane
@@ -405,12 +406,19 @@ def check_consistent_basis(sum_rules, fitbasis, basis, theoryid, parameters):
405406
smallx = flavour_dict["smallx"]
406407
if smallx[0] > smallx[1]:
407408
raise CheckError(f"Wrong smallx range for flavour {name}: {smallx}")
408-
largex = flavour_dict.get("largex")
409-
if largex is not None and largex[0] > largex[1]:
410-
raise CheckError(f"Wrong largex range for flavour {name}: {largex}")
411409
if name in flavs:
412410
raise CheckError(f"Repeated flavour name: {name}. Check basis dictionary")
413411
flavs.append(name)
412+
413+
# Large-x is allowed to not exist if feature scaling is enabled
414+
if parameters.get("feature_scaling_points") is not None:
415+
if "largex" in flavour_dict and not flavour_dict["largex"] == [0.0, 0.0]:
416+
raise CheckError("No largex exponent allowed when feature_scaling_points is set")
417+
else:
418+
largex = flavour_dict["largex"]
419+
if largex[0] > largex[1]:
420+
raise CheckError(f"Wrong largex range for flavour {name}: {largex}")
421+
414422
# Finally check whether the basis considers or not charm
415423
# Check that the basis given in the runcard is one of those defined in validphys.pdfbases
416424
vp_basis = check_basis(fitbasis, flavs)["basis"]
@@ -438,7 +446,7 @@ def check_consistent_parallel(parameters, parallel_models):
438446

439447

440448
@make_argcheck
441-
def check_deprecated_options(fitting):
449+
def check_deprecated_options(fitting, parameters):
442450
"""Checks whether the runcard is using deprecated options"""
443451
options_outside = ["trvlseed", "nnseed", "mcseed", "save", "load", "genrep", "parameters"]
444452
for option in options_outside:
@@ -452,6 +460,10 @@ def check_deprecated_options(fitting):
452460
for option in nnfit_options:
453461
if option in fitting:
454462
log.warning("'fitting::%s' is an nnfit-only key, it will be ignored", option)
463+
if "interpolation_points" in parameters:
464+
raise CheckError(
465+
"`interpolation_points` no longer accepted, please change to `feature_scaling_points`"
466+
)
455467

456468

457469
@make_argcheck

n3fit/src/n3fit/model_trainer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -875,7 +875,7 @@ def hyperparametrizable(self, params):
875875
integrability_dict.get("multiplier"),
876876
integrability_dict.get("initial"),
877877
epochs,
878-
params.get("interpolation_points"),
878+
params.get("feature_scaling_points"),
879879
)
880880
threshold_pos = positivity_dict.get("threshold", 1e-6)
881881
threshold_chi2 = params.get("threshold_chi2", CHI2_THRESHOLD)

validphys2/src/validphys/eff_exponents.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -509,6 +509,12 @@ def iterate_preprocessing_yaml(fit, next_fit_eff_exps_table, _flmap_np_clip_arg=
509509
checked = check_basis(basis, None)
510510
basis = checked["basis"]
511511

512+
# If the runcard still has the old option `interpolation_points` change it to `feature_scaling_points`:
513+
if "interpolation_points" in filtermap["parameters"]:
514+
filtermap["parameters"]["feature_scaling_points"] = filtermap["parameters"].pop(
515+
"interpolation_points"
516+
)
517+
512518
# use order defined in runcard.
513519
runcard_flavours = [f"{basis.elementlabel(ref_fl['fl'])}" for ref_fl in previous_exponents]
514520
for i, fl in enumerate(runcard_flavours):
@@ -523,7 +529,13 @@ def iterate_preprocessing_yaml(fit, next_fit_eff_exps_table, _flmap_np_clip_arg=
523529
if largex_args is not None:
524530
betas = np.clip(betas, **largex_args)
525531
previous_exponents[i]["smallx"] = [fmt(alpha) for alpha in alphas]
526-
previous_exponents[i]["largex"] = [fmt(beta) for beta in betas]
532+
# Regardless of whether there was a large x in the original runcard
533+
# drop it if feature scaling is set, to avoid future mistakes
534+
if filtermap["parameters"].get("feature_scaling_points") is None:
535+
previous_exponents[i]["largex"] = [fmt(beta) for beta in betas]
536+
else:
537+
# NB: previous_exponents is the same object as the entry in filtermap (see above),
# so if this fails here it would also fail in an actual fit
538+
previous_exponents[i].pop("largex", None)
527539
with tempfile.NamedTemporaryFile() as fp:
528540
path = Path(fp.name)
529541
yaml_rt.dump(filtermap, path)

0 commit comments

Comments
 (0)