NNPDF
diff --git a/‎.github/actions/prepare_environment/action.yml‎
Lines changed: 4 additions & 2 deletions b/‎.github/actions/prepare_environment/action.yml‎
Lines changed: 4 additions & 2 deletions
diff --git a/‎.github/workflows/all_tests_nnpdf.yml‎
Lines changed: 3 additions & 1 deletion b/‎.github/workflows/all_tests_nnpdf.yml‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎.github/workflows/fitbot.yml‎
Lines changed: 7 additions & 7 deletions b/‎.github/workflows/fitbot.yml‎
Lines changed: 7 additions & 7 deletions
diff --git a/‎doc/sphinx/make_theory_csv.py‎
Lines changed: 2 additions & 2 deletions b/‎doc/sphinx/make_theory_csv.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎doc/sphinx/source/data/data-config.rst‎
Lines changed: 5 additions & 1 deletion b/‎doc/sphinx/source/data/data-config.rst‎
Lines changed: 5 additions & 1 deletion
diff --git a/‎doc/sphinx/source/index.rst‎
Lines changed: 10 additions & 6 deletions b/‎doc/sphinx/source/index.rst‎
Lines changed: 10 additions & 6 deletions
diff --git a/‎doc/sphinx/source/n3fit/methodology.rst‎
Lines changed: 55 additions & 0 deletions b/‎doc/sphinx/source/n3fit/methodology.rst‎
Lines changed: 55 additions & 0 deletions
diff --git a/‎doc/sphinx/source/theory_params.csv‎
Lines changed: 25 additions & 14 deletions b/‎doc/sphinx/source/theory_params.csv‎
Lines changed: 25 additions & 14 deletions
diff --git a/‎doc/sphinx/source/tutorials/datthcomp.rst‎
Lines changed: 22 additions & 29 deletions b/‎doc/sphinx/source/tutorials/datthcomp.rst‎
Lines changed: 22 additions & 29 deletions
@@ -19,7 +19,7 @@ inputs:
   python-version:
     required: true
     description: "Python version to use"
-    default: "3.12"
+    default: "3.13"
 
 runs:
   using: "composite"
@@ -33,7 +33,9 @@ runs:
         auto-update-conda: true
         activate-environment: nnpdf_environment
         conda-remove-defaults: true
-    - name: Add NETRC
+    - name: Add NETRC and cleanup cache
       shell: bash -l {0}
       run: |
+        sudo rm -rf /opt/hostedtoolcache/Python/3.9*
+        sudo rm -rf /opt/hostedtoolcache/CodeQL
         echo "$NETRC_FILE" | base64 --decode > ~/.netrc
@@ -35,7 +35,7 @@ jobs:
       - name: Test n3fit and validphys
         shell: bash -l {0}
         run: |
-          pytest --cov=${PWD} --cov-config=pyproject.toml --mpl --pyargs validphys n3fit --mpl-default-tolerance 18
+          pytest --cov=${PWD} --cov-config=pyproject.toml --mpl --pyargs validphys n3fit --mpl-default-tolerance 24
       - name: Keep coverage file
         if: startsWith(matrix.python-version, '3.13')
         uses: actions/upload-artifact@v4
@@ -100,6 +100,8 @@ jobs:
     - name: Install nnpdf without LHAPDF
       shell: bash -l {0}
       run: |
+        sudo rm -rf /opt/hostedtoolcache/Python/3.9*  # remove the preinstalled Python 3.9 toolcache entries
+        sudo rm -rf /opt/hostedtoolcache/CodeQL
         pip install .[nolha,torch]
         # Since there is no LHAPDF in the system, initialize the folder and download pdfsets.index
         lhapdf-management update --init
 
@@ -8,10 +8,10 @@ on:
 
 # some general variables
 env:
-  N3FIT_MAXNREP: 20 # total number of replicas to fit
-  POSTFIT_NREP: 16 # requested replicas for postfit
-  REFERENCE_SET: NNBOT-955eb2bcc-2025-06-17 # reference set for exact results
-  STABLE_REFERENCE_SET: NNBOT-955eb2bcc-2025-06-17 # reference set for last tag
+  N3FIT_MAXNREP: 30 # total number of replicas to fit
+  POSTFIT_NREP: 15 # requested minimum replicas for postfit
+  REFERENCE_SET: NNBOT-99108504e-2025-11-22 # reference set for exact results
+  STABLE_REFERENCE_SET: NNBOT-99108504e-2025-11-22 # reference set for last tag
   PYTHONHASHSEED: "0"
 
 jobs:
@@ -55,12 +55,12 @@ jobs:
         cd $RUNFOLDER
         cp developing.yml $RUNCARD.yml
         vp-setupfit $RUNCARD.yml
-    # run n3fit replicas sequentially
+    # try running the n3fit replicas in parallel
     - name: Running n3fit
       shell: bash -l {0}
       run: |
         cd $RUNFOLDER
-        for ((i=1; i<=$N3FIT_MAXNREP; i+=1)); do n3fit $RUNCARD.yml $i ; done
+        n3fit $RUNCARD.yml 1 -r $N3FIT_MAXNREP
     # performing DGLAP
     - name: Running dglap
       shell: bash -l {0}
@@ -79,7 +79,7 @@ jobs:
       run: |
         conda activate nnpdfenv
         cd $RUNFOLDER
-        postfit $POSTFIT_NREP $RUNCARD
+        postfit $POSTFIT_NREP $RUNCARD --at-least-nrep
         res=$(vp-upload $RUNCARD 2>&1)
         echo ${res}
         while echo ${res} | grep ERROR >/dev/null
 
@@ -5,7 +5,7 @@
 from argparse import ArgumentParser
 from pathlib import Path
 
-from nnpdf_data import theory_cards
+from nnpdf_data import THEORY_CARDS_PATH
 from nnpdf_data.theorydbutils import fetch_all
 
 if __name__ == "__main__":
@@ -20,7 +20,7 @@
 
     args = parser.parse_args()
 
-    theory_df = fetch_all(theory_cards)
+    theory_df = fetch_all(THEORY_CARDS_PATH)
 
     # Enforce the following order in the table:
     order = ["PTO", "QED", "Comments", "IC", "Q0", "ModEv"]
 
@@ -23,9 +23,13 @@ located in the ``nnpdf`` git repository at
 
 where a separate ``CommonData`` file is stored for each *Dataset* with the
 filename format described in :ref:`dataset-naming-convention`.
-The data is installed as part of the python package of ``nnpdf``,
+The data is installed as part of the python package of ``nnpdf`` or ``nnpdf_data``;
 all data files to be installed must have a ``.yaml`` extension.
 
+It is possible to add extra sources of data by adding a ``data_path`` variable
+to the ``nnprofile.yaml`` file (see :ref:`nnprofile`).
+The extra sources will always be searched before the default path.
+
 
 Theory lookup table
 ===================
 
@@ -33,24 +33,28 @@ The NNPDF collaboration is currently composed of the following
 members:
 
 * Richard D. Ball - University of Edinburgh
-* Alessandro Candido - CERN
+* Alessandro Candido - Quantum Research Centre, Technology Innovation Institute, Abu Dhabi
 * Stefano Carrazza - Università degli Studi di Milano and INFN
 * Amedeo Chiefa - University of Edinburgh
+* Ella Cole - University of Cambridge
 * Mark Costantini - University of Cambridge
-* Juan M. Cruz-Martinez - CERN
+* Juan M. Cruz-Martinez - Universidad de Sevilla
 * Luigi Del Debbio - University of Edinburgh
 * Stefano Forte - Università degli Studi di Milano and INFN
-* Tommaso Giani - Max-Planck-Institut für Physik
+* Tommaso Giani - Università degli Studi di Torino and INFN
+* Eva Groenendijk - Università degli Studi di Milano and INFN
 * Felix Hekhorn - University of Jyväskylä
-* José Ignacio Latorre - Quantum Research Centre, Technology
-  Innovation Institute, Abu Dhabi, United Arab Emirates and Center for Quantum Technologies, National University of Singapore
+* Jaco ter Hoeve - University of Edinburgh
+* José Ignacio Latorre - Quantum Research Centre, Technology Innovation Institute, Abu Dhabi, United Arab Emirates and Center for Quantum Technologies, National University of Singapore
 * Giacomo Magni - IJCLab Orsay and CNRS
 * Emanuele R. Nocera - Università degli Studi di Torino and INFN
 * Tanjona R. Rabemananjara - Vrije University Amsterdam and Nikhef
 * Juan Rojo - Vrije University Amsterdam and Nikhef
+* Valentina Schutze - University of Cambridge
 * Tanishq Sharma - Università di Torino and INFN
-* Roy Stegeman - University of Edinburgh
+* Roy Stegeman - Quantum Research Centre, Technology Innovation Institute, Abu Dhabi
 * Maria Ubiali - University of Cambridge
+* Ramon Winterhalder - Università degli Studi di Milano and INFN
 
 Former members of the NNPDF collaboration include
 
 
@@ -346,3 +346,58 @@ The figure above provides a schematic representation of this feature scaling met
 2. ``[number of points]`` points are kept (dark blue), while other points are discarded (light blue).
 3. A cubic spline function is used to do the interpolation between the points that have not been
    discarded.
+
+
+Diagonal basis
+--------------
+
+Performing the training and validation split without diagonalising the :math:`t_0` covmat :math:`C_{0}` neglects
+any correlations that may be present between training and validation data. To remedy this,
+we rotate to a basis in which the correlation matrix is diagonal before performing any training/validation split.
+Starting from the definition of the :math:`\chi^2` function in the NNPDF methodology, we have
+
+.. math::
+
+    \chi^2 &= (D-T)^T C_0^{-1} (D-T) \\
+           &= (D-T)^T R^{-1} R C_0^{-1} R R^{-1} (D-T) \\
+           &= (D-T)^T R^{-1} \left( R^{-1} C_0 R^{-1} \right)^{-1} R^{-1} (D-T) \\
+           &\equiv \tilde{\epsilon}^T \rho^{-1} \tilde{\epsilon} \, ,
+
+where we have defined :math:`\tilde{\epsilon} \equiv R^{-1}(D-T)` and :math:`\rho = R^{-1} C_0 R^{-1}`.
+
+Choosing :math:`R` to be the diagonal matrix with entries :math:`R_{ii} = \sqrt{C_{0, ii}}`, we have that :math:`R^{-1} C_0 R^{-1}` coincides with the usual definition of the correlation matrix.
+
+Next, we move to the basis in which :math:`\rho` is diagonal. Writing :math:`\rho = \tilde{U}^T \tilde{\Lambda} \tilde{U}`, we find
+
+.. math::
+
+    \chi^2 &= \tilde{\epsilon}^T \rho^{-1} \tilde{\epsilon} \\
+           &= \tilde{\epsilon}^T (\tilde{U}^T \tilde{\Lambda} \tilde{U})^{-1} \tilde{\epsilon} \\
+           &= \tilde{\epsilon}^T \tilde{U}^T \tilde{\Lambda}^{-1} \tilde{U} \tilde{\epsilon} \\
+           &\equiv \dbtilde{\epsilon}^T \tilde{\Lambda}^{-1} \dbtilde{\epsilon} \, ,
+
+where on the last line we have defined
+
+.. math::
+
+    \dbtilde{\epsilon} \equiv \tilde{U}\tilde{\epsilon} = \tilde{U}R^{-1}(D-T).
+
+In index notation, this reads
+
+.. math::
+
+    \dbtilde{\epsilon}_i = \tilde{U}_{ij} \frac{(D-T)_j}{\sqrt{C_{0, jj}}}
+
+The transformed data :math:`\dbtilde{\epsilon}` is statistically independent in the diagonal basis of the correlation matrix :math:`\rho`.
+Computing the covariance of :math:`\dbtilde{\epsilon}`,
+
+.. math::
+
+    \mathbb{E}[\dbtilde{\epsilon}\dbtilde{\epsilon}^T]
+      &= \mathbb{E} \big[ (\tilde{U} R^{-1}(D-T)) (\tilde{U} R^{-1}(D-T))^T \big] \\
+      &= \tilde{U} R^{-1} \mathbb{E}[(D-T)(D-T)^T] R^{-1} \tilde{U}^T \\
+      &= \tilde{U} \rho \tilde{U}^T \\
+      &= \tilde{U}\tilde{U}^T \tilde{\Lambda} \tilde{U}\tilde{U}^T \\
+      &= \tilde{\Lambda} \, ,
+
+we find that it is diagonal, which demonstrates that the training/validation data are indeed uncorrelated, and hence statistically independent for Gaussian-distributed data.
@@ -2,23 +2,12 @@ Field/key, Type, Description, Comments
 ID, Integer, TheoryID, Theory enumerating ID
 PTO, Integer, pQCD order, (0/1/2 = LO/NLO/NNLO)
 FNS, Text, Flavour Number Scheme, "e.g FONLL-A/B/C, ZM-VFNS or FFNS"
-DAMP, Integer, FONLL damping factor switch, Boolean
 IC, Integer, Intrinsic charm switch, Boolean
 ModEv, Text, DGLAP solution mode, EXA/EXP/TRN
 XIR, Real, :math:`\xi_R`, :math:`\mu_R/Q`
 XIF,  Real, :math:`\xi_F`, :math:`\mu_F/Q`
-NfFF, Integer, Number of flavours in the FFNS, 3/4/5/6
-MaxNfAs, Integer, :math:`n^{(\alpha_s)}_{f \textrm{max}}`, Max active flavours in :math:`\alpha_s`
-MaxNfPdf, Integer, :math:`n^{(PDF)}_{f \textrm{max}}`, Max active flavours in PDFs
-Q0, Real, :math:`Q_0`, FK Table initial scale
-alphas, Real, Strong coupling, Format: :math:`\alpha_s(Q_{\textrm{ref}})`
-Qref, Real, :math:`Q_{\textrm{ref}}`, Reference scale for :math:`\alpha_S` in GeV
 QED, Integer, QED switch, Boolean
-alphaqed, Real, QED coupling, Format: :math:`\alpha_{QED}(Q_{\textrm{qedref}})`
-Qedref, Real, :math:`Q_{\textrm{qedref}}`, QED reference scale (GeV) 
-SxRes, Integer, small-:math:`x` resummation switch, Boolean
-SxOrd, Text, small-:math:`x` pt switch, "('LL', 'NLL', 'NLL')"
-HQ, Text, HQ mass treatment, POLE/MSBAR
+Q0, Real, :math:`Q_0`, FK Table initial scale
 mc, Real, :math:`c` quark mass :math:`M_c/m_c(Q_{mc})`, Units: GeV
 Qmc, Real, :math:`Q_{mc}`, :math:`c` reference scale (GeV)
 kcThr, Real, :math:`c` production threshold ratio, Ratio to :math:`m_c`
@@ -35,6 +24,28 @@ GF, Real, :math:`G_F`, Fermi coupling constant
 SIN2TW, Real, :math:`\sin^2\theta_W`,
 TMC, Integer, Target mass corrections, Boolean
 MP, Real, :math:`M_P` Proton mass, Units: GeV
+Comments, Text, General comments
+alphas, Real, Strong coupling, Format: :math:`\alpha_s(Q_{\textrm{ref}})`
+alphaqed, Real, QED coupling, Format: :math:`\alpha_{QED}(Q_{\textrm{qedref}})`
+Qref, Real, :math:`Q_{\textrm{ref}}`, Reference scale for :math:`\alpha_S` and :math:`\alpha_{\rm EW}` in GeV
+XIA, Real, :math:`\xi_A`, :math:`\mu_A/Q`
+NfFF, Integer, Number of flavours in the FFNS, 3/4/5/6
+nfref, Integer, :math:`n_f^{(\textrm{ref})}`, Number of active flavours at :math:`Q_{\textrm{ref}}`
+MaxNfPdf, Integer, :math:`n_f^{\textrm{max}}`, Used by pineko and the photon module to define the thresholds
+nf0, Integer, :math:`n_f^{(0)}`, Number of active flavours at :math:`Q_0`
+HQ, Text, HQ mass treatment, POLE/MSBAR
+IterEv, Integer, iterations for the evolution of the PDF, Defaults to 60 when ModEv = EXA
+ModSV, Text, Scale variation scheme, expanded/exponentiated
+DAMP, Integer, FONLL damping factor switch, Boolean
+DAMPPOWERc, Integer, Damping power for charm in FONLL, needed when DAMP and FONLL are used
+DAMPPOWERb, Integer, Damping power for bottom in FONLL, needed when DAMP and FONLL are used
+n3lo_ad_variation, List, IHOU variation of the N3LO anomalous dimensions, "Order is (gg, gq, qg, qq, nsp, nsm, nsv) with values 0 or 1 for FHMRUVV"
+n3lo_cf_variation, List, IHOU variations of the N3LO massive DIS coefficient functions, "Values are -1, 0, 1"
+use_fhmruvv, Integer, use N3LO anomalous dimensions by FHMRUVV group, Boolean
+MaxNfAs, Integer, :math:`n^{(\alpha_s)}_{f \textrm{max}}`, Max active flavours in :math:`\alpha_s`
+SxRes, Integer, small-:math:`x` resummation switch, Boolean
+SxOrd, Text, small-:math:`x` pt switch, "('LL', 'NLL', 'NLL')"
+EScaleVar, Integer, Switch for DGLAP scale variation, Boolean
+Qedref, Real, :math:`Q_{\textrm{qedref}}`, QED reference scale (GeV)
 global_nx, Integer, Global x-grid precision, Default (0) uses set-by-set precision
-EScaleVar, Real, Switch for DGLAP scale variation, Boolean
-Comments, Text, General comments,
 
@@ -10,7 +10,8 @@ You need to provide:
 1. A PDF which includes your data set;
 2. A valid theory ID;
 3. A choice of cuts policy;
-4. A list of data sets to do the comparison for.
+4. A list of data sets to do the comparison for;
+5. Options to shift theoretical predictions according to the correlated part of the experimental uncertainties and/or to normalise the comparison to the central value of the experimental data.
 
 Below is an example runcard for a data theory comparison, ``runcard.yaml``:
 
@@ -21,38 +22,30 @@ Below is an example runcard for a data theory comparison for BCDMSP, ``runcard.y
       keywords: [example]
       author: Rosalyn Pearson
 
-  pdfs: 
-      - id: NNPDF31_nnlo_as_0118
-        label: NNPDF31_nnlo_as_0118
-
-  theoryid: 53
-
-  use_cuts: false
+  dataspecs:
+    - speclabel: "NNPDF40 (w/o shift)"
+      theoryid: 40_000_000
+      use_cuts: "internal"
+      with_shift: False
+      pdf: NNPDF40_nnlo_as_01180
+    - speclabel: "NNPDF40 (w/ shift)"
+      theoryid: 40_000_000
+      use_cuts: "internal"
+      with_shift: True
+      pdf: NNPDF40_nnlo_as_01180
 
   dataset_inputs:
-        - { dataset: BCDMSP}
+      - { dataset: HERA_NC_318GEV_EAVG_BOTTOM-SIGMARED, variant: legacy}
+      - { dataset: ATLAS_1JET_8TEV_R06, variant: legacy}
+      - { dataset: BCDMS_NC_NOTFIXED_P_EM-F2, variant: legacy}
 
-  template: dthcomparison.md
+  template_text: |
+     # Data theory comparison with and without shifts
+     {@ with dataset_inputs @}
+     {@ plot_fancy_dataspecs(normalize_to=data) @}
+     {@ endwith @}
 
   actions_:
     - report(main=true)
 
-The corresponding template, ``dthcomparison.md``, looks like this:
-
-.. code:: yaml
-
-  %BCDMSP (theory ID 52)
-
-  {@ dataset_inputs plot_fancy @}
-  {@ dataset_inputs::pdfs plot_fancy(normalize_to=data)@}
-  {@ dataset_inputs::pdfs plot_chi2dist @}
-  {@ dataset_inputs::pdfs group_result_table @}
-
-1.  ``plot_fancy`` produces data-theory comparison plots for the data. This is called 
-    twice to produce both normalised and unnormalised sets of plots.
-2.  ``plot_chi2dist`` gives the chi2 distribution between the theory and data.
-3.  ``group_result_table`` gives the numerical values which appear in the plots.
-
-Running :code:`validphys runcard.yaml` should produce a ``validphys`` report of the data-theory 
-comparison like the one `here <https://vp.nnpdf.science/ErmVZEPGT42GCfreWwzalg==/>`_ - see the
-`vp-guide <https://data.nnpdf.science/validphys-docs/guide.html#development-installs>`_.
+The function ``plot_fancy_dataspecs`` produces data-theory comparison plots for the specified list of data for all of the data specifications ``dataspecs``. The code can be run as :code:`validphys runcard.yaml` which will produce a ``validphys`` report with the desired plots. See the runcard ``data_theory_comparison.yaml`` in the validphys ``examples`` folder for details.