speed up convolutions when using pineappl theories by exploiting the

scarlehoff · scarlehoff · commit 833dbd832817 · 2025-06-10T10:14:34.000+02:00
fact that the fktables are already ordered in x1/x2
diff --git a/validphys2/src/validphys/convolution.py b/validphys2/src/validphys/convolution.py
@@ -373,6 +373,13 @@ def _gv_hadron_predictions(loaded_fk, gv1func, gv2func=None):
     # possible x1-x2 combinations (f1, f2, x1, x2)
     luminosity = np.einsum("ijk, ijl->ijkl", expanded_gv1, expanded_gv2)
 
+    if not loaded_fk.legacy:
+        lx = len(xgrid)
+        lc = len(fl1)
+        fktab = sigma.values.reshape(-1, lx, lx, lc)
+        ret = np.einsum("rcab, nabc->nr", luminosity, fktab)
+        return pd.DataFrame(ret, index=loaded_fk.data_index)
+
     def appl(df):
         # x1 and x2 are encoded as the first and second index levels.
         xx1 = df.index.get_level_values(1)
@@ -381,6 +388,12 @@ def appl(df):
         partial_lumi = luminosity[..., xx1, xx2]
         return pd.Series(np.einsum("ijk,kj->i", partial_lumi, df.values))
 
+    # The gv1/gv2 grids are arrays of shape (replicas, flavours<14>, xarray);
+    # the expanded gv1/gv2 are instead shaped according to the channels (which will match).
+    # Therefore the luminosity is an array of shape (replicas, channels, x1, x2).
+    # This needs to be matched with the fktable, which for the old interface was not ordered,
+    # so the full dataframe needs to be used instead to keep track of the index.
+
     return sigma.groupby(level=0).apply(appl)
 
 
@@ -397,6 +410,12 @@ def _gv_dis_predictions(loaded_fk, gvfunc):
     if sigma.empty:
         return pd.DataFrame(columns=range(gv.shape[0]))
 
+    if not loaded_fk.legacy:
+        lx = len(xgrid)
+        fktab = sigma.values.reshape(-1, lx, len(fm))
+        ret = np.einsum("rfa, naf->nr", gv, fktab)
+        return pd.DataFrame(ret, index=loaded_fk.data_index)
+
     def appl(df):
         # x is encoded as the first index level.
         xind = df.index.get_level_values(1)
diff --git a/validphys2/src/validphys/coredata.py b/validphys2/src/validphys/coredata.py
@@ -56,16 +56,25 @@ class FKTableData:
         The most common use-case is when a total cross section is used
         as a normalization table for a differential cross section,
         in legacy code (<= NNPDF4.0) both fktables would be cut using the differential index.
+
+    data_index: pd.Series
+        index of the data points
+
+    legacy: bool
+        If True, this corresponds to an FkTable read from the old applgrid interface.
+        Deprecated and support will be dropped during the 4.1.X series of tags.
     """
 
     hadronic: bool
     Q0: float
     ndata: int
     xgrid: np.ndarray
     sigma: pd.DataFrame
+    data_index: pd.Series
     convolution_types: tuple[str] = None
     metadata: dict = dataclasses.field(default_factory=dict, repr=False)
     protected: bool = False
+    legacy: bool = False
 
     def with_cfactor(self, cfactor):
         """Returns a copy of the FKTableData object with cfactors applied to the fktable"""
@@ -123,11 +132,12 @@ def with_cuts(self, cuts):
         newndata = len(cuts)
         try:
             newsigma = self.sigma.loc[cuts]
+            newdata_idx = self.data_index.loc[cuts]
         except KeyError as e:
             # This will be an ugly erorr msg, but it should be scary anyway
             log.error(f"Problem applying cuts to {self.metadata}")
             raise e
-        return dataclasses.replace(self, ndata=newndata, sigma=newsigma)
+        return dataclasses.replace(self, ndata=newndata, sigma=newsigma, data_index=newdata_idx)
 
     @property
     def luminosity_mapping(self):
@@ -168,8 +178,8 @@ def get_np_fktable(self):
         # Make the dataframe into a dense numpy array
 
         # First get the data index out of the way
-        # this is necessary because cuts/shifts and for performance reasons
-        # otherwise we will be putting things in a numpy array in very awkward orders
+        # this is necessary because of cuts/shifts and because old fktables are not necessarily ordered
+        # in addition, for performance reasons, we want to order the np array as (ndata, basis, x1, x2)
         ns = self.sigma.unstack(level=("data",), fill_value=0)
         x1 = ns.index.get_level_values(0)
 
@@ -244,5 +254,5 @@ class CFactorData:
     """
 
     description: str
-    central_value: np.array
-    uncertainty: np.array
+    central_value: np.ndarray
+    uncertainty: np.ndarray
diff --git a/validphys2/src/validphys/fkparser.py b/validphys2/src/validphys/fkparser.py
@@ -18,6 +18,8 @@
     res = load_fktable(fk)
 """
 
+# TODO: this module is deprecated and support for older theories will be removed
+
 import dataclasses
 import functools
 import io
@@ -313,9 +315,17 @@ def parse_fktable(f):
             hadronic = res['GridInfo'].hadronic
             ndata = res['GridInfo'].ndata
             xgrid = res.pop('xGrid')
+            data_idx = sigma.index.get_level_values("data").unique().to_series()
 
             return FKTableData(
-                sigma=sigma, ndata=ndata, Q0=Q0, metadata=res, hadronic=hadronic, xgrid=xgrid
+                sigma=sigma,
+                ndata=ndata,
+                Q0=Q0,
+                metadata=res,
+                hadronic=hadronic,
+                xgrid=xgrid,
+                data_index=data_idx,
+                legacy=True,
             )
         elif header_name in _KNOWN_SEGMENTS:
             parser = _KNOWN_SEGMENTS[header_name]
diff --git a/validphys2/src/validphys/pineparser.py b/validphys2/src/validphys/pineparser.py
@@ -203,6 +203,7 @@ def pineappl_reader(fkspec):
 
     partial_fktables = []
     ndata = 0
+    full_data_index = []
     for fkname, p in zip(fknames, pines):
         # Start by reading possible cfactors if cfactor is not empty
         cfprod = 1.0
@@ -247,6 +248,7 @@ def pineappl_reader(fkspec):
         partial_fktables.append(pd.DataFrame(df_fktable, columns=lumi_columns, index=idx))
 
         ndata += n
+        full_data_index.append(data_idx)
 
     # Finallly concatenate all fktables, sort by flavours and fill any holes
     sigma = pd.concat(partial_fktables, sort=True, copy=False).fillna(0.0)
@@ -265,8 +267,14 @@ def pineappl_reader(fkspec):
             ndata = 1
 
         if ndata == 1:
-            # There's no doubt
-            protected = divisor == name
+            # When the number of points is 1 and the fktable is a divisor, protect it from cuts
+            if divisor == name:
+                protected = True
+                full_data_index = [[0]]
+
+    # Keeping the data index as a series is exploited to speed up certain operations (e.g. hadronic conv)
+    fid = np.concatenate(full_data_index)
+    data_index = pd.Series(fid, index=fid, name="data")
 
     return FKTableData(
         sigma=sigma,
@@ -277,4 +285,5 @@ def pineappl_reader(fkspec):
         hadronic=hadronic,
         xgrid=xgrid,
         protected=protected,
+        data_index=data_index,
     )