Add the possibility of also getting the central value (CV)

scarlehoff · scarlehoff · commit 2c58d49f4b7b · 2021-10-14T12:44:04.000+02:00
diff --git a/validphys2/src/validphys/core.py b/validphys2/src/validphys/core.py
@@ -9,6 +9,7 @@
 from __future__ import generator_stop
 
 from collections import namedtuple
+from contextlib import contextmanager
 import re
 import enum
 import functools
@@ -78,10 +79,33 @@ def __dir__(self):
 PDFSETS = _PDFSETS()
 
 class PDF(TupleComp):
+    """Wrapper class for the validphys PDF object to easily manage
+    both Monte Carlo and Hessian sets
+
+    Offers a context manager (``enable_central_value``) to include the central value
+    in Monte Carlo sets.
+
+    Examples
+    --------
+    >>> from validphys.api import API
+    >>> from validphys.convolution import predictions
+    >>> args = {"dataset_input":{"dataset": "ATLASTTBARTOT"}, "theoryid":200, "use_cuts":"internal"}
+    >>> ds = API.dataset(**args)
+    >>> pdf = API.pdf(pdf="NNPDF40_nnlo_as_01180")
+    >>> with pdf.enable_central_value():
+    ...     preds_with_cv = predictions(ds, pdf)
+    >>> preds_no_cv = predictions(ds, pdf)
+    >>> len(preds_with_cv.columns)
+    101
+    >>> len(preds_no_cv.columns)
+    100
+    """
 
     def __init__(self, name):
         self.name = name
         self._plotname = name
+        self._lhapdfset = None
+        self._include_cv = False
         super().__init__(name)
 
 
@@ -145,13 +169,14 @@ def rescale_factor(self):
         else:
             return 1
 
-    @functools.lru_cache(maxsize=16)
     def load(self):
-        return LHAPDFSet(self.name, self.nnpdf_error)
+        if self._lhapdfset is None:
+            self._lhapdfset = LHAPDFSet(self.name, self.nnpdf_error)
+        return self._lhapdfset
 
     @functools.lru_cache(maxsize=2)
     def load_t0(self):
-        """Load the PDF as a t0 set"""
+        """Reload the PDF as a t0 set"""
         return LHAPDFSet(self.name, LHAPDFSet.erType_ER_MCT0)
 
     def __str__(self):
@@ -164,7 +189,7 @@ def __len__(self):
 
     @property
     def nnpdf_error(self):
-        """Return the NNPDF error tag, used to build the `LHAPDFSet` objeect"""
+        """Return the NNPDF error tag, used to build the `LHAPDFSet` object"""
         error = self.ErrorType
         if error == "replicas":
             return LHAPDFSet.erType_ER_MC
@@ -203,6 +228,8 @@ def grid_values_index(self):
         len(pdf))`` for Monte Carlo sets, because replica 0 is not selected
         and ``range(0, len(pdf))`` for hessian sets.
 
+        If ``include_cv`` is set to True, add a central value column for member 0
+        for Monte Carlo error sets
 
         Returns
         -------
@@ -215,7 +242,10 @@ def grid_values_index(self):
         """
         err = self.nnpdf_error
         if err is LHAPDFSet.erType_ER_MC:
-            return range(1, len(self))
+            if self._include_cv:
+                return ["CV"] + list(range(1, len(self)))
+            else:
+                return range(1, len(self))
         elif err in (LHAPDFSet.erType_ER_SYMEIG, LHAPDFSet.erType_ER_EIG, LHAPDFSet.erType_ER_EIG90):
             return range(0, len(self))
         else:
@@ -228,6 +258,17 @@ def get_members(self):
         """
         return len(self.grid_values_index)
 
+    @contextmanager
+    def enable_central_value(self):
+        """Context manager within which the central value is included
+        regardless of the error type of the PDF set"""
+        pdfset = self.load()
+        pdfset.include_cv = self._include_cv = True
+        try:
+            yield
+        finally:
+            # Always restore the flags, even if the ``with`` body raises
+            pdfset.include_cv = self._include_cv = False
 
 
 kinlabels_latex = CommonData.kinLabel_latex.asdict()
diff --git a/validphys2/src/validphys/lhapdfset.py b/validphys2/src/validphys/lhapdfset.py
@@ -62,20 +62,40 @@ class PDFErrorType(NamedTuple):
 
 
 class LHAPDFSet:
-    """Wrapper for the lhapdf python interface"""
+    """Wrapper for the lhapdf python interface.
+
+    Once instantiated this class will load the PDF set according to whether it is to be
+    treated as a T0 set (only the CV) or not.
+
+    It is possible to control the LHAPDF verbosity with the flag ``lhapdf_verbosity``.
+
+    For Monte Carlo sets the central value (member 0) is by default not included when taking
+    the results for all members (i.e., when using ``grid_values``).
+    However, it is possible to add member 0 by changing the ``include_cv`` attribute to True.
+
+    Temporarily: it exposes all libNNPDF attributes that were exposed and used prior to
+    the introduction of this class
+    """
 
     def __init__(self, name, error_type, lhapdf_verbosity=0):
         if isinstance(error_type, int):
             # libNNPDF error types were int
             error_type = _libNNPDF_errors[error_type]
+        log.info("PDF: %s ErrorType: %s", name, error_type.description)
         self._name = name
         self._error_type = error_type
+        # If at this point we already know this is a T0 set, load only the CV
+        if error_type.t0:
+            self._lhapdf_set = [lhapdf.mkPDF(name)]
+        else:
+            self._lhapdf_set = lhapdf.mkPDFs(name)
         self._flavors = None
-        self._lhapdf_set = lhapdf.mkPDFs(name)
         self._libNNPDF_set = None
-        self.legacy_interface()
-        log.info("PDF: %s ErrorType: %s", name, error_type.description)
+        self.include_cv = False
+        # Set the verbosity of LHAPDF
         lhapdf.setVerbosity(lhapdf_verbosity)
+        # Prepare a Legacy Interface
+        self.legacy_interface()
 
     def legacy_interface(self):
         """Setting some methods and attribute as per libNNPDF specs"""
@@ -105,7 +125,7 @@ def members(self):
         """
         if self.is_t0:
             return self._lhapdf_set[0:1]
-        if self.is_monte_carlo:
+        if self.is_monte_carlo and not self.include_cv:
             return self._lhapdf_set[1:]
         return self._lhapdf_set
 
@@ -139,11 +159,15 @@ def flavors(self):
             self._flavors = self.members[0].flavors()
         return self._flavors
 
-    def grid_values(self, flavors, xgrid, qgrid):
+    def grid_values(self, flavors: np.ndarray, xgrid: np.ndarray, qgrid: np.ndarray):
         """Reimplementation of libNNPDF grid_values
         The return shape is
             (members, flavors, xgrid, qgrid)
 
+        Returns
+        -------
+            ndarray of shape (members, flavors, xgrid, qgrid)
+
         Examples
         --------
         >>> import numpy as np
diff --git a/validphys2/src/validphys/results.py b/validphys2/src/validphys/results.py
@@ -48,11 +48,13 @@ class NNPDFDataResult(Result):
     """A result fills its values from a pandas dataframe
     For legacy (libNNPDF) compatibility, falls back to libNNPDF attributes"""
 
-    def __init__(self, dataobj=None, central_value=None):
+    def __init__(self, dataobj):
         # This class is used by both validphys and libNNPDF objects
-        # when central_value is not explictly passed, fallback to
-        # libNNPDF object .get_cv()
-        if central_value is None:
+        # At this point only the result of the ThPredictions is a vp object
+        # which includes a special CV column which needs to be pop'd
+        try:
+            central_value = dataobj.pop("CV")
+        except AttributeError:
             central_value = dataobj.get_cv()
         self._central_value = np.array(central_value).squeeze()
 
@@ -78,8 +80,8 @@ def std_error(self):
 
 
 class DataResult(NNPDFDataResult):
-    def __init__(self, dataobj, covmat, sqrtcovmat, central_value=None):
-        super().__init__(dataobj, central_value=central_value)
+    def __init__(self, dataobj, covmat, sqrtcovmat):
+        super().__init__(dataobj)
         self._covmat = covmat
         self._sqrtcovmat = sqrtcovmat
 
@@ -105,17 +107,19 @@ class ThPredictionsResult(NNPDFDataResult):
     """Class holding theory prediction
     For legacy purposes it still accepts libNNPDF datatypes, but prefers python-pure stuff
     """
-    def __init__(self, dataobj, stats_class, label=None, central_value=None):
+    def __init__(self, dataobj, stats_class, label=None):
+        super().__init__(dataobj)
         self.stats_class = stats_class
         self.label = label
-        # Ducktype the input into numpy arrays
+        # Ducktype the input into numpy arrays for the rawdata
+        # TODO: once all of them are dataframes, the rawdata could be made of
+        # dataframes as well
         try:
             self._std_error = dataobj.std(axis=1).to_numpy()
             self._rawdata = dataobj.to_numpy()
         except AttributeError:
             self._std_error = dataobj.get_error()
             self._rawdata = dataobj.get_data()
-        super().__init__(dataobj, central_value=central_value)
 
     @property
     def std_error(self):
@@ -145,20 +149,18 @@ def from_convolution(cls, pdf, dataset):
             datasets = (dataset,)
 
         try:
-            all_preds = []
-            all_centrals = []
-            for d in datasets:
-                all_preds.append(predictions(d, pdf))
-                all_centrals.append(central_predictions(d, pdf))
+            with pdf.enable_central_value():
+                th_predictions = pd.concat(predictions(d, pdf) for d in datasets)
         except PredictionsRequireCutsError as e:
             raise PredictionsRequireCutsError("Predictions from FKTables always require cuts, "
                     "if you want to use the fktable intrinsic cuts set `use_cuts: 'internal'`") from e
-        th_predictions = pd.concat(all_preds)
-        central_values = pd.concat(all_centrals)
+        # Hessian sets have no "CV" column: use member 0 as the central value
+        if "CV" not in th_predictions:
+            th_predictions["CV"] = th_predictions[0]
 
         label = cls.make_label(pdf, dataset)
 
-        return cls(th_predictions, pdf.stats_class, label, central_value=central_values)
+        return cls(th_predictions, pdf.stats_class, label)
 
 
 class PositivityResult(StatsResult):