Skip to content

Commit 8a00917

Browse files
authored
Merge pull request #1430 from NNPDF/removing_cpp_thpredictions
Change ThPredictions to python predictions
2 parents 135a75b + ab2242d commit 8a00917

File tree

14 files changed

+613
-580
lines changed

14 files changed

+613
-580
lines changed

validphys2/examples/cuts_options.yaml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,10 @@ cuts_intersection_spec:
2727
dataset_input: {dataset: ATLAS_1JET_8TEV_R06_DEC}
2828

2929
dataspecs:
30-
- speclabel: "No cuts"
31-
use_cuts: "nocuts"
30+
# A special cuts option is "no cuts", although not all actions
31+
# are compatible with no cuts at all
32+
# - speclabel: "No cuts"
33+
# use_cuts: "nocuts"
3234

3335
- speclabel: "Fit cuts"
3436
use_cuts: "fromfit"

validphys2/examples/data_theory_comparison.yaml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,12 @@ pdfs:
99

1010
theoryid: 162
1111

12-
use_cuts: "nocuts"
12+
use_cuts: "internal"
1313

1414
dataset_inputs:
1515
- { dataset: BCDMSP}
16+
- { dataset: H1HERAF2B}
17+
- { dataset: ZEUSHERAF2B}
1618

1719
template: dthcomparison.md
1820

validphys2/examples/export_data.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ meta:
77
keywords: [Guilty]
88
author: Lazy Person
99

10-
use_cuts: "nocuts"
10+
use_cuts: "internal"
1111

1212
pdf: NNPDF40_nlo_as_01180
1313

validphys2/examples/looping_example.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ pdfs:
1616
theoryids:
1717
- 208
1818
- 162
19-
use_cuts : nocuts
19+
use_cuts : internal
2020

2121
dataset_inputs:
2222
- { dataset: LHCBWZMU7TEV, cfac: [NRM] }

validphys2/examples/plot_phi.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ pdf: {id: "NNPDF40_nnlo_as_01180", label: "4.0 NNLO"}
77

88
theoryid: 162
99

10-
use_cuts : nocuts
10+
use_cuts : internal
1111

1212
dataset_inputs:
1313
- { dataset: NMC }

validphys2/src/validphys/closuretest/multiclosure.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,11 +84,11 @@ def internal_multiclosure_dataset_loader(
8484
data = dataset.load.__wrapped__(dataset)
8585

8686
fits_dataset_predictions = [
87-
ThPredictionsResult.from_convolution(pdf, dataset, loaded_data=data)
87+
ThPredictionsResult.from_convolution(pdf, dataset)
8888
for pdf in fits_pdf
8989
]
9090
fits_underlying_predictions = ThPredictionsResult.from_convolution(
91-
multiclosure_underlyinglaw, dataset, loaded_data=data
91+
multiclosure_underlyinglaw, dataset
9292
)
9393

9494
# copy data to make t0 cov

validphys2/src/validphys/core.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -790,7 +790,8 @@ def central_value(self):
790790
return np.mean(self.data, axis=0)
791791

792792
def std_error(self):
793-
return np.std(self.data, axis=0)
793+
# ddof == 1 to match libNNPDF behaviour
794+
return np.std(self.data, ddof=1, axis=0)
794795

795796
def moment(self, order):
796797
return np.mean(np.power(self.data-self.central_value(),order), axis=0)

validphys2/src/validphys/covmats.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -530,8 +530,7 @@ def pdferr_plus_covmat(dataset, pdf, covmat_t0_considered):
530530
>>> np.allclose(a == b)
531531
True
532532
"""
533-
loaded_data = dataset.load()
534-
th = ThPredictionsResult.from_convolution(pdf, dataset, loaded_data=loaded_data)
533+
th = ThPredictionsResult.from_convolution(pdf, dataset)
535534
pdf_cov = np.cov(th._rawdata, rowvar=True)
536535
return pdf_cov + covmat_t0_considered
537536

validphys2/src/validphys/results.py

Lines changed: 54 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,13 @@
88

99
from collections import OrderedDict, namedtuple
1010
from collections.abc import Sequence
11-
import itertools
1211
import logging
1312

1413
import numpy as np
1514
import pandas as pd
1615
import scipy.linalg as la
1716

18-
from NNPDF import ThPredictions, CommonData, Experiment
17+
from NNPDF import CommonData
1918
from reportengine.checks import require_one, remove_outer, check_not_empty
2019
from reportengine.table import table
2120
from reportengine import collect
@@ -35,6 +34,12 @@
3534
calc_phi,
3635
bootstrap_values,
3736
)
37+
from validphys.convolution import (
38+
predictions,
39+
central_predictions,
40+
PredictionsRequireCutsError,
41+
)
42+
3843

3944
log = logging.getLogger(__name__)
4045

@@ -45,10 +50,16 @@ class Result:
4550

4651
# TODO: Eventually,only one of (NNPDFDataResult, StatsResult) should survive
4752
class NNPDFDataResult(Result):
48-
"""A result fills its values from a libnnpf data object"""
53+
"""A result fills its values from a pandas dataframe
54+
For legacy (libNNPDF) compatibility, falls back to libNNPDF attributes"""
4955

50-
def __init__(self, dataobj):
51-
self._central_value = dataobj.get_cv()
56+
def __init__(self, dataobj=None, central_value=None):
57+
# This class is used by both validphys and libNNPDF objects
58+
# when central_value is not explicitly passed, fall back to
59+
# libNNPDF object .get_cv()
60+
if central_value is None:
61+
central_value = dataobj.get_cv()
62+
self._central_value = np.array(central_value).reshape(-1)
5263

5364
@property
5465
def central_value(self):
@@ -72,8 +83,8 @@ def std_error(self):
7283

7384

7485
class DataResult(NNPDFDataResult):
75-
def __init__(self, dataobj, covmat, sqrtcovmat):
76-
super().__init__(dataobj)
86+
def __init__(self, dataobj, covmat, sqrtcovmat, central_value=None):
87+
super().__init__(dataobj, central_value=central_value)
7788
self._covmat = covmat
7889
self._sqrtcovmat = sqrtcovmat
7990

@@ -96,12 +107,22 @@ def sqrtcovmat(self):
96107

97108

98109
class ThPredictionsResult(NNPDFDataResult):
99-
def __init__(self, dataobj, stats_class, label=None):
110+
"""Class holding theory prediction
111+
For legacy purposes it still accepts libNNPDF datatypes, but prefers python-pure stuff
112+
"""
113+
def __init__(self, dataobj, stats_class, label=None, central_value=None):
100114
self.stats_class = stats_class
101115
self.label = label
102-
self._std_error = dataobj.get_error()
103-
self._rawdata = dataobj.get_data()
104-
super().__init__(dataobj)
116+
# Ducktype the input into numpy arrays
117+
try:
118+
self._rawdata = dataobj.to_numpy()
119+
# If the numpy conversion worked then we don't have a libNNPDF object in our hands
120+
stats = stats_class(self._rawdata.T)
121+
self._std_error = stats.std_error()
122+
except AttributeError:
123+
self._std_error = dataobj.get_error()
124+
self._rawdata = dataobj.get_data()
125+
super().__init__(dataobj, central_value=central_value)
105126

106127
@property
107128
def std_error(self):
@@ -123,16 +144,28 @@ def make_label(pdf, dataset):
123144
return label
124145

125146
@classmethod
126-
def from_convolution(cls, pdf, dataset, loaded_pdf=None, loaded_data=None):
127-
if loaded_pdf is None:
128-
loaded_pdf = pdf.load()
129-
if loaded_data is None:
130-
loaded_data = dataset.load()
131-
th_predictions = ThPredictions(loaded_pdf, loaded_data)
147+
def from_convolution(cls, pdf, dataset):
148+
# This should work for both single dataset and whole groups
149+
try:
150+
datasets = dataset.datasets
151+
except AttributeError:
152+
datasets = (dataset,)
153+
154+
try:
155+
all_preds = []
156+
all_centrals = []
157+
for d in datasets:
158+
all_preds.append(predictions(d, pdf))
159+
all_centrals.append(central_predictions(d, pdf))
160+
except PredictionsRequireCutsError as e:
161+
raise PredictionsRequireCutsError("Predictions from FKTables always require cuts, "
162+
"if you want to use the fktable intrinsic cuts set `use_cuts: 'internal'`") from e
163+
th_predictions = pd.concat(all_preds)
164+
central_values = pd.concat(all_centrals)
132165

133166
label = cls.make_label(pdf, dataset)
134167

135-
return cls(th_predictions, pdf.stats_class, label)
168+
return cls(th_predictions, pdf.stats_class, label, central_value=central_values)
136169

137170

138171
class PositivityResult(StatsResult):
@@ -454,7 +487,7 @@ def results(dataset: (DataSetSpec), pdf: PDF, covariance_matrix, sqrt_covmat):
454487
data = dataset.load()
455488
return (
456489
DataResult(data, covariance_matrix, sqrt_covmat),
457-
ThPredictionsResult.from_convolution(pdf, dataset, loaded_data=data),
490+
ThPredictionsResult.from_convolution(pdf, dataset),
458491
)
459492

460493

@@ -480,13 +513,9 @@ def pdf_results(
480513
"""Return a list of results, the first for the data and the rest for
481514
each of the PDFs."""
482515

483-
data = dataset.load()
484-
th_results = []
485-
for pdf in pdfs:
486-
th_result = ThPredictionsResult.from_convolution(pdf, dataset, loaded_data=data)
487-
th_results.append(th_result)
516+
th_results = [ThPredictionsResult.from_convolution(pdf, dataset) for pdf in pdfs]
488517

489-
return (DataResult(data, covariance_matrix, sqrt_covmat), *th_results)
518+
return (DataResult(dataset.load(), covariance_matrix, sqrt_covmat), *th_results)
490519

491520

492521
@require_one("pdfs", "pdf")
2 Bytes
Loading

0 commit comments

Comments
 (0)