Skip to content

Commit 8e87ebe

Browse files
committed
WIP: Trying to enable this with binning
1 parent 446213b commit 8e87ebe

File tree

2 files changed

+76
-14
lines changed

2 files changed

+76
-14
lines changed

src/dmu/stats/gof_calculator.py

Lines changed: 49 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,17 @@
22
Module holding GofCalculator class
33
'''
44
from functools import lru_cache
5+
from typing import cast
56

67
import zfit
78
import numpy
89
import pandas as pnd
910

1011
from scipy import stats
11-
from zfit.core.basepdf import BasePDF as zpdf
12-
from zfit.core.parameter import Parameter as zpar
12+
from zfit.interface import ZfitBinnedData
13+
from zfit.interface import ZfitData as zdat
14+
from zfit.interface import ZfitPDF as zpdf
15+
from zfit.interface import ZfitParameter as zpar
1316
from dmu.logging.log_store import LogStore
1417

1518
log = LogStore.add_logger('dmu:stats:gofcalculator')
@@ -26,14 +29,35 @@ def __init__(self, nll, ndof : int = 10):
2629
self._pdf = self._pdf_from_nll()
2730
self._data_in = self._data_from_nll()
2831
self._data_np = self._data_np_from_data(self._data_in)
29-
self._data_zf = zfit.Data.from_numpy(obs=self._pdf.space, array=self._data_np)
32+
self._data_bn = self._data_bn_from_data(self._data_in)
33+
self._data_zf = self._get_zdata()
34+
# ----------------------
35+
def _get_zdata(self) -> zdat|ZfitBinnedData:
    '''
    Picks the dataset object that will be stored as the zfit data.

    Returns
    -------------
    Unbinned zfit dataset built from the numpy array, over the PDF's space,
    when that array is available. Otherwise the input dataset itself,
    as long as it is binned.

    Raises
    -------------
    ValueError: If there is no numpy array and the input is not binned
    '''
    arr_val = self._data_np
    if arr_val is not None:
        obs = self._pdf.space
        return zfit.Data.from_numpy(obs=obs, array=arr_val)

    # No array available, only a binned input can be used as is
    data = self._data_in
    if not isinstance(data, ZfitBinnedData):
        raise ValueError('Data is neither binned, nor convertible to numpy array')

    return data
3048
# ---------------------
31-
def _data_np_from_data(self, dat) -> numpy.ndarray:
49+
def _data_np_from_data(self, dat) -> numpy.ndarray|None:
3250
if isinstance(dat, numpy.ndarray):
3351
return dat
3452

53+
if isinstance(dat, zfit.data.BinnedData):
54+
log.info('Input data is binned, cannot convert to numpy')
55+
return None
56+
3557
if isinstance(dat, zfit.Data):
36-
return zfit.run(zfit.z.unstack_x(dat))
58+
arr_val = zfit.run(zfit.z.unstack_x(dat))
59+
arr_val = cast(numpy.ndarray, arr_val)
60+
return arr_val
3761

3862
if isinstance(dat, pnd.DataFrame):
3963
return dat.to_numpy()
@@ -44,6 +68,21 @@ def _data_np_from_data(self, dat) -> numpy.ndarray:
4468

4569
data_type = str(type(dat))
4670
raise ValueError(f'Data is not a numpy array, zfit.Data or pandas.DataFrame, but {data_type}')
71+
# ----------------------
72+
def _data_bn_from_data(self, dat) -> ZfitBinnedData|None:
    '''
    Keeps the input dataset only when it is a binned one.

    Parameters
    -------------
    dat: Dataset to inspect

    Returns
    -------------
    The same object if it is an instance of ZfitBinnedData, None otherwise
    '''
    is_binned = isinstance(dat, ZfitBinnedData)

    return dat if is_binned else None
4786
# ---------------------
4887
def _pdf_from_nll(self) -> zpdf:
4988
l_model = self._nll.model
@@ -52,7 +91,7 @@ def _pdf_from_nll(self) -> zpdf:
5291

5392
return l_model[0]
5493
# ---------------------
55-
def _data_from_nll(self) -> zpdf:
94+
def _data_from_nll(self) -> zdat:
5695
l_data = self._nll.data
5796
if len(l_data) != 1:
5897
raise ValueError('Not found one and only one dataset')
@@ -81,7 +120,9 @@ def _get_binning(self) -> tuple[int, float, float]:
81120
# ---------------------
82121
def _get_pdf_bin_contents(self) -> numpy.ndarray:
83122
nbins, min_x, max_x = self._get_binning()
84-
_, arr_edg = numpy.histogram(self._data_np, bins = nbins, range=(min_x, max_x))
123+
124+
if self._data_np is not None:
125+
_, arr_edg = numpy.histogram(self._data_np, bins = nbins, range=(min_x, max_x))
85126

86127
size = arr_edg.size
87128

@@ -139,7 +180,7 @@ def get_gof(self, kind : str) -> float:
139180
-----------------
140181
kind: Type of goodness of fit: pvalue, chi2, chi2/ndof
141182
142-
Returns
183+
Returns
143184
-----------------
144185
Goodness of fit of a given kind
145186
'''

tests/stats/test_gofcalculator.py

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import numpy
99
import pytest
1010

11+
from zfit import Data as zdata
1112
from dmu.stats.gof_calculator import GofCalculator
1213
from dmu.logging.log_store import LogStore
1314

@@ -20,6 +21,7 @@ class Data:
2021
'''
2122
minimizer = zfit.minimize.Minuit()
2223
obs = zfit.Space('x', limits=(-10, 10))
24+
obs_bin = zfit.Space('x', limits=(-10, 10), binning=50)
2325
#---------------------------------------------
2426
@pytest.fixture(scope='session', autouse=True)
2527
def _initialize():
@@ -33,24 +35,43 @@ def _get_model():
3335

3436
return pdf
3537
# -------------------------------------------
36-
def _get_data():
38+
def _get_data() -> zdata:
    '''
    Returns
    -------------
    Unbinned zfit dataset over Data.obs, holding 10000 entries
    drawn from a normal distribution with mean 0 and width 1
    '''
    arr_val = numpy.random.normal(loc=0, scale=1, size=10000)

    return zfit.Data.from_numpy(obs=Data.obs, array=arr_val)
4143
# -------------------------------------------
42-
def _get_nll():
44+
def _get_nll(binned : bool):
    '''
    Builds the likelihood used by the tests.

    Parameters
    -------------
    binned: If True, data and model are binned with Data.obs_bin and a
            BinnedNLL is made, otherwise an UnbinnedNLL is made

    Returns
    -------------
    zfit loss built from the model and the data
    '''
    model = _get_model()
    data  = _get_data()

    if not binned:
        return zfit.loss.UnbinnedNLL(model=model, data=data)

    # Data is converted before the model, both with the same binned space
    data_bn  = data.to_binned(space=Data.obs_bin)
    model_bn = model.to_binned(space=Data.obs_bin)

    return zfit.loss.BinnedNLL(model=model_bn, data=data_bn)
4856
# -------------------------------------------
49-
def test_simple():
57+
def test_unbinned():
    '''
    Minimizes an unbinned likelihood and checks that the p-value
    given by GofCalculator is within 0.01 of 0.965
    '''
    loss   = _get_nll(binned=False)
    result = Data.minimizer.minimize(loss)
    print(result)

    calculator = GofCalculator(loss, ndof=10)
    pvalue     = calculator.get_gof(kind='pvalue')

    assert math.isclose(pvalue, 0.965, abs_tol=0.01)
69+
# -------------------------------------------
70+
def test_binned():
5071
'''
51-
Simplest test of GofCalculator
72+
Test GofCalculator with binned data
5273
'''
53-
nll = _get_nll()
74+
nll = _get_nll(binned=True)
5475
res = Data.minimizer.minimize(nll)
5576
print(res)
5677

0 commit comments

Comments
 (0)