22Module holding GofCalculator class
33'''
44from functools import lru_cache
5+ from typing import cast
56
67import zfit
78import numpy
89import pandas as pnd
910
1011from scipy import stats
11- from zfit .core .basepdf import BasePDF as zpdf
12- from zfit .core .parameter import Parameter as zpar
12+ from zfit .interface import ZfitBinnedData
13+ from zfit .interface import ZfitData as zdat
14+ from zfit .interface import ZfitPDF as zpdf
15+ from zfit .interface import ZfitParameter as zpar
1316from dmu .logging .log_store import LogStore
1417
1518log = LogStore .add_logger ('dmu:stats:gofcalculator' )
@@ -26,14 +29,35 @@ def __init__(self, nll, ndof : int = 10):
2629 self ._pdf = self ._pdf_from_nll ()
2730 self ._data_in = self ._data_from_nll ()
2831 self ._data_np = self ._data_np_from_data (self ._data_in )
29- self ._data_zf = zfit .Data .from_numpy (obs = self ._pdf .space , array = self ._data_np )
32+ self ._data_bn = self ._data_bn_from_data (self ._data_in )
33+ self ._data_zf = self ._get_zdata ()
34+ # ----------------------
def _get_zdata(self) -> zdat | ZfitBinnedData:
    '''
    Picks the dataset object that will be handed to zfit.

    Returns
    -------------
    Unbinned zfit data built from the stored numpy array (over the PDF's space)
    when such an array exists, otherwise the input dataset itself if it is binned

    Raises
    -------------
    ValueError: If no numpy array is stored and the input dataset is not binned
    '''
    arr_unbinned = self._data_np

    # No numpy representation: the only remaining usable input is binned data
    if arr_unbinned is None:
        original = self._data_in
        if not isinstance(original, ZfitBinnedData):
            raise ValueError('Data is neither binned, nor convertible to numpy array')

        return original

    return zfit.Data.from_numpy(obs=self._pdf.space, array=arr_unbinned)
3048 # ---------------------
31- def _data_np_from_data (self , dat ) -> numpy .ndarray :
49+ def _data_np_from_data (self , dat ) -> numpy .ndarray | None :
3250 if isinstance (dat , numpy .ndarray ):
3351 return dat
3452
53+ if isinstance (dat , zfit .data .BinnedData ):
54+ log .info ('Input data is binned, cannot convert to numpy' )
55+ return None
56+
3557 if isinstance (dat , zfit .Data ):
36- return zfit .run (zfit .z .unstack_x (dat ))
58+ arr_val = zfit .run (zfit .z .unstack_x (dat ))
59+ arr_val = cast (numpy .ndarray , arr_val )
60+ return arr_val
3761
3862 if isinstance (dat , pnd .DataFrame ):
3963 return dat .to_numpy ()
@@ -44,6 +68,21 @@ def _data_np_from_data(self, dat) -> numpy.ndarray:
4468
4569 data_type = str (type (dat ))
4670 raise ValueError (f'Data is not a numpy array, zfit.Data or pandas.DataFrame, but { data_type } ' )
71+ # ----------------------
def _data_bn_from_data(self, dat) -> ZfitBinnedData | None:
    '''
    Filters the input dataset, keeping it only when it is binned.

    Parameters
    -------------
    dat: Dataset to inspect

    Returns
    -------------
    The same object if it is an instance of ZfitBinnedData, None otherwise
    '''
    return dat if isinstance(dat, ZfitBinnedData) else None
4786 # ---------------------
4887 def _pdf_from_nll (self ) -> zpdf :
4988 l_model = self ._nll .model
@@ -52,7 +91,7 @@ def _pdf_from_nll(self) -> zpdf:
5291
5392 return l_model [0 ]
5493 # ---------------------
55- def _data_from_nll (self ) -> zpdf :
94+ def _data_from_nll (self ) -> zdat :
5695 l_data = self ._nll .data
5796 if len (l_data ) != 1 :
5897 raise ValueError ('Not found one and only one dataset' )
@@ -81,7 +120,9 @@ def _get_binning(self) -> tuple[int, float, float]:
81120 # ---------------------
82121 def _get_pdf_bin_contents (self ) -> numpy .ndarray :
83122 nbins , min_x , max_x = self ._get_binning ()
84- _ , arr_edg = numpy .histogram (self ._data_np , bins = nbins , range = (min_x , max_x ))
123+
124+ if self ._data_np is not None :
125+ _ , arr_edg = numpy .histogram (self ._data_np , bins = nbins , range = (min_x , max_x ))
85126
86127 size = arr_edg .size
87128
@@ -139,7 +180,7 @@ def get_gof(self, kind : str) -> float:
139180 -----------------
140181 kind: Type of goodness of fit: pvalue, chi2, chi2/ndof
141182
142- Returns
183+ Returns
143184 -----------------
144185 Goodness of fit of a given kind
145186 '''
0 commit comments