Skip to content
Merged
Show file tree
Hide file tree
Changes from 28 commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
92a62c4
Add dataset definition
tomaskontrimas Nov 7, 2024
7a1a9bb
Add temp notebook
tomaskontrimas Nov 7, 2024
ebf75a1
Add custom irfs path.
chiarabellenghi Nov 7, 2024
469adae
Change basepath
tomaskontrimas Nov 7, 2024
82a6c37
Save the script used to link irfs folders for testing
tomaskontrimas Nov 7, 2024
e5cfd31
Setting the default max gamma to 4.0
chiarabellenghi Dec 18, 2024
255cabf
Fix for missing reco energies in some true neutrino energy bins.
chiarabellenghi Feb 17, 2025
74e3358
Merge branch 'data_release' of github.com:icecube/skyllh into data_re…
chiarabellenghi Feb 17, 2025
5bb6f2b
Add warning about fitting gamma values beyond 4.
chiarabellenghi Feb 17, 2025
98671cc
Fix for missing reco energies in some true neutrino energy bins.
chiarabellenghi Feb 17, 2025
b0da467
Update 14y dataset collection name
chiarabellenghi Feb 25, 2025
9a83c8e
Change to the energy spline creation method for flexibility
chiarabellenghi Feb 26, 2025
0914f78
Adapt the rest to work with the new energy spline
chiarabellenghi Feb 26, 2025
44046dd
Bug fix.
chiarabellenghi Feb 27, 2025
8dabba2
Bug fix
chiarabellenghi Feb 27, 2025
96c8f2a
Check validity of lowest energy bin for detection probability calcula…
chiarabellenghi Apr 24, 2025
231ef25
change dataset energy bins to match csky
chiarabellenghi Apr 24, 2025
8fa0edf
Fix IC79 binning definitions for bkg PDF construction
chiarabellenghi Apr 30, 2025
6958d7c
Spline bkg histogram for SoB ratio
chiarabellenghi Oct 18, 2025
8ccb49b
Backward compatibility fixes
chiarabellenghi Oct 18, 2025
6b51e19
Revert "Backward compatibility fixes"
chiarabellenghi Oct 18, 2025
690ce7d
Make old dataset future-compatible
chiarabellenghi Oct 18, 2025
0398b57
Merge branch 'data_release' of github.com:icecube/skyllh into data_re…
chiarabellenghi Oct 18, 2025
fb08766
Merge remote-tracking branch 'origin/master' into data_release
chiarabellenghi Oct 22, 2025
485ab02
Merge remote-tracking branch 'origin/master' into data_release
chiarabellenghi Oct 31, 2025
cc77cd2
Merge remote-tracking branch 'origin' into data_release
chiarabellenghi Mar 10, 2026
89924ad
remove energy PDF ratio cap_ratio for zero bkg PDF values
chiarabellenghi Mar 10, 2026
66743f9
cleanup and updated docs
chiarabellenghi Mar 10, 2026
74f564e
Remove cap_ratio
tomaskontrimas Mar 10, 2026
c3196a3
Fix typos
tomaskontrimas Mar 10, 2026
624a72b
Set default `cumulative_thr` value
tomaskontrimas Mar 10, 2026
14004ca
Delete no longer needed script
tomaskontrimas Mar 10, 2026
75aa21f
Cleanup dataset definitions and set aux_data for individual datasets
tomaskontrimas Mar 10, 2026
5020873
Fix typo
tomaskontrimas Mar 10, 2026
8f8f105
rename irfs files for IC86
chiarabellenghi Mar 11, 2026
04b9a3b
Update skyllh/analyses/i3/publicdata_ps/utils.py
chiarabellenghi Mar 11, 2026
9fcdada
remove commented out lines
chiarabellenghi Mar 11, 2026
8a2e526
Remove outdated comment
chiarabellenghi Mar 11, 2026
9396bd8
remove outdated bkg pdf evaluation
chiarabellenghi Mar 11, 2026
b15d364
Add explanation
tomaskontrimas Mar 11, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
666 changes: 666 additions & 0 deletions doc/sphinx/tutorials/publicdata_ps_14yr.ipynb

Large diffs are not rendered by default.

13 changes: 8 additions & 5 deletions skyllh/analyses/i3/publicdata_ps/aeff.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,11 +326,14 @@ def get_detection_prob_for_decnu(
enu_binedges = np.power(10, self.log10_enu_binedges)

# Get the bin indices for the lower and upper energy range values.
(lidx,) = get_bin_indices_from_lower_and_upper_binedges(
enu_binedges[:-1],
enu_binedges[1:],
np.array([enu_range_min])
)
if enu_range_min <= enu_binedges[0]:
lidx = 0
else:
(lidx,) = get_bin_indices_from_lower_and_upper_binedges(
enu_binedges[:-1],
enu_binedges[1:],
np.array([enu_range_min])
)
if enu_range_max >= enu_binedges[-1]:
uidx = len(enu_binedges)-1
else:
Expand Down
31 changes: 23 additions & 8 deletions skyllh/analyses/i3/publicdata_ps/backgroundpdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@
from skyllh.core.timing import (
TaskTimer,
)

from skyllh.analyses.i3.publicdata_ps.utils import (
FctSpline2D,
)

class PDBackgroundI3EnergyPDF(
EnergyPDF,
Expand Down Expand Up @@ -229,6 +231,8 @@ def __init__(

self._hist_logE_sinDec = h

self._pdf_spline = self._construct_conditional_pdf_spline()

@property
def hist_smoothing_method(self):
"""The instance of HistSmoothingMethod defining the smoothing filter of
Expand Down Expand Up @@ -276,6 +280,16 @@ def hist_mask_mc_covered_with_physics(self):
self._hist_mask_mc_covered &
~self._hist_mask_mc_covered_zero_physics)

def _construct_conditional_pdf_spline(self):
    """Constructs a 2D spline representation of the normalized
    logE-sinDec background histogram ``self._hist_logE_sinDec``.

    The spline is built over the bin edges of the first two binning
    definitions of this PDF (log10 energy and sin(dec)).

    Returns
    -------
    spline : instance of FctSpline2D
        The FctSpline2D instance representing the logE-sinDec
        histogram as a smooth 2-dimensional function.
    """
    # NOTE(review): FctSpline2D is a project helper; presumably it
    # interpolates the histogram values over the given bin edges —
    # confirm against skyllh/analyses/i3/publicdata_ps/utils.py.
    spline = FctSpline2D(
        self._hist_logE_sinDec,
        self.binnings[0].binedges,
        self.binnings[1].binedges)

    return spline

def initialize_for_new_trial(
self,
tdm,
Expand All @@ -285,16 +299,17 @@ def initialize_for_new_trial(
which has to be done only once for a particular trial data.
"""

logE_binning = self.get_binning('log_energy')
sinDec_binning = self.get_binning('sin_dec')
# logE_binning = self.get_binning('log_energy')
# sinDec_binning = self.get_binning('sin_dec')

logE_idx = np.digitize(
tdm['log_energy'], logE_binning.binedges) - 1
sinDec_idx = np.digitize(
tdm['sin_dec'], sinDec_binning.binedges) - 1
# logE_idx = np.digitize(
# tdm['log_energy'], logE_binning.binedges) - 1
# sinDec_idx = np.digitize(
# tdm['sin_dec'], sinDec_binning.binedges) - 1

with TaskTimer(tl, 'Evaluating logE-sinDec histogram.'):
self._pd = self._hist_logE_sinDec[(logE_idx, sinDec_idx)]
# self._pd = self._hist_logE_sinDec[(logE_idx, sinDec_idx)]
self._pd = self._pdf_spline(tdm['log_energy'], tdm['sin_dec'], grid=False)
Copy link

Copilot AI Mar 10, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FctSpline2D.__call__ defaults to renorm=True, and the grid=False renormalization path loops over every distinct sin_dec value to compute Z(y). In initialize_for_new_trial, tdm['sin_dec'] is typically continuous, so this becomes an O(N_events) Python loop and will be a major performance bottleneck. Consider calling the spline with renorm=False here (the histogram is already normalized per sin(dec) bin), or change the default to renorm=False and only renormalize on a coarse y-grid where needed.

Suggested change
self._pd = self._pdf_spline(tdm['log_energy'], tdm['sin_dec'], grid=False)
self._pd = self._pdf_spline(
tdm['log_energy'],
tdm['sin_dec'],
grid=False,
renorm=False,
)

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@chiarabellenghi not sure about this, it sounds correct but also shouldn't impact performance too much

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see things being especially slow at the moment, maybe we can keep this as a possible enhancement for the future?


def assert_is_valid_for_trial_data(
self,
Expand Down
58 changes: 5 additions & 53 deletions skyllh/analyses/i3/publicdata_ps/pdfratio.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ def __init__(
self,
sig_pdf_set,
bkg_pdf,
cap_ratio=False,
**kwargs):
"""Creates a PDFRatio instance for the public data.
It takes a signal PDF set for different discrete gamma values.
Expand All @@ -37,9 +36,6 @@ def __init__(
bkg_pdf : instance of PDDataBackgroundI3EnergyPDF
The PDDataBackgroundI3EnergyPDF instance holding the background
energy PDF.
cap_ratio : bool
Switch whether the S/B PDF ratio should get capped where no
background is available. Default is False.
"""
self._logger = get_logger(module_class_method_name(self, '__init__'))

Expand All @@ -53,35 +49,6 @@ def __init__(
func=self._get_ratio_values,
param_grid_set=sig_pdf_set.param_grid_set)

self.cap_ratio = cap_ratio
if self.cap_ratio:
self._logger.info('The energy PDF ratio will be capped!')

# Calculate the ratio value for the phase space where no background
# is available. We will take the p_sig percentile of the signal
# like phase space.
ratio_perc = 99

# Get the log10 reco energy values where the background pdf has
# non-zero values.
n_logE = bkg_pdf.get_binning('log_energy').nbins
n_sinDec = bkg_pdf.get_binning('sin_dec').nbins
bd = bkg_pdf._hist_logE_sinDec > 0
log10_e_bc = bkg_pdf.get_binning('log_energy').bincenters
self.ratio_fill_value_dict = dict()
for sig_pdf_key in sig_pdf_set.pdf_keys:
sigpdf = sig_pdf_set[sig_pdf_key]
sigvals = sigpdf.get_pd_by_log10_reco_e(log10_e_bc)
sigvals = np.broadcast_to(sigvals, (n_sinDec, n_logE)).T
r = sigvals[bd] / bkg_pdf._hist_logE_sinDec[bd]
# Remove possible inf values.
r = r[np.invert(np.isinf(r))]
val = np.percentile(r[r > 1.], ratio_perc)
self.ratio_fill_value_dict[sig_pdf_key] = val
self._logger.info(
f'The cap value for the energy PDF ratio key {sig_pdf_key} '
f'is {val}.')

# Create cache variables for the last ratio value and gradients in
# order to avoid the recalculation of the ratio value when the
# ``get_gradient`` method is called (usually after the ``get_ratio``
Expand All @@ -91,18 +58,6 @@ def __init__(
self._cache_ratio = None
self._cache_grads = None

@property
def cap_ratio(self):
"""Boolean switch whether to cap the ratio where no background
information is available (True) or use the smallest possible floating
point number greater than zero as background pdf value (False).
"""
return self._cap_ratio

@cap_ratio.setter
def cap_ratio(self, b):
self._cap_ratio = b

def _is_cached(
self,
tdm,
Expand Down Expand Up @@ -227,14 +182,11 @@ def _get_ratio_values(
where=m_nonzero_bkg,
out=ratio)

if self._cap_ratio:
ratio[m_zero_bkg] = self.ratio_fill_value_dict[sig_pdf_key]
else:
np.divide(
ratio,
np.finfo(np.double).resolution,
where=m_zero_bkg,
out=ratio)
np.divide(
ratio,
np.finfo(np.double).resolution,
where=m_zero_bkg,
out=ratio)

# Check for positive inf values in the ratio and set the ratio to a
# finite number. Here we choose the maximum value of float32 to keep
Expand Down
4 changes: 2 additions & 2 deletions skyllh/analyses/i3/publicdata_ps/signal_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,8 +380,8 @@ def create_energy_filter_mask(
if cut_sindec is None:
logger.warn(
'No `cut_sindec` has been specified. The energy cut will be '
'applied in [-90, 0] deg.')
cut_sindec = 0.
'applied in [-90, 90] deg.')
cut_sindec = np.sin(np.radians(90.1))
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so this returns:

np.sin(np.radians(90.1))
np.float64(0.9999984769132877)

I guess we could just set it to 1.0 or 1.1?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think all this cut_sindec stuff can be removed. The new energy_cut_spline is doing the correct thing at all declinations

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now that I think about it, for the new data release, the entire cut to inject events in the southern sky at meaningful energies is no longer needed. There, the effective area has been set to zero at low energies. However, we should keep this function so that the signal injection still works correctly for the 10yr data release.


filter_mask = np.logical_and(
events['sin_dec'] < cut_sindec,
Expand Down
27 changes: 18 additions & 9 deletions skyllh/analyses/i3/publicdata_ps/signalpdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ def __init__(
# Select the slice of the smearing matrix corresponding to the
# source declination band.
true_dec_idx = sm.get_true_dec_idx(src_dec)
sm_pdf = sm.pdf[:, true_dec_idx]
sm_pdf_dec_slice = sm.pdf[:, true_dec_idx]

# Only look at true neutrino energies for which a reconstructed
# muon energy distribution exists in the smearing matrix.
Expand Down Expand Up @@ -329,7 +329,7 @@ def __init__(
ang_err_bw = sm.ang_err_upper_edges - sm.ang_err_lower_edges

# Create the energy pdf for different gamma values.
def create_energy_pdf(sm_pdf, fluxmodel, gridparams):
def create_energy_pdf(sm_pdf_dec_slice, fluxmodel, gridparams):
"""Creates an energy pdf for a specific gamma value.
"""
# Create a copy of the FluxModel with the given flux parameters.
Expand Down Expand Up @@ -376,19 +376,28 @@ def create_reco_e_pdf_for_true_e(idx, true_e_idx):
# Create the energy PDF f_e = P(log10_E_reco|dec) =
# \int dPsi dang_err P(E_reco,Psi,ang_err).
f_e = np.sum(
sm_pdf[true_e_idx] *
sm_pdf_dec_slice[true_e_idx] *
psi_edges_bw[true_e_idx, true_dec_idx, :, :, np.newaxis] *
ang_err_bw[true_e_idx, true_dec_idx, :, :, :],
axis=(-1, -2)
)

# Build the spline for this P(E_reco|E_nu). Weigh the pdf
# with the true neutrino energy probability (flux prob).

log10_reco_e_binedges = sm.log10_reco_e_binedges[
true_e_idx, true_dec_idx]

p = f_e * true_e_prob[idx]
# Check that the reco energy PDF is not all zeros
if np.sum(f_e) == 0:
self._logger.warn(
'There is no distribution of reconstructed energies '
'for true neutrino energy {}. Assigning a sequance of '
'zeros.'.format(sm.log10_true_enu_binedges[true_e_idx]))
f_e = np.zeros_like(f_e)
log10_reco_e_binedges = np.linspace(2, 6, f_e.size+1)

# Build the spline for this P(E_reco|E_nu). Weigh the pdf
# with the true neutrino energy probability (flux prob).
p = f_e * true_e_prob[idx]

spline = FctSpline1D(p, log10_reco_e_binedges)

return spline(xvals)
Expand All @@ -408,7 +417,7 @@ def create_reco_e_pdf_for_true_e(idx, true_e_idx):
return pdf

args_list = [
((sm_pdf, fluxmodel, gridparams), {})
((sm_pdf_dec_slice, fluxmodel, gridparams), {})
for gridparams in self.gridparams_list
]

Expand All @@ -418,7 +427,7 @@ def create_reco_e_pdf_for_true_e(idx, true_e_idx):
ncpu=self.ncpu,
ppbar=ppbar)

del sm_pdf
del sm_pdf_dec_slice

# Save all the energy PDF objects in the PDFSet PDF registry with
# the hash of the individual parameters as key.
Expand Down
46 changes: 12 additions & 34 deletions skyllh/analyses/i3/publicdata_ps/time_dependent_ps.py
Original file line number Diff line number Diff line change
Expand Up @@ -636,7 +636,7 @@ def do_trials_with_em(
mean_n_sig=0,
gamma_src=2,
gamma_min=1,
gamma_max=5,
gamma_max=4,
n_gamma=21,
gauss=None,
box=None,
Expand Down Expand Up @@ -776,9 +776,6 @@ def create_analysis( # noqa: C901
gamma_max=5.,
kde_smoothing=False,
minimizer_impl="LBFGS",
cut_sindec=None,
spl_smooth=None,
cap_ratio=False,
compress_data=False,
keep_data_fields=None,
evt_sel_delta_angle_deg=10,
Expand All @@ -799,7 +796,7 @@ def create_analysis( # noqa: C901
analysis.
source : PointLikeSource instance
The PointLikeSource instance defining the point source position.
box : None or dictionary with start, end
box : None or dictionary with start, stop
None if no box shaped time pdf, else dictionary of the format
``{'start': float, 'stop': float}``.
gauss : None or dictionary with mu, sigma
Expand Down Expand Up @@ -832,19 +829,6 @@ def create_analysis( # noqa: C901
(L-BFG-S minimizer used from the :mod:`scipy.optimize` module), or
"minuit" (Minuit minimizer used by the :mod:`iminuit` module).
Default: "LBFGS".
cut_sindec : list of float | None
sin(dec) values at which the energy cut in the southern sky should
start. If None, np.sin(np.radians([-2, 0, -3, 0, 0])) is used.
spl_smooth : list of float
Smoothing parameters for the 1D spline for the energy cut. If None,
[0., 0.005, 0.05, 0.2, 0.3] is used.
cap_ratio : bool
If set to True, the energy PDF ratio will be capped to a finite value
where no background energy PDF information is available. This will
ensure that an energy PDF ratio is available for high energies where
no background is available from the experimental data.
If kde_smoothing is set to True, cap_ratio should be set to False!
Default is False.
compress_data : bool
Flag if the data should get converted from float64 into float32.
keep_data_fields : list of str | None
Expand Down Expand Up @@ -941,6 +925,11 @@ def create_analysis( # noqa: C901
valmax=ns_max)

# Define the fit parameter gamma.
if gamma_max > 4.0:
logger.warn(
'You are allowing `gamma` values larger than 4.0. '
'For such soft spectra, we cannot garantee the correct '
'behaviour of the energy PDF.')
param_gamma = Parameter(
name='gamma',
initial=gamma_seed,
Expand Down Expand Up @@ -996,16 +985,6 @@ def create_analysis( # noqa: C901
shg_mgr=shg_mgr,
delta_angle=np.deg2rad(evt_sel_delta_angle_deg))

# Prepare the spline parameters for the signal generator.
if cut_sindec is None:
cut_sindec = np.sin(np.radians([-2, 0, -3, 0, 0]))
if spl_smooth is None:
spl_smooth = [0., 0.005, 0.05, 0.2, 0.3]
if len(spl_smooth) < len(datasets) or len(cut_sindec) < len(datasets):
raise AssertionError(
'The length of the spl_smooth and of the cut_sindec must be equal '
f'to the length of datasets: {len(datasets)}.')

# Add the data sets to the analysis.
pbar = ProgressBar(len(datasets), parent=ppbar).start()
for (ds_idx, ds) in enumerate(datasets):
Expand Down Expand Up @@ -1058,8 +1037,7 @@ def create_analysis( # noqa: C901
energy_pdfratio = PDSigSetOverBkgPDFRatio(
cfg=cfg,
sig_pdf_set=energy_sigpdfset,
bkg_pdf=energy_bkgpdf,
cap_ratio=cap_ratio)
bkg_pdf=energy_bkgpdf)

pdfratio = spatial_pdfratio * energy_pdfratio

Expand Down Expand Up @@ -1112,9 +1090,10 @@ def create_analysis( # noqa: C901
)

energy_cut_spline = create_energy_cut_spline(
ds,
data.exp,
spl_smooth[ds_idx])
ds=ds,
exp_data=data.exp,
spl_smooth=ds.get_aux_data('spline_smoothing'),
cumulative_thr=ds.get_aux_data('cumulative_threshold'))

sig_generator = TimeDependentPDDatasetSignalGenerator(
cfg=cfg,
Expand All @@ -1124,7 +1103,6 @@ def create_analysis( # noqa: C901
livetime=livetime,
time_flux_profile=time_flux_profile,
energy_cut_spline=energy_cut_spline,
cut_sindec=cut_sindec[ds_idx],
)

ana.add_dataset(
Expand Down
Loading