1 change: 1 addition & 0 deletions docs/changes/944.feature.rst
@@ -0,0 +1 @@
Enhanced phase-dispersion-minimisation search by enabling weighted events.
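As a usage illustration of the feature described above (not part of the diff; the return signature of phase_dispersion_search and the Poisson-variance choice are assumptions), a weighted PDM search might be run roughly like this:

import numpy as np
from stingray.pulse.search import phase_dispersion_search

# Simulated binned light curve with a known pulse period (illustrative only)
period = 1.203
times = np.arange(0, 1000, 0.5)                       # bin centres
flux = np.random.poisson(10 + 3 * np.cos(2 * np.pi * times / period)).astype(float)
variances = np.clip(flux, 1.0, None)                  # assumed Poisson-like variances, floored to avoid zeros

trial_freqs = np.linspace(0.80, 0.86, 301)            # grid around 1 / period
freqs, stats = phase_dispersion_search(
    times, flux, trial_freqs, nbin=16, variances=variances
)
best_freq = freqs[np.argmin(stats)]                   # PDM: the minimum of the statistic marks the candidate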
80 changes: 50 additions & 30 deletions stingray/pulse/pulsar.py
@@ -209,7 +209,17 @@ def phase_exposure(start_time, stop_time, period, nbin=16, gti=None):
return expo / np.max(expo)


def fold_events(times, *frequency_derivatives, **opts):
def fold_events(
times,
*frequency_derivatives,
mode="ef",
nbin=16,
weights=1,
gti=None,
ref_time=0,
expocorr=False,
variances=None,
):
"""Epoch folding with exposure correction.

By default, the keyword `times` accepts a list of
@@ -232,21 +242,21 @@ def fold_events(times, *frequency_derivatives, **opts):
nbin : int, optional, default 16
The number of bins in the pulse profile

weights : float or array of floats, optional
weights : float or array of floats, optional, default 1
The weights of the data. It can either be specified as a single value
for all points, or an array with the same length as ``time``

gti : [[gti0_0, gti0_1], [gti1_0, gti1_1], ...], optional
gti : [[gti0_0, gti0_1], [gti1_0, gti1_1], ...], optional, default None
Good time intervals

ref_time : float, optional, default 0
Reference time for the timing solution

expocorr : bool, default False
expocorr : bool, optional, default False
Correct each bin for exposure (use when the period of the pulsar is
comparable to that of GTIs)

mode : str, ["ef", "pdm"], default "ef"
mode : str, ["ef", "pdm"], optional, default "ef"
Whether to calculate the epoch folding or phase dispersion
minimization folded profile. For "ef", it calculates the (weighted)
sum of the data points in each phase bin, for "pdm", the variance
@@ -264,23 +274,12 @@ def fold_events(times, *frequency_derivatives, **opts):
The uncertainties on the pulse profile
"""

mode = opts.pop("mode", "ef")
nbin = opts.pop("nbin", 16)
weights = opts.pop("weights", 1)
# If no key is passed, *or gti is None*, defaults to the
# initial and final event
gti = opts.pop("gti", None)
if gti is None:
gti = [[times[0], times[-1]]]
# Be safe if gtis are a list
gti = np.asanyarray(gti)
ref_time = opts.pop("ref_time", 0)
expocorr = opts.pop("expocorr", False)

if opts:
raise ValueError(
f"Unidentified keyword(s) to fold_events: {', '.join([k for k in opts.keys()])} \n Please refer to the description of the function for optional parameters."
)

if not isinstance(weights, Iterable):
weights *= np.ones(len(times))
@@ -315,23 +314,38 @@ def fold_events(times, *frequency_derivatives, **opts):
raw_profile_err = raw_profile_err / expo_norm

elif mode == "pdm":
# Convert the weights to a numpy array just in case it is a list
weights = np.asarray(weights)
if np.allclose(weights, 1.0):
raise ValueError(
"Can only calculate PDM for binned light curves!"
+ " `weights` attribute must be set to fluxes!"
)

raw_profile, bins, bin_idx = scipy.stats.binned_statistic(
phases, weights, statistic=np.var, bins=np.linspace(0, 1, nbin + 1)
# For weighted PDM, the measured uncertainties (variances) of each event are required.
# Set event_weights to 1/variances, or to ones if variances is None.
event_weights = (
(1.0 / np.asarray(variances)) if variances is not None else np.ones_like(weights)
)

# I need the variance uncorrected for the number of data points in each
# bin, so I need to find that first, and then multiply
# bin_idx should be from the list [1, ..., nbin], although some might not appear
# histogram bin-edges set as [0.5, 1.5, ..., nbin - 0.5, nbin + 0.5]
# to include the bin_idx int values
bincounts, _ = np.histogram(bin_idx, bins=np.arange(0.5, nbin + 1.5))
raw_profile = raw_profile * bincounts
bins = np.linspace(0, 1, nbin + 1)
# phases are already fractional values in [0, 1]
# casting float to int removes the fractional part
bin_indices = (phases * nbin).astype(int)
# edge case: phase = 1 gives bin_index = nbin; clipping folds it into the last bin (nbin - 1)
bin_indices = np.clip(bin_indices, 0, nbin - 1)

# for efficiency, the sum-of-squared deviations for each bin is split
# sosdev = sum_in_bin( (values_in_bin - mean_in_bin)**2 )
# mean_in_bin = sum_in_bin(values_in_bin) / n_in_bin
# so need n_in_bin, sum_in_bin, sos_in_bin (sum-of-squares)
# now compute the stats for each bin using bincount
# minlength=nbin prevents array shortening when a bin index is not represented in bin_indices
n_in_bin = np.bincount(bin_indices, weights=event_weights, minlength=nbin)
sum_in_bin = np.bincount(bin_indices, weights=weights * event_weights, minlength=nbin)
sos_in_bin = np.bincount(bin_indices, weights=weights**2 * event_weights, minlength=nbin)
# put it together to make sum-of-squared deviations: sosdev; avoid division by zero
raw_profile = sos_in_bin - sum_in_bin**2 / np.maximum(n_in_bin, 1)
Copilot AI Oct 10, 2025

The magic number 1 in np.maximum should be documented or extracted as a constant. This prevents division by zero when bins are empty, but this behavior should be explicitly documented in a comment.

# dummy array for the error, which we don't have for the variance
raw_profile_err = np.zeros_like(raw_profile)
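To make the bincount bookkeeping in the new PDM branch concrete, a small standalone numpy check (illustrative only, unweighted case) shows that the per-bin sum of squared deviations computed this way matches the per-bin variance from the earlier scipy.stats.binned_statistic approach, scaled by the bin population:

import numpy as np

rng = np.random.default_rng(0)
nbin = 4
phases = rng.random(1000)
values = rng.normal(10.0, 2.0, size=phases.size)

# Same bin assignment and bincount sums as in the patch above
idx = np.clip((phases * nbin).astype(int), 0, nbin - 1)
n = np.bincount(idx, minlength=nbin)
s = np.bincount(idx, weights=values, minlength=nbin)
ss = np.bincount(idx, weights=values**2, minlength=nbin)
sosdev = ss - s**2 / np.maximum(n, 1)

# Reference: population variance within each bin, scaled by the bin population
expected = np.array([values[idx == k].var() * (idx == k).sum() for k in range(nbin)])
assert np.allclose(sosdev, expected)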
@@ -369,7 +383,7 @@ def ef_profile_stat(profile, err=None):
return np.sum((profile - mean) ** 2 / err**2)


def pdm_profile_stat(profile, sample_var, nsample):
def pdm_profile_stat(profile, sum_dev2, nsample):
Member

Is changing the name of the variable needed? Also, is the variable something different from before?

"""Calculate the phase dispersion minimization
statistic following Stellingwerf (1978)

@@ -379,8 +393,9 @@ def pdm_profile_stat(profile, sample_var, nsample):
The PDM pulse profile (variance as a function
of phase)

sample_var : float
The total population variance of the sample
sum_dev2 : float
The sum-of-squared-deviations of the sample,
computed about the grand mean (the mean of the whole sample taken together)

nsample : int
The number of time bins in the initial time
Expand All @@ -391,8 +406,13 @@ def pdm_profile_stat(profile, sample_var, nsample):
stat : float
The phase dispersion minimization statistic
"""
s2 = np.sum(profile) / (nsample - len(profile))
stat = s2 / sample_var
# Get the mean-of-squared-deviations from sum-of-squared-deviations by
# considering the number of degrees of freedom:
# nsample - nbin for the in_bin sum_dev2 because there are nbin means used
# nsample - 1 for the grand sum_dev2 because there is 1 grand mean used
Comment on lines +411 to +412

Copilot AI Oct 10, 2025

The comment uses 'sum_dev2' which is not a variable name in this function. It should reference 'profile' and 'sum_dev2' parameter names for clarity.

Suggested change
# nsample - nbin for the in_bin sum_dev2 because there are nbin means used
# nsample - 1 for the grand sum_dev2 because there is 1 grand mean used
# nsample - nbin for the in-bin sum of squared deviations (from profile) because there are nbin means used
# nsample - 1 for the grand sum of squared deviations (from sum_dev2) because there is 1 grand mean used
mean_dev2_in_bin = np.sum(profile) / (nsample - len(profile))
mean_dev2_grand = sum_dev2 / (nsample - 1)
stat = mean_dev2_in_bin / mean_dev2_grand
return stat
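A quick numeric check of the degrees-of-freedom bookkeeping (illustrative only), using the same numbers as the updated test further down: five phase bins, ten samples, and a grand sum of squared deviations of 18 give a statistic of 0.5:

import numpy as np

profile = np.array([1.0, 1, 1, 1, 1])   # in-bin sums of squared deviations
nsample, sum_dev2 = 10, 18.0            # 18 = sample variance 2 * (nsample - 1)
s2_in_bin = profile.sum() / (nsample - len(profile))   # 5 / 5 = 1.0
s2_grand = sum_dev2 / (nsample - 1)                    # 18 / 9 = 2.0
assert s2_in_bin / s2_grand == 0.5                      # matches pdm_profile_stat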


29 changes: 25 additions & 4 deletions stingray/pulse/search.py
@@ -172,7 +172,15 @@ def stat_fun(t, f, fd=0, **kwargs):


def phase_dispersion_search(
times, flux, frequencies, nbin=128, segment_size=5000, expocorr=False, gti=None, fdots=0
times,
flux,
frequencies,
nbin=128,
segment_size=5000,
expocorr=False,
gti=None,
fdots=0,
variances=None,
):
"""Performs folding at trial frequencies in time series data (i.e.~a light curve
of flux or photon counts) and computes the Phase Dispersion Minimization statistic.
@@ -227,9 +235,21 @@

def stat_fun(t, f, fd=0, **kwargs):
bins, profile, _ = fold_events(t, f, fd, **kwargs, mode="pdm")
len_flux = len(kwargs["weights"])
sigma = np.var(kwargs["weights"]) * len_flux / (len_flux - 1)
return pdm_profile_stat(profile, sigma, len_flux)
flux = kwargs["weights"]
len_flux = len(flux)
# Get the local (possibly modified copy of variances) from kwargs rather than as a closure
Copilot AI Oct 10, 2025

This comment is unclear. The code is not getting a 'modified copy' but rather retrieving the variances parameter from kwargs. Consider clarifying: 'Get variances parameter from kwargs for local use in PDM calculation'

Suggested change
# Get the local (possibly modified copy of variances) from kwargs rather than as a closure
# Get variances parameter from kwargs for local use in PDM calculation
variances_local = kwargs.get("variances", None)
event_weights = (
(1.0 / np.asarray(variances_local))
if variances_local is not None
else np.ones_like(flux)
)
mean_grand = np.average(flux, weights=event_weights)
dev2_grand = (flux - mean_grand) ** 2

sosdev_grand = np.dot(dev2_grand, event_weights)

return pdm_profile_stat(profile, sosdev_grand, len_flux)

return _folding_search(
stat_fun,
@@ -242,6 +262,7 @@ def stat_fun(t, f, fd=0, **kwargs):
gti=gti,
nbin=nbin,
fdots=fdots,
variances=variances,
)
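For reference, the inverse-variance-weighted grand sum of squared deviations that stat_fun passes to pdm_profile_stat boils down to a couple of numpy lines; a minimal sketch with hypothetical numbers:

import numpy as np

flux = np.array([4.0, 6.0, 5.0, 7.0])
variances = np.array([1.0, 4.0, 1.0, 4.0])     # hypothetical per-bin measurement variances
event_weights = 1.0 / variances
mean_grand = np.average(flux, weights=event_weights)           # weighted grand mean
sosdev_grand = np.dot((flux - mean_grand) ** 2, event_weights)
# pdm_profile_stat then divides this by (nsample - 1) to form the denominator of the statistic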


8 changes: 5 additions & 3 deletions stingray/pulse/tests/test_pulse.py
@@ -91,9 +91,10 @@ def test_stat(self):
def test_pdm_stat(self):
Member

Please test that the new keyword to folding_search also works.

"""Test pulse phase calculation, frequency only."""
prof = np.array([1, 1, 1, 1, 1])
sample_var = 2.0
sample_var = 2
nsample = 10
np.testing.assert_array_almost_equal(pdm_profile_stat(prof, sample_var, nsample), 0.5)
sum_dev2 = sample_var * (nsample - 1)
np.testing.assert_array_almost_equal(pdm_profile_stat(prof, sum_dev2, nsample), 0.5)

def test_zn(self):
"""Test pulse phase calculation, frequency only."""
@@ -221,7 +222,8 @@ def test_pulse_profile_pdm(self):
)
for pdm, ef in zip(profile, profile_ef):
if ef == 0:
assert np.isnan(pdm)
# The PDM implementation avoids dividing by zero and returns 0 instead
assert pdm == 0
else:
assert pdm == 2
assert ef == 8
2 changes: 1 addition & 1 deletion stingray/pulse/tests/test_search.py
@@ -113,7 +113,7 @@ def test_plot_phaseogram_direct(self):

def test_search_wrong_key_fails(self):
with pytest.raises(
ValueError, match=r"Unidentified keyword\(s\) to fold_events: fdot, fddot"
TypeError, match=r"fold_events\(\) got an unexpected keyword argument 'fdot'"
):
phase, prof, _ = fold_events(self.event_times, self.pulse_frequency, fdot=12, fddot=34)
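Regarding the reviewer's request above to exercise the new keyword end to end, a possible shape for such a test (an illustrative sketch only: it assumes phase_dispersion_search returns trial frequencies and statistics, and reuses self.pulse_frequency from the existing test setup) could be:

def test_pdm_search_with_variances(self):
    # Sketch: build a small binned light curve around the known frequency
    period = 1 / self.pulse_frequency
    times = np.arange(0, 500, 0.5)
    flux = np.random.poisson(10 + 5 * np.cos(2 * np.pi * times / period)).astype(float)
    variances = np.clip(flux, 1.0, None)        # Poisson-like variances, floored to avoid zeros
    trial_freqs = self.pulse_frequency * np.linspace(0.95, 1.05, 101)
    freqs, stats = phase_dispersion_search(
        times, flux, trial_freqs, nbin=16, variances=variances
    )
    assert np.isclose(freqs[np.argmin(stats)], self.pulse_frequency, rtol=1e-2)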
