Merge pull request #933 from StingraySoftware/gti_corr

matteobachetti · web-flow · commit 4884be2b9568 · 2025-08-14T20:07:14.000Z
Gti corrected power spectrum for long, gappy observations
diff --git a/docs/changes/933.feature.rst b/docs/changes/933.feature.rst
@@ -0,0 +1 @@
+GtiCorrPowerspectrum is a new class to calculate long-term power spectra from gappy data
diff --git a/pyproject.toml b/pyproject.toml
@@ -75,7 +75,7 @@ all = [
     "tinygp",
     "jaxns",
     "etils",
-    "tensorflow_probability",
+    "tfp-nightly",
     "typing_extensions",
 ]
 docs = [
diff --git a/stingray/powerspectrum.py b/stingray/powerspectrum.py
@@ -1,16 +1,18 @@
+import copy
 import warnings
 from collections.abc import Generator, Iterable
 
 import numpy as np
 import scipy
 import scipy.optimize
 import scipy.stats
+import matplotlib.pyplot as plt
 
 from stingray.crossspectrum import AveragedCrossspectrum, Crossspectrum, DynamicalCrossspectrum
-from stingray.stats import pds_probability, amplitude_upper_limit
+from stingray.stats import pds_probability, amplitude_upper_limit, pds_detection_level
 
 from .events import EventList
-from .gti import cross_two_gtis, time_intervals_from_gtis
+from .gti import cross_two_gtis, time_intervals_from_gtis, create_gti_mask
 
 from .lightcurve import Lightcurve
 from .fourier import avg_pds_from_iterable, unnormalize_periodograms
@@ -1183,6 +1185,203 @@ def power_colors(
         )
 
 
+class GtiCorrPowerspectrum(Powerspectrum):
+    main_array_attr = "freq"
+    type = "powerspectrum"
+
+    """Calculate the power spectrum of gappy light curves.
+
+    GtiCorrPowerspectrum computes the power spectrum of gappy light curves,
+    cleaning up the frequencies that are more affected by gaps.
+    Optionally, it fills bad time intervals (BTIs) with the mean count rate from
+    good time intervals (GTIs), mitigating window-induced features in the periodogram.
+    By analyzing the visibility light curve (synthetic light curve with constant mean
+    counts in GTIs), the class identifies strong peaks in the periodogram that correspond to
+    the missing data and applies notch filtering to the power spectrum to remove these
+    frequencies.
+    Additionally, it rescales the power spectrum to account for the number of bins inside and
+    outside GTIs, ensuring accurate white noise and rms estimation.
+
+    The detailed explanation of the method is given in
+    `El Byad et al. 2025 <https://arxiv.org/pdf/2505.16921>`__
+
+    Parameters
+    ----------
+    *args:
+        Any arguments that can be passed to ``Powerspectrum``
+
+    Other Parameters
+    ----------------
+    fill_lc: boolean, optional, default ``True``
+        If True, fill the BTIs of the light curve with the mean value of the counts in the GTIs.
+        Recommended.
+
+    sigma_threshold: float, optional, default ``3``
+        The sigma threshold for the detection of features in the power spectrum of the observing
+        window.
+
+    **kwargs:
+        Any other keyword arguments that can be passed to ``Powerspectrum``
+
+    Attributes
+    ----------
+    freq: numpy.ndarray
+        The array of mid-bin frequencies that the Fourier transform samples
+
+    power: numpy.ndarray
+        The array of power values
+
+    power_err: numpy.ndarray
+        The uncertainties of ``power``.
+        An approximation for each bin, given by ``power_err = power / sqrt(m)``,
+        where ``m`` is the number of powers averaged in each bin (by frequency
+        binning, or by averaging more than one spectrum). Note that for a single
+        realization (``m=1``) the error is equal to the power.
+
+    df: float
+        The frequency resolution
+
+    m: int
+        The number of averaged cross-spectra amplitudes in each bin.
+
+    n: int
+        The number of data points/time bins in one segment of the light
+        curves.
+
+    k: array of int
+        The rebinning scheme if the object has been rebinned; otherwise it is set to 1.
+
+    nphots: float
+        The total number of photons in the light curve
+
+    """
+
+    def __init__(self, *args, fill_lc=True, sigma_threshold=3, **kwargs):
+        dt = kwargs.pop("dt", None)
+        skip_checks = kwargs.pop("skip_checks", False)
+
+        if len(args) == 0:
+            self.lc = None
+        elif isinstance(args[0], EventList):
+            self.lc = args[0].to_lc(dt)
+        else:
+            self.lc = args[0]
+            if dt is None:
+                dt = self.lc.dt
+
+        if fill_lc:
+            self.fill_lc_with_mean()
+
+        self.sigma_threshold = sigma_threshold
+        if not skip_checks:
+            self.initial_checks(*args, dt=dt, **kwargs)
+
+        super().__init__(self.lc, *args[1:], dt=dt, **kwargs, skip_checks=True)
+
+        self.mjdref = None
+        if hasattr(self.lc, "mjdref"):
+            self.mjdref = self.lc.mjdref
+
+        if len(args) == 0:
+            self.mask = None
+            return
+
+        if fill_lc:
+            lc_mask = create_gti_mask(self.lc.time, self.lc.gti)
+            self.power *= lc_mask.size / np.count_nonzero(lc_mask)
+
+        self.mask = np.ones(self.power.size, dtype=bool)
+
+    def initial_checks(self, *args, **kwargs):
+        if self.lc is not None and not np.allclose(np.diff(self.lc.time), self.lc.dt):
+            raise ValueError(
+                "The time array in the light curve is not evenly spaced. "
+                "This is not supported by GtiCorrPowerspectrum."
+            )
+
+    def fill_lc_with_mean(self):
+        if self.lc is None:
+            return
+
+        mask = create_gti_mask(self.lc.time, self.lc.gti)
+        self.lc.counts = self.lc.counts.astype(float)
+        self.lc.counts[~mask] = np.mean(self.lc.counts[mask])
+
+    def clean_gti_features(self, plot=False, figname="gti_features"):
+        gti = getattr(self, "gti", None)
+        if gti is None:
+            raise AttributeError("GTI attribute is not set for this object.")
+        exposure = np.sum(gti[:, 1] - gti[:, 0])
+        ref_ctrate = self.nphots / exposure
+        self.exposure = exposure
+
+        lc = copy.deepcopy(self.lc)
+        lc.gti = np.array([[gti[0, 0], gti[-1, 1]]])
+        mask = create_gti_mask(lc.time, gti)
+        lc.counts = lc.counts.astype(float)
+        lc.counts[~mask] = 0
+        lc.counts[mask] = ref_ctrate * lc.dt
+
+        ps_gti = Powerspectrum(lc, norm="leahy")
+
+        # Correct the PS level to overcome the Nph from filling the lc with mean
+        prob = scipy.stats.norm.cdf(-self.sigma_threshold)
+        thresh = pds_detection_level(prob)
+
+        bad = ps_gti.power > thresh
+        if plot:
+            self._plot_gti_features(ps_gti, thresh, figname)
+        self.mask = self.mask & ~bad
+        newpow = self.apply_mask(self.mask)
+        return newpow
+
+    def _plot_gti_features(self, ps_gti, thresh, figname):
+        """Plot the power spectrum of the GTI window light curve and the detection threshold.
+
+        This method generates a log-log plot of the power spectrum and saves it as a JPEG file.
+
+        Parameters
+        ----------
+        ps_gti : Powerspectrum
+            The power spectrum of the synthetic light curve having the mean counts inside GTIs
+            and 0 outside.
+        thresh : float
+            The threshold value for the power spectrum, above which features are considered significant.
+        figname : str
+            The name of the figure file to be saved (without extension).
+        """
+        fig = plt.figure(figname)
+        plt.loglog(ps_gti.freq, ps_gti.power)
+        plt.axhline(thresh)
+        plt.xlabel("Frequency (Hz)")
+        plt.ylabel(f"Power {ps_gti.norm}")
+        plt.savefig(figname + ".jpg")
+        plt.close(fig)
+
+    def rebin_log(self, *args, **kwargs):
+        """Rebin the power spectrum logarithmically and filter out NaN values.
+
+        This method overrides the parent class's `rebin_log` method by applying a mask
+        to remove any bins where the rebinned power is NaN.
+
+        Parameters
+        ----------
+        *args : tuple
+            Positional arguments passed to the parent class's `rebin_log` method.
+        **kwargs : dict
+            Keyword arguments passed to the parent class's `rebin_log` method.
+
+        Returns
+        -------
+        GtiCorrPowerspectrum
+            A new power spectrum object with rebinned frequencies and powers,
+            with NaN values filtered out.
+        """
+        new_ps = Powerspectrum.rebin_log(self, *args, **kwargs)
+        mask = ~np.isnan(new_ps.power)
+        return new_ps.apply_mask(mask)
+
+
 def powerspectrum_from_time_array(
     times,
     dt,
diff --git a/stingray/tests/test_powerspectrum.py b/stingray/tests/test_powerspectrum.py
@@ -3,6 +3,7 @@
 import copy
 import warnings
 import importlib
+import tempfile
 
 import pytest
 import matplotlib.pyplot as plt
@@ -11,6 +12,7 @@
 from stingray.events import EventList
 from stingray.utils import HAS_NUMBA
 from stingray import Powerspectrum, AveragedPowerspectrum, DynamicalPowerspectrum
+from stingray.powerspectrum import GtiCorrPowerspectrum
 from stingray.powerspectrum import powerspectrum_from_time_array
 from astropy.modeling.models import Lorentz1D
 from stingray.filters import filter_for_deadtime
@@ -381,6 +383,144 @@ def test_deadtime_corr(self):
         assert np.isclose(np.std(pds.power), 2 / np.sqrt(tmax / segment_size), rtol=0.1)
 
 
+class TestGtiCorrPowerspectrum(object):
+    @classmethod
+    def setup_class(cls):
+        """Set up a light curve and an event list with GTIs for testing GtiCorrPowerspectrum."""
+        tstart = 0.0
+        tend = 100.0
+        dt = 0.01
+
+        time = np.arange(tstart + 0.5 * dt, tend + 0.5 * dt, dt)
+
+        mean_count_rate = 1000.0
+        mean_counts = mean_count_rate * dt
+
+        poisson_counts = rng.poisson(mean_counts, size=time.shape[0])
+
+        cls.lc = Lightcurve(time, counts=poisson_counts, gti=[[tstart, tend]], dt=dt)
+        cls.events = EventList(
+            np.sort(
+                np.random.uniform(
+                    tstart, tend, np.random.poisson(mean_count_rate * (tend - tstart))
+                )
+            ),
+            gti=[[tstart, tend]],
+        )
+
+    @pytest.mark.parametrize("norm", ["leahy", "frac", "abs", "none"])
+    def test_gti_corr_ps(self, norm):
+        """GtiCorrPowerspectrum results match Powerspectrum when GTIs are unimportant."""
+        gcps = GtiCorrPowerspectrum(self.lc, norm=norm)
+        ps = Powerspectrum(self.lc, norm=norm)
+        for attr in [
+            "freq",
+            "power",
+            "power_err",
+            "unnorm_power",
+            "unnorm_power_err",
+            "df",
+            "m",
+            "n",
+            "nphots",
+        ]:
+            assert np.array_equal(getattr(gcps, attr), getattr(ps, attr))
+
+    @pytest.mark.parametrize("norm", ["leahy", "frac", "abs", "none"])
+    def test_gti_corr_ps_events(self, norm):
+        """GtiCorrPowerspectrum results match Powerspectrum for event lists."""
+        gcps = GtiCorrPowerspectrum(self.events, dt=0.01, norm=norm)
+        ps = Powerspectrum(self.events, dt=0.01, norm=norm)
+        for attr in [
+            "freq",
+            "power",
+            "power_err",
+            "unnorm_power",
+            "unnorm_power_err",
+            "df",
+            "m",
+            "n",
+            "nphots",
+        ]:
+            assert np.array_equal(getattr(gcps, attr), getattr(ps, attr))
+
+    @pytest.mark.parametrize("norm", ["leahy", "frac", "abs", "none"])
+    def test_gti_corr_ps_fill_different(self, norm):
+        """Filling BTIs or not changes the power spectrum."""
+        lc = copy.deepcopy(self.lc)
+        lc.gti = [[0, 30], [35, 100]]  # Two GTIs, one gap
+        gcps_fill = GtiCorrPowerspectrum(lc, norm=norm, fill_lc=True)
+        gcps_nofill = GtiCorrPowerspectrum(lc, norm=norm, fill_lc=False)
+        gcps_fill = gcps_fill.clean_gti_features()
+        gcps_nofill = gcps_nofill.clean_gti_features()
+        mean_gcps_fill = np.mean(gcps_fill.power)
+        mean_gcps_nofill = np.mean(gcps_nofill.power)
+        assert not np.allclose(mean_gcps_fill, mean_gcps_nofill, rtol=0.01)
+
+    @pytest.mark.parametrize("norm", ["leahy", "frac", "abs", "none"])
+    def test_gti_corr_ps_fill(self, norm):
+        """Filling BTIs and correcting the normalization adjusts the power spectrum."""
+        lc = copy.deepcopy(self.lc)
+        lc.gti = [[0, 30], [35, 100]]  # Two GTIs, one gap
+        gcps = GtiCorrPowerspectrum(lc, norm=norm, fill_lc=True)
+        gcps = gcps.clean_gti_features()
+        ps = Powerspectrum(self.lc, norm=norm)
+        mean_ps = np.mean(ps.power)
+        mean_gcps = np.mean(gcps.power)
+        assert np.isclose(mean_gcps, mean_ps, rtol=0.01)
+
+    @pytest.mark.parametrize("norm", ["leahy", "frac", "abs", "none"])
+    def test_gti_corr_ps_fill_events(self, norm):
+        """Filling BTIs and correcting the normalization adjusts the power spectrum."""
+        lc = copy.deepcopy(self.events)
+        lc.gti = [[0, 30], [35, 100]]  # Two GTIs, one gap
+        gcps = GtiCorrPowerspectrum(lc, dt=0.01, norm=norm, fill_lc=True)
+        gcps = gcps.clean_gti_features()
+        ps = Powerspectrum(self.events, dt=0.01, norm=norm)
+        mean_ps = np.mean(ps.power)
+        mean_gcps = np.mean(gcps.power)
+        assert np.isclose(mean_gcps, mean_ps, rtol=0.01)
+
+    @pytest.mark.parametrize("norm", ["leahy", "frac", "abs", "none"])
+    def test_gti_corr_ps_fill_rebin(self, norm):
+        """Rebinning works on GtiCorrPowerspectrum."""
+        lc = copy.deepcopy(self.lc)
+        lc.gti = [[0, 30], [35, 100]]  # Two GTIs, one gap
+        gcps = GtiCorrPowerspectrum(lc, norm=norm, fill_lc=True)
+        gcps = gcps.clean_gti_features()
+        ps = Powerspectrum(self.lc, norm=norm)
+        ps = ps.rebin_log(0.01)
+        gcps = gcps.rebin_log(0.01)
+
+        mean_ps = np.mean(ps.power)
+        mean_gcps = np.mean(gcps.power)
+        assert np.isclose(mean_gcps, mean_ps, rtol=0.1)
+
+    def test_gti_corr_apply_gti_lc_fails(self):
+        """Applying GTIs to a light curve with gaps in the time array raises an error."""
+        lc = copy.deepcopy(self.lc)
+        lc.gti = [[0, 30], [35, 100]]  # Two GTIs, one gap
+        lc.apply_gtis()
+        with pytest.raises(ValueError, match="The time array in the light"):
+            GtiCorrPowerspectrum(lc, norm="leahy", fill_lc=True)
+
+    def test_gti_corr_plot(self):
+        """Plotting GtiCorrPowerspectrum works."""
+        lc = copy.deepcopy(self.events)
+        lc.gti = [[0, 30], [35, 100]]  # Two GTIs, one gap
+        gcps = GtiCorrPowerspectrum(lc, dt=0.01, norm="leahy", fill_lc=True)
+        with tempfile.NamedTemporaryFile(delete=False) as tmpfile:
+            figname = tmpfile.name
+        try:
+            gcps = gcps.clean_gti_features(plot=True, figname=figname)
+            jpg_name = figname + ".jpg"
+            assert os.path.exists(jpg_name)
+            os.unlink(jpg_name)
+        finally:
+            if os.path.exists(figname):
+                os.unlink(figname)
+
+
 class TestPowerspectrum(object):
     @classmethod
     def setup_class(cls):

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+GtiCorrPowerspectrum is a new class to calculate long-term power spectra from gappy data`
Original file line number	Diff line number	Diff line change
`@@ -75,7 +75,7 @@ all = [`
`75`	`75`	`"tinygp",`
`76`	`76`	`"jaxns",`
`77`	`77`	`"etils",`
`78`		`- "tensorflow_probability",`
	`78`	`+ "tfp-nightly",`
`79`	`79`	`"typing_extensions",`
`80`	`80`	`]`
`81`	`81`	`docs = [`