Minor fix in docstring

JeanVanDyk · JeanVanDyk · commit 5ee3cb41de78 · 2025-06-04T10:58:22.000+02:00
diff --git a/causalpy/experiments/interrupted_time_series.py b/causalpy/experiments/interrupted_time_series.py
@@ -25,12 +25,11 @@
 from sklearn.base import RegressorMixin
 
 from causalpy.custom_exceptions import BadIndexException
+from causalpy.experiments.base import BaseExperiment
 from causalpy.plot_utils import get_hdi_to_df, plot_xY
 from causalpy.pymc_models import PyMCModel
 from causalpy.utils import round_num
 
-from .base import BaseExperiment
-
 LEGEND_FONT_SIZE = 12
 
 
@@ -78,19 +77,27 @@ class InterruptedTimeSeries(BaseExperiment):
     def __init__(
         self,
         data: pd.DataFrame,
-        treatment_time: Union[int, float, pd.Timestamp],
+        treatment_time: Union[int, float, pd.Timestamp, tuple, None],
         formula: str,
         model=None,
         **kwargs,
     ) -> None:
         super().__init__(model=model)
-        self.input_validation(data, treatment_time)
+        # When `treatment_time` is None or a tuple it is not a known point in
+        # time: it is handed to the model as the range in which to look for the
+        # intervention, and the pre/post split is made once the model is fitted.
+        infer_treatment_time = treatment_time is None or isinstance(
+            treatment_time, tuple
+        )
+        # TODO: input validation currently only covers a given treatment time.
+        # (The previous test, `is not None or not isinstance(..., tuple)`, was
+        # true for every value, so None and tuples were validated as well.)
+        if not infer_treatment_time:
+            self.input_validation(data, treatment_time)
+
         self.treatment_time = treatment_time
         # set experiment type - usually done in subclasses
         self.expt_type = "Pre-Post Fit"
-        # split data in to pre and post intervention
-        self.datapre = data[data.index < self.treatment_time]
-        self.datapost = data[data.index >= self.treatment_time]
+
+        if infer_treatment_time:
+            # The model estimates the treatment time itself, so it is fitted on
+            # the whole series and told which range to search.
+            self.datapre = data
+            self.model.set_time_range(self.treatment_time)
+        else:
+            # split data in to pre and post intervention
+            self.datapre = data[data.index < self.treatment_time]
 
         self.formula = formula
 
@@ -101,17 +108,11 @@ def __init__(
         self._x_design_info = X.design_info
         self.labels = X.design_info.column_names
         self.pre_y, self.pre_X = np.asarray(y), np.asarray(X)
-        # process post-intervention data
-        (new_y, new_x) = build_design_matrices(
-            [self._y_design_info, self._x_design_info], self.datapost
-        )
-        self.post_X = np.asarray(new_x)
-        self.post_y = np.asarray(new_y)
 
         # fit the model to the observed (pre-intervention) data
         if isinstance(self.model, PyMCModel):
             COORDS = {"coeffs": self.labels, "obs_ind": np.arange(self.pre_X.shape[0])}
-            self.model.fit(X=self.pre_X, y=self.pre_y, coords=COORDS)
+            idata = self.model.fit(X=self.pre_X, y=self.pre_y, coords=COORDS)
         elif isinstance(self.model, RegressorMixin):
             self.model.fit(X=self.pre_X, y=self.pre_y)
         else:
@@ -120,8 +121,29 @@ def __init__(
         # score the goodness of fit to the pre-intervention data
         self.score = self.model.score(X=self.pre_X, y=self.pre_y)
 
+        if treatment_time is None or isinstance(treatment_time, tuple):
+            self.treatment_time = int(
+                az.extract(idata, group="posterior", var_names="switchpoint")
+                .mean("sample")
+                .values
+            )
+            self.datapre = data[data.index < self.treatment_time]
+            (new_y, new_x) = build_design_matrices(
+                [self._y_design_info, self._x_design_info], self.datapre
+            )
+            self.pre_X = np.asarray(new_x)
+            self.pre_y = np.asarray(new_y)
+
         # get the model predictions of the observed (pre-intervention) data
         self.pre_pred = self.model.predict(X=self.pre_X)
+        # process post-intervention data
+        self.datapost = data[data.index >= self.treatment_time]
+
+        (new_y, new_x) = build_design_matrices(
+            [self._y_design_info, self._x_design_info], self.datapost
+        )
+        self.post_X = np.asarray(new_x)
+        self.post_y = np.asarray(new_y)
 
         # calculate the counterfactual
         self.post_pred = self.model.predict(X=self.post_X)
diff --git a/causalpy/pymc_models.py b/causalpy/pymc_models.py
@@ -530,26 +530,30 @@ class InterventionTimeEstimator(PyMCModel):
         --------
         >>> import causalpy as cp
         >>> import numpy as np
-        >>> from causalpy.pymc_models import InterventionTimeEstimator
-        >>> df = cp.load_data("its")
-        >>> y = df["y"].values
-        >>> t = df["t"].values
-        >>> coords = {"seasons": range(12)} # The data is monthly
-        >>> estimator = InterventionTimeEstimator()
-        >>> # We are trying to capture an impulse in the number of death per month due to Covid.
-        >>> estimator.fit(
-        ...     t,
-        ...     y,
-        ...     coords,
-        ...     priors={"impulse":[]}
-        ... )
-        Inference data...
+        >>> from patsy import build_design_matrices, dmatrices
+        >>> from causalpy.pymc_models import InterventionTimeEstimator as ITE
+        >>> data = cp.load_data("its")
+        >>> formula="y ~ 1 + t + C(month)"
+        >>> y, X = dmatrices(formula, data)
+        >>> outcome_variable_name = y.design_info.column_names[0]
+        >>> labels = X.design_info.column_names
+        >>> _y, _X = np.asarray(y), np.asarray(X)
+        >>> COORDS = {"coeffs":labels, "obs_ind": np.arange(_X.shape[0])}
+        >>> model = ITE(sample_kwargs={"draws" : 10, "tune":10, "progressbar":False}) # For a quick overview. Remove sample_kwargs parameter for better performance
+        >>> model.set_time_range(None)
+        >>> model.fit(X=_X, y=_y, coords=COORDS)
+        Inference ...
     """
 
-    def build_model(self, t, y, coords, time_range, grain_season, priors):
+    def __init__(self, priors=None, sample_kwargs=None):
+        """
+        Create the estimator.
+
+        :param priors: Optional dictionary selecting which intervention effects
+            are modelled. The keys ``"level"``, ``"trend"`` and ``"impulse"``
+            are looked up when the model is built; each maps to a sequence of
+            prior parameters. Defaults to an empty dictionary, i.e. none of
+            these effect terms is added.
+        :param sample_kwargs: Passed on unchanged to the parent class
+            constructor.
+        """
+        super().__init__(sample_kwargs)
+        # Use a None sentinel instead of a ``{}`` default: a mutable default is
+        # created once and would be shared by every instance.
+        self.priors = {} if priors is None else priors
+        # Give ``time_range`` a defined value so that ``fit`` can fall back to
+        # the span of the data when ``set_time_range`` was never called.
+        self.time_range = None
+
+    def build_model(self, X, t, y, coords):
         """
         Defines the PyMC model
 
+        :param X: An array of the covariates (design matrix) with dims (obs_ind, coeffs)
         :param t: An array of values representing the time over which y is spread
         :param y: An array of values representing our outcome y
         :param coords: An optional dictionary with the coordinate names for our instruments.
@@ -564,80 +568,134 @@ def build_model(self, t, y, coords, time_range, grain_season, priors):
         with self:
             self.add_coords(coords)
 
-            if time_range is None:
-                time_range = (t.min(), t.max())
-
+            t = pm.Data("t", t, dims="obs_ind")
+            X = pm.Data("X", X, dims=["obs_ind", "coeffs"])
+            y = pm.Data("y", y[:, 0], dims="obs_ind")
+            lower_bound = pm.Data("lower_bound", self.time_range[0])
+            upper_bound = pm.Data("upper_bound", self.time_range[1])
             # --- Priors ---
             switchpoint = pm.Uniform(
-                "switchpoint", lower=time_range[0], upper=time_range[1]
+                "switchpoint", lower=lower_bound, upper=upper_bound
             )
-            alpha = pm.Normal(name="alpha", mu=0, sigma=50)
-            beta = pm.Normal(name="beta", mu=0, sigma=50)
-            seasons = 0
-            if "seasons" in coords and len(coords["seasons"]) > 0:
-                season_idx = np.arange(len(y)) // grain_season % len(coords["seasons"])
-                seasons_effect = pm.Normal(
-                    "seasons_effect", mu=0, sigma=50, dims="seasons"
-                )
-                seasons = seasons_effect[season_idx]
+            beta = pm.Normal(name="beta", mu=0, sigma=50, dims="coeffs")
 
             # --- Intervention effect ---
             level = trend = impulse = 0
 
-            if "level" in priors:
+            if "level" in self.priors:
                 mu, sigma = (
                     (0, 50)
-                    if len(priors["level"]) != 2
-                    else (priors["level"][0], priors["level"][1])
+                    if len(self.priors["level"]) != 2
+                    else (self.priors["level"][0], self.priors["level"][1])
                 )
                 level = pm.Normal(
                     "level",
                     mu=mu,
                     sigma=sigma,
                 )
-            if "trend" in priors:
+            if "trend" in self.priors:
                 mu, sigma = (
                     (0, 50)
-                    if len(priors["trend"]) != 2
-                    else (priors["trend"][0], priors["trend"][1])
+                    if len(self.priors["trend"]) != 2
+                    else (self.priors["trend"][0], self.priors["trend"][1])
                 )
                 trend = pm.Normal("trend", mu=mu, sigma=sigma)
-            if "impulse" in priors:
+            if "impulse" in self.priors:
                 mu, sigma1, sigma2 = (
                     (0, 50, 50)
-                    if len(priors["impulse"]) != 3
+                    if len(self.priors["impulse"]) != 3
                     else (
-                        priors["impulse"][0],
-                        priors["impulse"][1],
-                        priors["impulse"][2],
+                        self.priors["impulse"][0],
+                        self.priors["impulse"][1],
+                        self.priors["impulse"][2],
                     )
                 )
                 impulse_amplitude = pm.Normal("impulse_amplitude", mu=mu, sigma=sigma1)
                 decay_rate = pm.HalfNormal("decay_rate", sigma=sigma2)
-                impulse = impulse_amplitude * pm.math.exp(
-                    -decay_rate * abs(t - switchpoint)
+                impulse = pm.Deterministic(
+                    "impulse",
+                    impulse_amplitude
+                    * pm.math.exp(-decay_rate * pm.math.abs(t - switchpoint)),
                 )
 
             # --- Parameterization ---
             weight = pm.math.sigmoid(t - switchpoint)
-            # Compute and store the modelled time series
-            mu_ts = pm.Deterministic(name="mu_ts", var=alpha + beta * t + seasons)
+            # Compute and store the base time series
+            mu = pm.Deterministic(name="mu", var=pm.math.dot(X, beta))
             # Compute and store the modelled intervention effect
             mu_in = pm.Deterministic(
                 name="mu_in", var=level + trend * (t - switchpoint) + impulse
             )
-            # Compute and store the the sum of the intervention and the time series
-            mu = pm.Deterministic("mu", mu_ts + weight * mu_in)
+            # Compute and store the sum of the base time series and the intervention's effect
+            mu_ts = pm.Deterministic("mu_ts", mu + weight * mu_in)
             sigma = pm.HalfNormal("sigma", 1)
 
             # --- Likelihood ---
-            pm.Normal("y_hat", mu=mu, sigma=sigma, observed=y)
+            # Likelihood of the base time series
+            pm.Normal("y_hat", mu=mu, sigma=sigma, dims="obs_ind")
+            # Likelihood of the base time series plus the intervention's effect
+            pm.Normal("y_ts", mu=mu_ts, sigma=sigma, observed=y, dims="obs_ind")
 
-    def fit(self, t, y, coords, time_range=None, grain_season=1, priors={}, n=1000):
-        """
-        Draw samples from posterior distribution
+    def fit(self, X, y, coords: Optional[Dict[str, Any]] = None):
+        """Draw samples from posterior, prior predictive, and posterior predictive
+        distributions, placing them in the model's idata attribute.
+
+        :param X: Array of covariates indexed as ``X[:, j]``; its last column is
+            used as the time variable ``t``.
+        :param y: Array of outcomes, passed on to ``build_model``.
+        :param coords: Optional dictionary of coordinate names, passed on to
+            ``build_model``.
+        :returns: The ``idata`` attribute holding all three groups of samples.
         """
-        self.build_model(t, y, coords, time_range, grain_season, priors)
+
+        # Ensure random_seed is used in sample_prior_predictive() and
+        # sample_posterior_predictive() if provided in sample_kwargs.
+        random_seed = self.sample_kwargs.get("random_seed", None)
+        # NOTE(review): relies on the time variable being the last column of X.
+        t = X[:, -1]
+        # ``time_range`` only exists once ``set_time_range`` has been called, so
+        # read it defensively; a missing or None range means "search the whole
+        # span of the data".
+        if getattr(self, "time_range", None) is None:
+            self.time_range = (t.min(), t.max())
+        self.build_model(X, t, y, coords)
+        # Merge instead of passing both as keywords: a ``max_treedepth`` entry
+        # in sample_kwargs now overrides the default rather than raising a
+        # duplicate-keyword TypeError.
+        sample_kwargs = {"max_treedepth": 15, **self.sample_kwargs}
         with self:
-            self.idata = pm.sample(n, progressbar=False, **self.sample_kwargs)
+            self.idata = pm.sample(**sample_kwargs)
+            self.idata.extend(pm.sample_prior_predictive(random_seed=random_seed))
+            self.idata.extend(
+                pm.sample_posterior_predictive(
+                    self.idata, progressbar=False, random_seed=random_seed
+                )
+            )
         return self.idata
+
+    def predict(self, X):
+        """
+        Sample the posterior predictive distribution for new input data `X`.
+
+        The last column of `X` is used as the time variable. Both are
+        registered on the model before sampling, and the variables ``y_hat``,
+        ``y_ts``, ``mu``, ``mu_ts`` and ``mu_in`` are drawn.
+
+        .. caution::
+            Results in KeyError if model hasn't been fit.
+        """
+        # Reuse the seed from sample_kwargs, when one was provided, so the
+        # posterior predictive draws are reproducible.
+        seed = self.sample_kwargs.get("random_seed", None)
+        # Swap the new covariates and their time column into the model.
+        self._data_setter(X, X[:, -1])
+        predictive_kwargs = dict(
+            var_names=["y_hat", "y_ts", "mu", "mu_ts", "mu_in"],
+            progressbar=False,
+            random_seed=seed,
+        )
+        with self:  # sample with new input data
+            return pm.sample_posterior_predictive(self.idata, **predictive_kwargs)
+
+    def _data_setter(self, X, t) -> None:
+        """
+        Register new covariates and time values on the model.
+
+        Used internally before prediction: replaces the ``X`` and ``t`` data,
+        resets ``y`` to a zero vector with one entry per row of `X`, and
+        resizes the ``obs_ind`` coordinate to the same length.
+        """
+        n_obs = X.shape[0]
+        replacement_data = {"X": X, "t": t, "y": np.zeros(n_obs)}
+        replacement_coords = {"obs_ind": np.arange(n_obs)}
+        with self:
+            pm.set_data(replacement_data, coords=replacement_coords)
+
+    def set_time_range(self, time_range):
+        """
+        Store the interval in which the switchpoint is searched.
+
+        :param time_range: Either ``None``, in which case ``fit`` falls back to
+            the minimum and maximum of the time variable, or an indexable pair
+            whose items 0 and 1 become the lower and upper bound of the
+            ``switchpoint`` prior.
+        """
+        self.time_range = time_range
diff --git a/docs/source/_static/interrogate_badge.svg b/docs/source/_static/interrogate_badge.svg
@@ -1,5 +1,5 @@
 <svg width="140" height="20" viewBox="0 0 140 20" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xml:space="preserve" xmlns:serif="http://www.serif.com/" style="fill-rule:evenodd;clip-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:2;">
-    <title>interrogate: 90.7%</title>
+    <title>interrogate: 90.8%</title>
     <g transform="matrix(1,0,0,1,22,0)">
         <g id="backgrounds" transform="matrix(1.32789,0,0,1,-22.3892,0)">
             <rect x="0" y="0" width="71" height="20" style="fill:rgb(85,85,85);"/>
@@ -12,8 +12,8 @@
     <g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="110">
         <text x="590" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="610">interrogate</text>
         <text x="590" y="140" transform="scale(.1)" textLength="610">interrogate</text>
-        <text x="1160" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="370" data-interrogate="result">90.7%</text>
-        <text x="1160" y="140" transform="scale(.1)" textLength="370" data-interrogate="result">90.7%</text>
+        <text x="1160" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="370" data-interrogate="result">90.8%</text>
+        <text x="1160" y="140" transform="scale(.1)" textLength="370" data-interrogate="result">90.8%</text>
     </g>
     <g id="logo-shadow" serif:id="logo shadow" transform="matrix(0.854876,0,0,0.854876,-6.73514,1.732)">
         <g transform="matrix(0.299012,0,0,0.299012,9.70229,-6.68582)">
diff --git a/docs/source/notebooks/index.md b/docs/source/notebooks/index.md
@@ -40,6 +40,7 @@ did_pymc_banks.ipynb
 its_skl.ipynb
 its_pymc.ipynb
 its_covid.ipynb
+its_no_treatment_time.ipynb
 :::
 
 :::{toctree}
diff --git a/docs/source/notebooks/its_no_treatment_time.ipynb b/docs/source/notebooks/its_no_treatment_time.ipynb