Simplify _build_data and remove the pre/post accessor properties

drbenvincent · drbenvincent · commit 51e19f80655e · 2025-08-30T16:20:24.000+01:00
diff --git a/causalpy/experiments/interrupted_time_series.py b/causalpy/experiments/interrupted_time_series.py
@@ -105,18 +105,27 @@ def algorithm(self) -> None:
         if isinstance(self.model, PyMCModel):
             COORDS = {
                 "coeffs": self.labels,
-                "obs_ind": np.arange(self.pre_X.shape[0]),
+                "obs_ind": np.arange(self.data.X.sel(period="pre").shape[0]),
                 "treated_units": ["unit_0"],
             }
-            self.model.fit(X=self.pre_X, y=self.pre_y, coords=COORDS)
+            self.model.fit(
+                X=self.data.X.sel(period="pre"),
+                y=self.data.y.sel(period="pre"),
+                coords=COORDS,
+            )
         elif isinstance(self.model, RegressorMixin):
             # For OLS models, use 1D y data
-            self.model.fit(X=self.pre_X, y=self.pre_y.isel(treated_units=0))
+            self.model.fit(
+                X=self.data.X.sel(period="pre"),
+                y=self.data.y.sel(period="pre").isel(treated_units=0),
+            )
         else:
             raise ValueError("Model type not recognized")
 
         # 2. Score the goodness of fit to the pre-intervention data
-        self.score = self.model.score(X=self.pre_X, y=self.pre_y)
+        self.score = self.model.score(
+            X=self.data.X.sel(period="pre"), y=self.data.y.sel(period="pre")
+        )
 
         # 3. Generate predictions for the full dataset using unified approach
         # This creates predictions aligned with our complete time series
@@ -187,53 +196,26 @@ def _build_data(self, data: pd.DataFrame) -> xr.Dataset:
         # Create period coordinate based on treatment time
         period_coord = xr.where(data.index < self.treatment_time, "pre", "post")
 
-        # Return complete time series as a single xarray Dataset
-        X_array = xr.DataArray(
-            np.asarray(X_full),
-            dims=["obs_ind", "coeffs"],
-            coords={
-                "obs_ind": data.index,
-                "coeffs": self.labels,
-                "period": ("obs_ind", period_coord),
-            },
-        )
-
-        y_array = xr.DataArray(
-            np.asarray(y_full),
-            dims=["obs_ind", "treated_units"],
-            coords={
-                "obs_ind": data.index,
-                "treated_units": ["unit_0"],
-                "period": ("obs_ind", period_coord),
-            },
-        )
-
-        # Create dataset and use set_xindex to make period selectable with .sel()
-        dataset = xr.Dataset({"X": X_array, "y": y_array})
-        dataset = dataset.set_xindex("period")
-
-        return dataset
-
-    # Properties for pre/post intervention data access
-    @property
-    def pre_X(self) -> xr.DataArray:
-        """Pre-intervention features."""
-        return self.data.X.sel(period="pre")
-
-    @property
-    def pre_y(self) -> xr.DataArray:
-        """Pre-intervention outcomes."""
-        return self.data.y.sel(period="pre")
-
-    @property
-    def post_X(self) -> xr.DataArray:
-        """Post-intervention features."""
-        return self.data.X.sel(period="post")
-
-    @property
-    def post_y(self) -> xr.DataArray:
-        """Post-intervention outcomes."""
-        return self.data.y.sel(period="post")
+        # Return as an xarray.Dataset
+        common_coords = {
+            "obs_ind": data.index,
+            "period": ("obs_ind", period_coord),
+        }
+
+        return xr.Dataset(
+            {
+                "X": xr.DataArray(
+                    np.asarray(X_full),
+                    dims=["obs_ind", "coeffs"],
+                    coords={**common_coords, "coeffs": self.labels},
+                ),
+                "y": xr.DataArray(
+                    np.asarray(y_full),
+                    dims=["obs_ind", "treated_units"],
+                    coords={**common_coords, "treated_units": ["unit_0"]},
+                ),
+            }
+        ).set_xindex("period")
 
     def input_validation(self, data, treatment_time):
         """Validate the input data and model formula for correctness"""
@@ -285,7 +267,7 @@ def _bayesian_plot(
         # TOP PLOT --------------------------------------------------
         # pre-intervention period
         h_line, h_patch = plot_xY(
-            self.pre_X.obs_ind,
+            self.data.X.sel(period="pre").obs_ind,
             pre_pred.mu.isel(treated_units=0),
             ax=ax[0],
             plot_hdi_kwargs={"color": "C0"},
@@ -294,8 +276,8 @@ def _bayesian_plot(
         labels = ["Pre-intervention period"]
 
         (h,) = ax[0].plot(
-            self.pre_X.obs_ind,
-            self.pre_y.isel(treated_units=0),
+            self.data.X.sel(period="pre").obs_ind,
+            self.data.y.sel(period="pre").isel(treated_units=0),
             "k.",
             label="Observations",
         )
@@ -304,7 +286,7 @@ def _bayesian_plot(
 
         # post intervention period
         h_line, h_patch = plot_xY(
-            self.post_X.obs_ind,
+            self.data.X.sel(period="post").obs_ind,
             post_pred.mu.isel(treated_units=0),
             ax=ax[0],
             plot_hdi_kwargs={"color": "C1"},
@@ -313,17 +295,17 @@ def _bayesian_plot(
         labels.append(counterfactual_label)
 
         ax[0].plot(
-            self.post_X.obs_ind,
-            self.post_y.isel(treated_units=0),
+            self.data.X.sel(period="post").obs_ind,
+            self.data.y.sel(period="post").isel(treated_units=0),
             "k.",
         )
 
         # Shaded causal effect - use direct calculation
         post_pred_mu = post_pred.mu.mean(dim=["chain", "draw"]).isel(treated_units=0)
         h = ax[0].fill_between(
-            self.post_X.obs_ind,
+            self.data.X.sel(period="post").obs_ind,
             y1=post_pred_mu,
-            y2=self.post_y.isel(treated_units=0),
+            y2=self.data.y.sel(period="post").isel(treated_units=0),
             color="C0",
             alpha=0.25,
         )
@@ -339,20 +321,20 @@ def _bayesian_plot(
 
         # MIDDLE PLOT -----------------------------------------------
         plot_xY(
-            self.pre_X.obs_ind,
+            self.data.X.sel(period="pre").obs_ind,
             self.impact.sel(period="pre").isel(treated_units=0),
             ax=ax[1],
             plot_hdi_kwargs={"color": "C0"},
         )
         plot_xY(
-            self.post_X.obs_ind,
+            self.data.X.sel(period="post").obs_ind,
             self.impact.sel(period="post").isel(treated_units=0),
             ax=ax[1],
             plot_hdi_kwargs={"color": "C1"},
         )
         ax[1].axhline(y=0, c="k")
         ax[1].fill_between(
-            self.post_X.obs_ind,
+            self.data.X.sel(period="post").obs_ind,
             y1=self.impact.sel(period="post")
             .mean(["chain", "draw"])
             .isel(treated_units=0),
@@ -365,7 +347,7 @@ def _bayesian_plot(
         # BOTTOM PLOT -----------------------------------------------
         ax[2].set(title="Cumulative Causal Impact")
         plot_xY(
-            self.post_X.obs_ind,
+            self.data.X.sel(period="post").obs_ind,
             self.post_impact_cumulative.isel(treated_units=0),
             ax=ax[2],
             plot_hdi_kwargs={"color": "C1"},
@@ -424,12 +406,18 @@ def _ols_plot(self, round_to=None, **kwargs) -> tuple[plt.Figure, List[plt.Axes]
             pre_pred = self.predictions.sel(period="pre")
             post_pred = self.predictions.sel(period="post")
 
-        ax[0].plot(self.pre_X.obs_ind, self.pre_y, "k.")
-        ax[0].plot(self.post_X.obs_ind, self.post_y, "k.")
+        ax[0].plot(
+            self.data.X.sel(period="pre").obs_ind, self.data.y.sel(period="pre"), "k."
+        )
+        ax[0].plot(
+            self.data.X.sel(period="post").obs_ind, self.data.y.sel(period="post"), "k."
+        )
 
-        ax[0].plot(self.pre_X.obs_ind, pre_pred, c="k", label="model fit")
         ax[0].plot(
-            self.post_X.obs_ind,
+            self.data.X.sel(period="pre").obs_ind, pre_pred, c="k", label="model fit"
+        )
+        ax[0].plot(
+            self.data.X.sel(period="post").obs_ind,
             post_pred,
             label=counterfactual_label,
             ls=":",
@@ -439,31 +427,35 @@ def _ols_plot(self, round_to=None, **kwargs) -> tuple[plt.Figure, List[plt.Axes]
             title=f"$R^2$ on pre-intervention data = {round_num(self.score, round_to)}"
         )
 
-        ax[1].plot(self.pre_X.obs_ind, self.impact.sel(period="pre"), "k.")
         ax[1].plot(
-            self.post_X.obs_ind,
+            self.data.X.sel(period="pre").obs_ind, self.impact.sel(period="pre"), "k."
+        )
+        ax[1].plot(
+            self.data.X.sel(period="post").obs_ind,
             self.impact.sel(period="post"),
             "k.",
             label=counterfactual_label,
         )
         ax[1].axhline(y=0, c="k")
         ax[1].set(title="Causal Impact")
 
-        ax[2].plot(self.post_X.obs_ind, self.post_impact_cumulative, c="k")
+        ax[2].plot(
+            self.data.X.sel(period="post").obs_ind, self.post_impact_cumulative, c="k"
+        )
         ax[2].axhline(y=0, c="k")
         ax[2].set(title="Cumulative Causal Impact")
 
         # Shaded causal effect
         ax[0].fill_between(
-            self.post_X.obs_ind,
+            self.data.X.sel(period="post").obs_ind,
             y1=np.squeeze(post_pred),
-            y2=np.squeeze(self.post_y),
+            y2=np.squeeze(self.data.y.sel(period="post")),
             color="C0",
             alpha=0.25,
             label="Causal impact",
         )
         ax[1].fill_between(
-            self.post_X.obs_ind,
+            self.data.X.sel(period="post").obs_ind,
             y1=np.squeeze(self.impact.sel(period="post")),
             color="C0",
             alpha=0.25,
diff --git a/docs/source/_static/interrogate_badge.svg b/docs/source/_static/interrogate_badge.svg
@@ -1,5 +1,5 @@
 <svg width="140" height="20" viewBox="0 0 140 20" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xml:space="preserve" xmlns:serif="http://www.serif.com/" style="fill-rule:evenodd;clip-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:2;">
-    <title>interrogate: 95.6%</title>
+    <title>interrogate: 95.5%</title>
     <g transform="matrix(1,0,0,1,22,0)">
         <g id="backgrounds" transform="matrix(1.32789,0,0,1,-22.3892,0)">
             <rect x="0" y="0" width="71" height="20" style="fill:rgb(85,85,85);"/>
@@ -12,8 +12,8 @@
     <g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="110">
         <text x="590" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="610">interrogate</text>
         <text x="590" y="140" transform="scale(.1)" textLength="610">interrogate</text>
-        <text x="1160" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="370" data-interrogate="result">95.6%</text>
-        <text x="1160" y="140" transform="scale(.1)" textLength="370" data-interrogate="result">95.6%</text>
+        <text x="1160" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="370" data-interrogate="result">95.5%</text>
+        <text x="1160" y="140" transform="scale(.1)" textLength="370" data-interrogate="result">95.5%</text>
     </g>
     <g id="logo-shadow" serif:id="logo shadow" transform="matrix(0.854876,0,0,0.854876,-6.73514,1.732)">
         <g transform="matrix(0.299012,0,0,0.299012,9.70229,-6.68582)">