pymc-labs
diff --git a/‎.pre-commit-config.yaml‎
Lines changed: 1 addition & 1 deletion b/‎.pre-commit-config.yaml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎Makefile‎
Lines changed: 2 additions & 2 deletions b/‎Makefile‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎causalpy/__init__.py‎
Lines changed: 0 additions & 3 deletions b/‎causalpy/__init__.py‎
Lines changed: 0 additions & 3 deletions
diff --git a/‎causalpy/experiments/base.py‎
Lines changed: 10 additions & 6 deletions b/‎causalpy/experiments/base.py‎
Lines changed: 10 additions & 6 deletions
diff --git a/‎causalpy/experiments/interrupted_time_series.py‎
Lines changed: 8 additions & 0 deletions b/‎causalpy/experiments/interrupted_time_series.py‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎causalpy/experiments/synthetic_control.py‎
Lines changed: 8 additions & 0 deletions b/‎causalpy/experiments/synthetic_control.py‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎causalpy/plot_utils.py‎
Lines changed: 14 additions & 7 deletions b/‎causalpy/plot_utils.py‎
Lines changed: 14 additions & 7 deletions
diff --git a/‎causalpy/pymc_models.py‎
Lines changed: 38 additions & 1 deletion b/‎causalpy/pymc_models.py‎
Lines changed: 38 additions & 1 deletion
diff --git a/‎causalpy/tests/test_plot_utils.py‎
Lines changed: 97 additions & 0 deletions b/‎causalpy/tests/test_plot_utils.py‎
Lines changed: 97 additions & 0 deletions
diff --git a/‎docs/source/_static/interrogate_badge.svg‎
Lines changed: 3 additions & 3 deletions b/‎docs/source/_static/interrogate_badge.svg‎
Lines changed: 3 additions & 3 deletions
@@ -25,7 +25,7 @@ repos:
         exclude: &exclude_pattern 'iv_weak_instruments.ipynb'
         args: ["--maxkb=1500"]
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.13.3
+    rev: v0.14.1
     hooks:
       # Run the linter
       - id: ruff
 
@@ -13,10 +13,10 @@ check_lint:
 	interrogate .
 
 doctest:
-	pytest --doctest-modules --ignore=causalpy/tests/ causalpy/ --config-file=causalpy/tests/conftest.py
+	python -m pytest --doctest-modules --ignore=causalpy/tests/ causalpy/ --config-file=causalpy/tests/conftest.py
 
 test:
-	pytest
+	python -m pytest
 
 uml:
 	pyreverse -o png causalpy --output-directory docs/source/_static --ignore tests
 
@@ -11,7 +11,6 @@
 #   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #   See the License for the specific language governing permissions and
 #   limitations under the License.
-import arviz as az
 
 import causalpy.pymc_models as pymc_models
 import causalpy.skl_models as skl_models
@@ -28,8 +27,6 @@
 from .experiments.regression_kink import RegressionKink
 from .experiments.synthetic_control import SyntheticControl
 
-az.style.use("arviz-darkgrid")
-
 __all__ = [
     "__version__",
     "DifferenceInDifferences",
 
@@ -17,6 +17,8 @@
 
 from abc import abstractmethod
 
+import arviz as az
+import matplotlib.pyplot as plt
 import pandas as pd
 from sklearn.base import RegressorMixin
 
@@ -63,12 +65,14 @@ def plot(self, *args, **kwargs) -> tuple:
         Internally, this function dispatches to either `_bayesian_plot` or `_ols_plot`
         depending on the model type.
         """
-        if isinstance(self.model, PyMCModel):
-            return self._bayesian_plot(*args, **kwargs)
-        elif isinstance(self.model, RegressorMixin):
-            return self._ols_plot(*args, **kwargs)
-        else:
-            raise ValueError("Unsupported model type")
+        # Apply arviz-darkgrid style only during plotting, then revert
+        with plt.style.context(az.style.library["arviz-darkgrid"]):
+            if isinstance(self.model, PyMCModel):
+                return self._bayesian_plot(*args, **kwargs)
+            elif isinstance(self.model, RegressorMixin):
+                return self._ols_plot(*args, **kwargs)
+            else:
+                raise ValueError("Unsupported model type")
 
     @abstractmethod
     def _bayesian_plot(self, *args, **kwargs):
 
@@ -70,6 +70,14 @@ class InterruptedTimeSeries(BaseExperiment):
     ...         }
     ...     ),
     ... )
+
+    Notes
+    -----
+    For Bayesian models, the causal impact is calculated using the posterior expectation
+    (``mu``) rather than the posterior predictive (``y_hat``). This means the impact and
+    its uncertainty represent the systematic causal effect, excluding observation-level
+    noise. The uncertainty bands in the plots reflect parameter uncertainty and
+    counterfactual prediction uncertainty, but not individual observation variability.
     """
 
     expt_type = "Interrupted Time Series"
 
@@ -67,6 +67,14 @@ class SyntheticControl(BaseExperiment):
     ...         }
     ...     ),
     ... )
+
+    Notes
+    -----
+    For Bayesian models, the causal impact is calculated using the posterior expectation
+    (``mu``) rather than the posterior predictive (``y_hat``). This means the impact and
+    its uncertainty represent the systematic causal effect, excluding observation-level
+    noise. The uncertainty bands in the plots reflect parameter uncertainty and
+    counterfactual prediction uncertainty, but not individual observation variability.
     """
 
     supports_ols = True
 
@@ -93,10 +93,17 @@ def get_hdi_to_df(
     :param hdi_prob:
         The size of the HDI, default is 0.94
     """
-    hdi = (
-        az.hdi(x, hdi_prob=hdi_prob)
-        .to_dataframe()
-        .unstack(level="hdi")
-        .droplevel(0, axis=1)
-    )
-    return hdi
+    hdi_result = az.hdi(x, hdi_prob=hdi_prob)
+
+    # Get the data variable name (typically 'mu' or 'x')
+    # We select only the data variable column to exclude coordinates like 'treated_units'
+    data_var = list(hdi_result.data_vars)[0]
+
+    # Convert to DataFrame, select only the data variable column, then unstack
+    # This prevents coordinate values (like 'treated_agg') from appearing as columns
+    hdi_df = hdi_result[data_var].to_dataframe()[[data_var]].unstack(level="hdi")
+
+    # Remove the top level of column MultiIndex to get just 'lower' and 'higher'
+    hdi_df.columns = hdi_df.columns.droplevel(0)
+
+    return hdi_df
@@ -305,7 +305,44 @@ def score(self, X: xr.DataArray, y: xr.DataArray) -> pd.Series:
     def calculate_impact(
         self, y_true: xr.DataArray, y_pred: az.InferenceData
     ) -> xr.DataArray:
-        impact = y_true - y_pred["posterior_predictive"]["y_hat"]
+        """
+        Calculate the causal impact as the difference between observed and predicted values.
+
+        The impact is calculated using the posterior expectation (`mu`) rather than the
+        posterior predictive (`y_hat`). This means the causal impact represents the
+        difference from the expected value of the model, excluding observation noise.
+        This approach provides a cleaner measure of the causal effect by focusing on
+        the systematic difference rather than including sampling variability from the
+        observation noise term.
+
+        Parameters
+        ----------
+        y_true : xr.DataArray
+            The observed outcome values with dimensions ["obs_ind", "treated_units"].
+        y_pred : az.InferenceData
+            Inference data whose ``posterior_predictive`` group contains the ``mu``
+            variable, which represents the expected value (mean) of the outcome.
+
+        Returns
+        -------
+        xr.DataArray
+            The causal impact with dimensions ending in "obs_ind". The impact includes
+            posterior uncertainty from the model parameters but excludes observation noise.
+
+        Notes
+        -----
+        By using ``mu`` (the posterior expectation) rather than ``y_hat`` (the
+        posterior predictive with observation noise), the uncertainty in the impact
+        reflects two sources: parameter uncertainty in the fitted model, and
+        uncertainty in the counterfactual prediction.
+
+        It excludes observation-level noise (sigma), which is present in ``y_hat``
+        but not in ``mu``.
+
+        This makes the impact plots focus on the systematic causal effect rather than
+        individual observation variability.
+        """
+        impact = y_true - y_pred["posterior_predictive"]["mu"]
         return impact.transpose(..., "obs_ind")
 
     def calculate_cumulative_impact(self, impact):
 
@@ -0,0 +1,97 @@
+#   Copyright 2025 - 2025 The PyMC Labs Developers
+#
+#   Licensed under the Apache License, Version 2.0 (the "License");
+#   you may not use this file except in compliance with the License.
+#   You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing, software
+#   distributed under the License is distributed on an "AS IS" BASIS,
+#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#   See the License for the specific language governing permissions and
+#   limitations under the License.
+"""
+Tests for plot utility functions
+"""
+
+import numpy as np
+import pandas as pd
+import pytest
+import xarray as xr
+
+from causalpy.plot_utils import get_hdi_to_df
+
+
+@pytest.mark.integration
+def test_get_hdi_to_df_with_coordinate_dimensions():
+    """
+    Regression test for bug where get_hdi_to_df returned string coordinate values
+    instead of numeric HDI values when the input had an extra scalar coordinate.
+
+    This bug manifested in multi-cell synthetic control experiments where columns
+    like 'pred_hdi_upper_94' contained the string "treated_agg" instead of
+    numeric upper bound values.
+
+    See: https://github.com/pymc-labs/CausalPy/issues/532
+    """
+    # Build a mock DataArray like the one produced in synthetic control, using a
+    # local Generator so the test does not reseed NumPy's global RNG
+    rng = np.random.default_rng(42)
+    n_chains = 2
+    n_draws = 100
+    n_obs = 10
+
+    # Simulate posterior samples; 'treated_units' is attached as a scalar coordinate
+    data = rng.normal(loc=5.0, scale=0.5, size=(n_chains, n_draws, n_obs))
+
+    xr_data = xr.DataArray(
+        data,
+        dims=["chain", "draw", "obs_ind"],
+        coords={
+            "chain": np.arange(n_chains),
+            "draw": np.arange(n_draws),
+            "obs_ind": np.arange(n_obs),
+            "treated_units": "treated_agg",  # This coordinate caused the bug
+        },
+    )
+
+    # Call get_hdi_to_df
+    result = get_hdi_to_df(xr_data, hdi_prob=0.94)
+
+    # Assertions to verify the bug is fixed
+    assert isinstance(result, pd.DataFrame), "Result should be a DataFrame"
+
+    # Check that we have exactly 2 columns (lower and higher)
+    assert result.shape[1] == 2, f"Expected 2 columns, got {result.shape[1]}"
+
+    # Check column names
+    assert "lower" in result.columns, "Should have 'lower' column"
+    assert "higher" in result.columns, "Should have 'higher' column"
+
+    # CRITICAL: Check that columns contain numeric data, not strings
+    assert result["lower"].dtype in [
+        np.float64,
+        np.float32,
+    ], f"'lower' column should be numeric, got {result['lower'].dtype}"
+    assert result["higher"].dtype in [
+        np.float64,
+        np.float32,
+    ], f"'higher' column should be numeric, got {result['higher'].dtype}"
+
+    # Check that no string values like 'treated_agg' appear in the data
+    assert not (result["lower"].astype(str).str.contains("treated_agg").any()), (
+        "'lower' column should not contain coordinate string values"
+    )
+    assert not (result["higher"].astype(str).str.contains("treated_agg").any()), (
+        "'higher' column should not contain coordinate string values"
+    )
+
+    # Verify HDI ordering
+    assert (result["lower"] <= result["higher"]).all(), (
+        "'lower' should be <= 'higher' for all rows"
+    )
+
+    # Verify reasonable HDI values (should be around the mean of 5.0)
+    assert result["lower"].min() > 3.0, "HDI lower bounds should be reasonable"
+    assert result["higher"].max() < 7.0, "HDI upper bounds should be reasonable"