Add dynamic naming for HDI columns in _get_plot_data_bayesian (column suffix derived from hdi_prob, e.g. pred_hdi_lower_94), update the tests accordingly, and update the tests' docstrings

lpoug · lpoug · commit 44d3870730f5 · 2025-04-08T13:06:40.000+02:00
diff --git a/causalpy/experiments/prepostfit.py b/causalpy/experiments/prepostfit.py
@@ -308,6 +308,13 @@ def _get_plot_data_bayesian(self, hdi_prob: float = 0.94) -> pd.DataFrame:
         Recover the data of a PrePostFit experiment along with the prediction and causal impact information.
         """
         if isinstance(self.model, PyMCModel):
+            hdi_pct = int(round(hdi_prob * 100))
+
+            pred_lower_col = f"pred_hdi_lower_{hdi_pct}"
+            pred_upper_col = f"pred_hdi_upper_{hdi_pct}"
+            impact_lower_col = f"impact_hdi_lower_{hdi_pct}"
+            impact_upper_col = f"impact_hdi_upper_{hdi_pct}"
+
             pre_data = self.datapre.copy()
             post_data = self.datapost.copy()
 
@@ -321,19 +328,19 @@ def _get_plot_data_bayesian(self, hdi_prob: float = 0.94) -> pd.DataFrame:
                 .mean("sample")
                 .values
             )
-            pre_data[["pred_hdi_lower", "pred_hdi_upper"]] = get_hdi_to_df(
+            pre_data[[pred_lower_col, pred_upper_col]] = get_hdi_to_df(
                 self.pre_pred["posterior_predictive"].mu, hdi_prob=hdi_prob
             ).set_index(pre_data.index)
-            post_data[["pred_hdi_lower", "pred_hdi_upper"]] = get_hdi_to_df(
+            post_data[[pred_lower_col, pred_upper_col]] = get_hdi_to_df(
                 self.post_pred["posterior_predictive"].mu, hdi_prob=hdi_prob
             ).set_index(post_data.index)
 
             pre_data["impact"] = self.pre_impact.mean(dim=["chain", "draw"]).values
             post_data["impact"] = self.post_impact.mean(dim=["chain", "draw"]).values
-            pre_data[["impact_hdi_lower", "impact_hdi_upper"]] = get_hdi_to_df(
+            pre_data[[impact_lower_col, impact_upper_col]] = get_hdi_to_df(
                 self.pre_impact, hdi_prob=hdi_prob
             ).set_index(pre_data.index)
-            post_data[["impact_hdi_lower", "impact_hdi_upper"]] = get_hdi_to_df(
+            post_data[[impact_lower_col, impact_upper_col]] = get_hdi_to_df(
                 self.post_impact, hdi_prob=hdi_prob
             ).set_index(post_data.index)
 
diff --git a/causalpy/tests/test_integration_pymc_examples.py b/causalpy/tests/test_integration_pymc_examples.py
@@ -353,6 +353,7 @@ def test_its():
     2. causalpy.InterruptedTimeSeries returns correct type
     3. the correct number of MCMC chains exists in the posterior inference data
     4. the correct number of MCMC draws exists in the posterior inference data
+    5. the method get_plot_data returns a DataFrame with expected columns
     """
     df = (
         cp.load_data("its")
@@ -378,9 +379,21 @@ def test_its():
         isinstance(item, plt.Axes) for item in ax
     ), "ax must be a numpy.ndarray of plt.Axes"
     plot_data = result.get_plot_data()
-    assert isinstance(plot_data, pd.DataFrame), "The returned object is not a pandas DataFrame"
-    expected_columns = ['prediction', 'pred_hdi_lower', 'pred_hdi_upper', 'impact', 'impact_hdi_lower', 'impact_hdi_upper']
-    assert set(expected_columns).issubset(set(plot_data.columns)), f"DataFrame is missing expected columns {expected_columns}"
+    assert isinstance(plot_data, pd.DataFrame), (
+        "The returned object is not a pandas DataFrame"
+    )
+    expected_columns = [
+        "prediction",
+        "pred_hdi_lower_94",
+        "pred_hdi_upper_94",
+        "impact",
+        "impact_hdi_lower_94",
+        "impact_hdi_upper_94",
+    ]
+    assert set(expected_columns).issubset(set(plot_data.columns)), (
+        f"DataFrame is missing expected columns {expected_columns}"
+    )
+
 
 @pytest.mark.integration
 def test_its_covid():
@@ -392,6 +405,7 @@ def test_its_covid():
     2. causalpy.InterruptedtimeSeries returns correct type
     3. the correct number of MCMC chains exists in the posterior inference data
     4. the correct number of MCMC draws exists in the posterior inference data
+    5. the method get_plot_data returns a DataFrame with expected columns
     """
 
     df = (
@@ -418,9 +432,20 @@ def test_its_covid():
         isinstance(item, plt.Axes) for item in ax
     ), "ax must be a numpy.ndarray of plt.Axes"
     plot_data = result.get_plot_data()
-    assert isinstance(plot_data, pd.DataFrame), "The returned object is not a pandas DataFrame"
-    expected_columns = ['prediction', 'pred_hdi_lower', 'pred_hdi_upper', 'impact', 'impact_hdi_lower', 'impact_hdi_upper']
-    assert set(expected_columns).issubset(set(plot_data.columns)), f"DataFrame is missing expected columns {expected_columns}"
+    assert isinstance(plot_data, pd.DataFrame), (
+        "The returned object is not a pandas DataFrame"
+    )
+    expected_columns = [
+        "prediction",
+        "pred_hdi_lower_94",
+        "pred_hdi_upper_94",
+        "impact",
+        "impact_hdi_lower_94",
+        "impact_hdi_upper_94",
+    ]
+    assert set(expected_columns).issubset(set(plot_data.columns)), (
+        f"DataFrame is missing expected columns {expected_columns}"
+    )
 
 
 @pytest.mark.integration
@@ -433,6 +458,7 @@ def test_sc():
     2. causalpy.SyntheticControl returns correct type
     3. the correct number of MCMC chains exists in the posterior inference data
     4. the correct number of MCMC draws exists in the posterior inference data
+    5. the method get_plot_data returns a DataFrame with expected columns
     """
 
     df = cp.load_data("sc")
@@ -463,9 +489,21 @@ def test_sc():
         isinstance(item, plt.Axes) for item in ax
     ), "ax must be a numpy.ndarray of plt.Axes"
     plot_data = result.get_plot_data()
-    assert isinstance(plot_data, pd.DataFrame), "The returned object is not a pandas DataFrame"
-    expected_columns = ['prediction', 'pred_hdi_lower', 'pred_hdi_upper', 'impact', 'impact_hdi_lower', 'impact_hdi_upper']
-    assert set(expected_columns).issubset(set(plot_data.columns)), f"DataFrame is missing expected columns {expected_columns}"
+    assert isinstance(plot_data, pd.DataFrame), (
+        "The returned object is not a pandas DataFrame"
+    )
+    expected_columns = [
+        "prediction",
+        "pred_hdi_lower_94",
+        "pred_hdi_upper_94",
+        "impact",
+        "impact_hdi_lower_94",
+        "impact_hdi_upper_94",
+    ]
+    assert set(expected_columns).issubset(set(plot_data.columns)), (
+        f"DataFrame is missing expected columns {expected_columns}"
+    )
+
 
 @pytest.mark.integration
 def test_sc_brexit():
@@ -477,6 +515,7 @@ def test_sc_brexit():
     2. causalpy.SyntheticControl returns correct type
     3. the correct number of MCMC chains exists in the posterior inference data
     4. the correct number of MCMC draws exists in the posterior inference data
+    5. the method get_plot_data returns a DataFrame with expected columns
     """
 
     df = (
@@ -512,9 +551,20 @@ def test_sc_brexit():
         isinstance(item, plt.Axes) for item in ax
     ), "ax must be a numpy.ndarray of plt.Axes"
     plot_data = result.get_plot_data()
-    assert isinstance(plot_data, pd.DataFrame), "The returned object is not a pandas DataFrame"
-    expected_columns = ['prediction', 'pred_hdi_lower', 'pred_hdi_upper', 'impact', 'impact_hdi_lower', 'impact_hdi_upper']
-    assert set(expected_columns).issubset(set(plot_data.columns)), f"DataFrame is missing expected columns {expected_columns}"
+    assert isinstance(plot_data, pd.DataFrame), (
+        "The returned object is not a pandas DataFrame"
+    )
+    expected_columns = [
+        "prediction",
+        "pred_hdi_lower_94",
+        "pred_hdi_upper_94",
+        "impact",
+        "impact_hdi_lower_94",
+        "impact_hdi_upper_94",
+    ]
+    assert set(expected_columns).issubset(set(plot_data.columns)), (
+        f"DataFrame is missing expected columns {expected_columns}"
+    )
 
 
 @pytest.mark.integration
diff --git a/causalpy/tests/test_integration_skl_examples.py b/causalpy/tests/test_integration_skl_examples.py
@@ -88,6 +88,7 @@ def test_its():
     Loads data and checks:
     1. data is a dataframe
     2. skl_experiements.InterruptedTimeSeries returns correct type
+    3. the method get_plot_data returns a DataFrame with expected columns
     """
 
     df = (
@@ -112,9 +113,13 @@ def test_its():
         isinstance(item, plt.Axes) for item in ax
     ), "ax must be a numpy.ndarray of plt.Axes"
     plot_data = result.get_plot_data()
-    assert isinstance(plot_data, pd.DataFrame), "The returned object is not a pandas DataFrame"
-    expected_columns = ['prediction', 'impact']
-    assert set(expected_columns).issubset(set(plot_data.columns)), f"DataFrame is missing expected columns {expected_columns}"
+    assert isinstance(plot_data, pd.DataFrame), (
+        "The returned object is not a pandas DataFrame"
+    )
+    expected_columns = ["prediction", "impact"]
+    assert set(expected_columns).issubset(set(plot_data.columns)), (
+        f"DataFrame is missing expected columns {expected_columns}"
+    )
 
 
 @pytest.mark.integration
@@ -125,6 +130,7 @@ def test_sc():
     Loads data and checks:
     1. data is a dataframe
     2. skl_experiements.SyntheticControl returns correct type
+    3. the method get_plot_data returns a DataFrame with expected columns
     """
     df = cp.load_data("sc")
     treatment_time = 70
@@ -152,9 +158,13 @@ def test_sc():
         isinstance(item, plt.Axes) for item in ax
     ), "ax must be a numpy.ndarray of plt.Axes"
     plot_data = result.get_plot_data()
-    assert isinstance(plot_data, pd.DataFrame), "The returned object is not a pandas DataFrame"
-    expected_columns = ['prediction', 'impact']
-    assert set(expected_columns).issubset(set(plot_data.columns)), f"DataFrame is missing expected columns {expected_columns}"
+    assert isinstance(plot_data, pd.DataFrame), (
+        "The returned object is not a pandas DataFrame"
+    )
+    expected_columns = ["prediction", "impact"]
+    assert set(expected_columns).issubset(set(plot_data.columns)), (
+        f"DataFrame is missing expected columns {expected_columns}"
+    )
 
 
 @pytest.mark.integration