Add utility function to retrieve HDI and clean up get_plot_data_bayesian

lpoug · lpoug · commit d7680f638c79 · 2024-11-06T16:34:30.000+01:00
diff --git a/causalpy/experiments/prepostfit.py b/causalpy/experiments/prepostfit.py
@@ -25,7 +25,7 @@
 from sklearn.base import RegressorMixin
 
 from causalpy.custom_exceptions import BadIndexException
-from causalpy.plot_utils import plot_xY
+from causalpy.plot_utils import plot_xY, get_hdi_to_df
 from causalpy.pymc_models import PyMCModel
 from causalpy.utils import round_num
 
@@ -303,19 +303,6 @@ def ols_plot(self, round_to=None, **kwargs) -> tuple[plt.Figure, List[plt.Axes]]
 
         return (fig, ax)
 
-    # def get_plot_data(self) -> pd.DataFrame:
-    #     """Recover the data of a PrePostFit experiment along with the prediction and causal impact information.
-
-    #     Internally, this function dispatches to either `get_plot_data_bayesian` or `get_plot_data_ols`
-    #     depending on the model type.
-    #     """
-    #     if isinstance(self.model, PyMCModel):
-    #         return self.get_plot_data_bayesian()
-    #     elif isinstance(self.model, RegressorMixin):
-    #         return self.get_plot_data_ols()
-    #     else:
-    #         raise ValueError("Unsupported model type")
-
     def get_plot_data_bayesian(self) -> pd.DataFrame:
         """
         Recover the data of a PrePostFit experiment along with the prediction and causal impact information.
@@ -335,23 +322,14 @@ def get_plot_data_bayesian(self) -> pd.DataFrame:
                 .values
             )
             # HDI
-            pre_hdi = (
-                az.hdi(self.pre_pred["posterior_predictive"].mu, hdi_prob=0.94)
-                .to_dataframe()
-                .unstack(level="hdi")
-                .droplevel(0, axis=1)
-            )
-            post_hdi = (
-                az.hdi(self.post_pred["posterior_predictive"].mu, hdi_prob=0.94)
-                .to_dataframe()
-                .unstack(level="hdi")
-                .droplevel(0, axis=1)
-            )
-            pre_data[["pred_hdi_lower", "pred_hdi_upper"]] = pre_hdi
-            post_data[["pred_hdi_lower", "pred_hdi_upper"]] = post_hdi
+            pre_data[["pred_hdi_lower", "pred_hdi_upper"]] = get_hdi_to_df(self.pre_pred["posterior_predictive"].mu)
+            post_data[["pred_hdi_lower", "pred_hdi_upper"]] = get_hdi_to_df(self.post_pred["posterior_predictive"].mu)
             # IMPACT
             pre_data["impact"] = self.pre_impact.mean(dim=["chain", "draw"]).values
             post_data["impact"] = self.post_impact.mean(dim=["chain", "draw"]).values
+            # HDI IMPACT
+            pre_data[["impact_hdi_lower", "impact_hdi_upper"]] = get_hdi_to_df(self.pre_impact)
+            post_data[["impact_hdi_lower", "impact_hdi_upper"]] = get_hdi_to_df(self.post_impact)
 
             self.data_plot = pd.concat([pre_data, post_data])
 
diff --git a/causalpy/plot_utils.py b/causalpy/plot_utils.py
@@ -82,51 +82,22 @@ def plot_xY(
     return (h_line, h_patch)
 
 
-def get_prepostfit_data(result) -> pd.DataFrame:
+def get_hdi_to_df(
+    x: xr.DataArray,
+    hdi_prob: float = 0.94,
+) -> pd.DataFrame:
     """
-    Utility function to recover the data of a PrePostFit experiment along with the prediction and causal impact information.
+    Utility function to calculate the HDI and return its bounds as a DataFrame.
 
-    :param result:
-        The result of a PrePostFit experiment
+    :param x:
+        Xarray data array
+    :param hdi_prob:
+        The size of the HDI, default is 0.94
     """
-
-    from causalpy.experiments.prepostfit import PrePostFit
-    from causalpy.pymc_models import PyMCModel
-
-    if isinstance(result, PrePostFit):
-        pre_data = result.datapre.copy()
-        post_data = result.datapost.copy()
-
-        if isinstance(result.model, PyMCModel):
-            pre_data["prediction"] = (
-                az.extract(
-                    result.pre_pred, group="posterior_predictive", var_names="mu"
-                )
-                .mean("sample")
-                .values
+    hdi = (
+            az.hdi(x, hdi_prob=hdi_prob)
+            .to_dataframe()
+            .unstack(level="hdi")
+            .droplevel(0, axis=1)
             )
-            post_data["prediction"] = (
-                az.extract(
-                    result.post_pred, group="posterior_predictive", var_names="mu"
-                )
-                .mean("sample")
-                .values
-            )
-            pre_data["impact"] = result.pre_impact.mean(dim=["chain", "draw"]).values
-            post_data["impact"] = result.post_impact.mean(dim=["chain", "draw"]).values
-
-        elif isinstance(result.model, RegressorMixin):
-            pre_data["prediction"] = result.pre_pred
-            post_data["prediction"] = result.post_pred
-            pre_data["impact"] = result.pre_impact
-            post_data["impact"] = result.post_impact
-
-        else:
-            raise ValueError("Other model types are not supported")
-
-        ppf_data = pd.concat([pre_data, post_data])
-
-    else:
-        raise ValueError("Other experiments are not supported")
-
-    return ppf_data
+    return hdi