pymc-labs
diff --git a/‎.pre-commit-config.yaml
Lines changed: 5 additions & 5 deletions b/‎.pre-commit-config.yaml
Lines changed: 5 additions & 5 deletions
diff --git a/‎causalpy/custom_exceptions.py
Lines changed: 21 additions & 0 deletions b/‎causalpy/custom_exceptions.py
Lines changed: 21 additions & 0 deletions
diff --git a/‎causalpy/pymc_experiments.py
Lines changed: 73 additions & 38 deletions b/‎causalpy/pymc_experiments.py
Lines changed: 73 additions & 38 deletions
@@ -2,7 +2,7 @@
 # See https://pre-commit.com/hooks.html for more hooks
 repos:
     - repo: https://github.com/pre-commit/pre-commit-hooks
-      rev: v4.3.0
+      rev: v4.4.0
       hooks:
           - id: trailing-whitespace
             exclude_types: [svg]
@@ -11,21 +11,21 @@ repos:
           - id: check-yaml
           - id: check-added-large-files
     - repo: https://github.com/pycqa/isort
-      rev: 5.11.2
+      rev: 5.11.4
       hooks:
           - id: isort
             args: [--profile, black]
             types: [python]
     - repo: https://github.com/ambv/black
-      rev: 22.10.0
+      rev: 22.12.0
       hooks:
           - id: black
     - repo: https://github.com/pycqa/flake8
-      rev: 3.9.2
+      rev: 6.0.0
       hooks:
           - id: flake8
     - repo: https://github.com/nbQA-dev/nbQA
-      rev: 1.5.3
+      rev: 1.6.1
       hooks:
         - id: nbqa-black
           # additional_dependencies: [jupytext]  # optional, only if you're using Jupytext
 
@@ -0,0 +1,21 @@
+class BadIndexException(Exception):
+    """Custom exception used when we have a mismatch in types between the dataframe
+    index and an event, typically a treatment or intervention."""
+
+    def __init__(self, message):
+        self.message = message
+
+
+class FormulaException(Exception):
+    """Exception raised given when there is some error in a user-provided model
+    formula"""
+
+    def __init__(self, message):
+        self.message = message
+
+
+class DataException(Exception):
+    """Exception raised given when there is some error in user-provided dataframe"""
+
+    def __init__(self, message):
+        self.message = message
@@ -1,3 +1,5 @@
+from typing import Union
+
 import arviz as az
 import matplotlib.pyplot as plt
 import numpy as np
@@ -6,7 +8,10 @@
 import xarray as xr
 from patsy import build_design_matrices, dmatrices
 
+from causalpy.custom_exceptions import BadIndexException  # NOQA
+from causalpy.custom_exceptions import DataException, FormulaException
 from causalpy.plot_utils import plot_xY
+from causalpy.utils import _is_variable_dummy_coded, _series_has_2_levels
 
 LEGEND_FONT_SIZE = 12
 az.style.use("arviz-darkgrid")
@@ -54,12 +59,14 @@ class TimeSeriesExperiment(ExperimentalDesign):
     def __init__(
         self,
         data: pd.DataFrame,
-        treatment_time: int,
+        treatment_time: Union[int, float, pd.Timestamp],
         formula: str,
         model=None,
         **kwargs,
     ) -> None:
         super().__init__(model=model, **kwargs)
+        self._input_validation(data, treatment_time)
+
         self.treatment_time = treatment_time
         # split data in to pre and post intervention
         self.datapre = data[data.index <= self.treatment_time]
@@ -111,6 +118,21 @@ def __init__(
         # cumulative impact post
         self.post_impact_cumulative = self.post_impact.cumsum(dim="obs_ind")
 
+    def _input_validation(self, data, treatment_time):
+        """Validate the input data and model formula for correctness"""
+        if isinstance(data.index, pd.DatetimeIndex) and not isinstance(
+            treatment_time, pd.Timestamp
+        ):
+            raise BadIndexException(
+                "If data.index is DatetimeIndex, treatment_time must be pd.Timestamp."
+            )
+        if not isinstance(data.index, pd.DatetimeIndex) and isinstance(
+            treatment_time, pd.Timestamp
+        ):
+            raise BadIndexException(
+                "If data.index is not DatetimeIndex, treatment_time must be pd.Timestamp."  # noqa: E501
+            )
+
     def plot(self):
 
         """Plot the results"""
@@ -263,36 +285,15 @@ def __init__(
         self.formula = formula
         self.time_variable_name = time_variable_name
         self.group_variable_name = group_variable_name
+        self._input_validation()
+
         y, X = dmatrices(formula, self.data)
         self._y_design_info = y.design_info
         self._x_design_info = X.design_info
         self.labels = X.design_info.column_names
         self.y, self.X = np.asarray(y), np.asarray(X)
         self.outcome_variable_name = y.design_info.column_names[0]
 
-        # Input validation ----------------------------------------------------
-        assert (
-            "post_treatment" in formula
-        ), "A predictor called `post_treatment` should be in the dataframe"
-        assert (
-            "post_treatment" in self.data.columns
-        ), "Require a boolean column labelling observations which are `treated`"
-        # Check for `unit` in the incoming dataframe.
-        # *This is only used for plotting purposes*
-        assert (
-            "unit" in self.data.columns
-        ), """
-        Require a `unit` column to label unique units.
-        This is used for plotting purposes
-        """
-        # Check that `group_variable_name` is dummy coded. It should be 0 or 1
-        assert not set(self.data[self.group_variable_name]).difference(
-            set([0, 1])
-        ), f"""
-            The grouping variable {self.group_variable_name} should be dummy coded.
-            Consisting of 0's and 1's only.
-        """
-
         COORDS = {"coeffs": self.labels, "obs_indx": np.arange(self.X.shape[0])}
         self.model.fit(X=self.X, y=self.y, coords=COORDS)
 
@@ -361,6 +362,29 @@ def __init__(
             if "post_treatment" in label and self.group_variable_name in label:
                 self.causal_impact = self.idata.posterior["beta"].isel({"coeffs": i})
 
+    def _input_validation(self):
+        """Validate the input data and model formula for correctness"""
+        if "post_treatment" not in self.formula:
+            raise FormulaException(
+                "A predictor called `post_treatment` should be in the formula"
+            )
+
+        if "post_treatment" not in self.data.columns:
+            raise DataException(
+                "Require a boolean column labelling observations which are `treated`"
+            )
+
+        if "unit" not in self.data.columns:
+            raise DataException(
+                "Require a `unit` column to label unique units. This is used for plotting purposes"  # noqa: E501
+            )
+
+        if _is_variable_dummy_coded(self.data[self.group_variable_name]) is False:
+            raise DataException(
+                f"""The grouping variable {self.group_variable_name} should be dummy
+                coded. Consisting of 0's and 1's only."""
+            )
+
     def plot(self):
         """Plot the results.
         Creating the combined mean + HDI legend entries is a bit involved.
@@ -536,16 +560,15 @@ def __init__(
         self.formula = formula
         self.running_variable_name = running_variable_name
         self.treatment_threshold = treatment_threshold
+        self._input_validation()
+
         y, X = dmatrices(formula, self.data)
         self._y_design_info = y.design_info
         self._x_design_info = X.design_info
         self.labels = X.design_info.column_names
         self.y, self.X = np.asarray(y), np.asarray(X)
         self.outcome_variable_name = y.design_info.column_names[0]
 
-        # TODO: `treated` is a deterministic function of x and treatment_threshold, so
-        # this could be a function rather than supplied data
-
         # DEVIATION FROM SKL EXPERIMENT CODE =============================
         # fit the model to the observed (pre-intervention) data
         COORDS = {"coeffs": self.labels, "obs_indx": np.arange(self.X.shape[0])}
@@ -586,6 +609,18 @@ def __init__(
             - self.pred_discon["posterior_predictive"].sel(obs_ind=0)["mu"]
         )
 
+    def _input_validation(self):
+        """Validate the input data and model formula for correctness"""
+        if "treated" not in self.formula:
+            raise FormulaException(
+                "A predictor called `treated` should be in the formula"
+            )
+
+        if _is_variable_dummy_coded(self.data["treated"]) is False:
+            raise DataException(
+                """The treated variable should be dummy coded. Consisting of 0's and 1's only."""  # noqa: E501
+            )
+
     def _is_treated(self, x):
         """Returns ``True`` if `x` is greater than or equal to the treatment threshold.
 
@@ -673,6 +708,7 @@ def __init__(
         self.formula = formula
         self.group_variable_name = group_variable_name
         self.pretreatment_variable_name = pretreatment_variable_name
+        self._input_validation()
 
         y, X = dmatrices(formula, self.data)
         self._y_design_info = y.design_info
@@ -681,17 +717,6 @@ def __init__(
         self.y, self.X = np.asarray(y), np.asarray(X)
         self.outcome_variable_name = y.design_info.column_names[0]
 
-        # Input validation ----------------------------------------------------
-        # Check that `group_variable_name` has TWO levels, representing the
-        # treated/untreated. But it does not matter what the actual names of
-        # the levels are.
-        assert (
-            len(pd.Categorical(self.data[self.group_variable_name]).categories) == 2
-        ), f"""
-            There must be 2 levels of the grouping variable {self.group_variable_name}
-            .I.e. the treated and untreated.
-        """
-
         # fit the model to the observed (pre-intervention) data
         COORDS = {"coeffs": self.labels, "obs_indx": np.arange(self.X.shape[0])}
         self.model.fit(X=self.X, y=self.y, coords=COORDS)
@@ -730,6 +755,16 @@ def __init__(
 
         # ================================================================
 
+    def _input_validation(self):
+        """Validate the input data and model formula for correctness"""
+        if not _series_has_2_levels(self.data[self.group_variable_name]):
+            raise DataException(
+                f"""
+                There must be 2 levels of the grouping variable
+                {self.group_variable_name}. I.e. the treated and untreated.
+                """
+            )
+
     def plot(self):
         """Plot the results"""
         fig, ax = plt.subplots(