extract input validation code into private methods

drbenvincent · drbenvincent · commit 1ba64fbba426 · 2023-01-05T20:51:59.000Z
diff --git a/causalpy/pymc_experiments.py b/causalpy/pymc_experiments.py
@@ -62,16 +62,7 @@ def __init__(
         **kwargs,
     ) -> None:
         super().__init__(model=model, **kwargs)
-
-        # Input validation
-        if isinstance(data.index, pd.DatetimeIndex):
-            assert isinstance(
-                treatment_time, pd.Timestamp
-            ), "If data.index is DatetimeIndex, treatment_time must be pd.Timestamp."
-        else:
-            assert (
-                isinstance(treatment_time, pd.Timestamp) is False
-            ), "If treatment_time is pd.Timestamp, this only makese sense if data.index is DatetimeIndex."  # noqa: E501
+        self._input_validation(data, treatment_time)
 
         self.treatment_time = treatment_time
         # split data in to pre and post intervention
@@ -124,6 +115,17 @@ def __init__(
         # cumulative impact post
         self.post_impact_cumulative = self.post_impact.cumsum(dim="obs_ind")
 
+    def _input_validation(self, data, treatment_time):
+        """Check that the `treatment_time` type is compatible with `data.index`"""
+        if isinstance(data.index, pd.DatetimeIndex):
+            assert isinstance(
+                treatment_time, pd.Timestamp
+            ), "If data.index is DatetimeIndex, treatment_time must be pd.Timestamp."
+        else:  # any other index type rules out a pd.Timestamp treatment_time
+            assert not isinstance(
+                treatment_time, pd.Timestamp
+            ), "If treatment_time is pd.Timestamp, this only makes sense if data.index is DatetimeIndex."  # noqa: E501
+
     def plot(self):
 
         """Plot the results"""
@@ -276,36 +278,15 @@ def __init__(
         self.formula = formula
         self.time_variable_name = time_variable_name
         self.group_variable_name = group_variable_name
+        self._input_validation()
+
         y, X = dmatrices(formula, self.data)
         self._y_design_info = y.design_info
         self._x_design_info = X.design_info
         self.labels = X.design_info.column_names
         self.y, self.X = np.asarray(y), np.asarray(X)
         self.outcome_variable_name = y.design_info.column_names[0]
 
-        # Input validation ----------------------------------------------------
-        assert (
-            "post_treatment" in formula
-        ), "A predictor called `post_treatment` should be in the dataframe"
-        assert (
-            "post_treatment" in self.data.columns
-        ), "Require a boolean column labelling observations which are `treated`"
-        # Check for `unit` in the incoming dataframe.
-        # *This is only used for plotting purposes*
-        assert (
-            "unit" in self.data.columns
-        ), """
-        Require a `unit` column to label unique units.
-        This is used for plotting purposes
-        """
-        # Check that `group_variable_name` is dummy coded. It should be 0 or 1
-        assert not set(self.data[self.group_variable_name]).difference(
-            set([0, 1])
-        ), f"""
-            The grouping variable {self.group_variable_name} should be dummy coded.
-            Consisting of 0's and 1's only.
-        """
-
         COORDS = {"coeffs": self.labels, "obs_indx": np.arange(self.X.shape[0])}
         self.model.fit(X=self.X, y=self.y, coords=COORDS)
 
@@ -374,6 +355,30 @@ def __init__(
             if "post_treatment" in label and self.group_variable_name in label:
                 self.causal_impact = self.idata.posterior["beta"].isel({"coeffs": i})
 
+    def _input_validation(self):
+        """Validate `self.formula` and the required columns of `self.data`"""
+        assert (
+            "post_treatment" in self.formula
+        ), "A predictor called `post_treatment` should be in the formula"
+        assert (
+            "post_treatment" in self.data.columns
+        ), "Require a boolean column labelling observations which are `treated`"
+        # The dataframe must also carry a `unit` column.
+        # *This is only used for plotting purposes*
+        assert (
+            "unit" in self.data.columns
+        ), """
+        Require a `unit` column to label unique units.
+        This is used for plotting purposes
+        """
+        # `group_variable_name` must be dummy coded: no values other than 0 or 1
+        assert not set(self.data[self.group_variable_name]).difference(
+            set([0, 1])
+        ), f"""
+            The grouping variable {self.group_variable_name} should be dummy coded.
+            Consisting of 0's and 1's only.
+        """
+
     def plot(self):
         """Plot the results.
         Creating the combined mean + HDI legend entries is a bit involved.
@@ -686,6 +691,7 @@ def __init__(
         self.formula = formula
         self.group_variable_name = group_variable_name
         self.pretreatment_variable_name = pretreatment_variable_name
+        self._input_validation()
 
         y, X = dmatrices(formula, self.data)
         self._y_design_info = y.design_info
@@ -694,17 +700,6 @@ def __init__(
         self.y, self.X = np.asarray(y), np.asarray(X)
         self.outcome_variable_name = y.design_info.column_names[0]
 
-        # Input validation ----------------------------------------------------
-        # Check that `group_variable_name` has TWO levels, representing the
-        # treated/untreated. But it does not matter what the actual names of
-        # the levels are.
-        assert (
-            len(pd.Categorical(self.data[self.group_variable_name]).categories) == 2
-        ), f"""
-            There must be 2 levels of the grouping variable {self.group_variable_name}
-            .I.e. the treated and untreated.
-        """
-
         # fit the model to the observed (pre-intervention) data
         COORDS = {"coeffs": self.labels, "obs_indx": np.arange(self.X.shape[0])}
         self.model.fit(X=self.X, y=self.y, coords=COORDS)
@@ -743,6 +738,18 @@ def __init__(
 
         # ================================================================
 
+    def _input_validation(self):
+        """Check that the grouping variable has exactly two levels"""
+        # One level stands for the treated group and the other for the
+        # untreated group. Only the number of levels is checked here; what
+        # the two levels are actually called is irrelevant.
+        groups = pd.Categorical(self.data[self.group_variable_name])
+        n_levels = len(groups.categories)
+        assert n_levels == 2, f"""
+            There must be 2 levels of the grouping variable {self.group_variable_name}
+            .I.e. the treated and untreated.
+        """
+
     def plot(self):
         """Plot the results"""
         fig, ax = plt.subplots(
diff --git a/img/interrogate_badge.svg b/img/interrogate_badge.svg
@@ -1,5 +1,5 @@
 <svg width="140" height="20" viewBox="0 0 140 20" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xml:space="preserve" xmlns:serif="http://www.serif.com/" style="fill-rule:evenodd;clip-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:2;">
-    <title>interrogate: 45.5%</title>
+    <title>interrogate: 46.9%</title>
     <g transform="matrix(1,0,0,1,22,0)">
         <g id="backgrounds" transform="matrix(1.32789,0,0,1,-22.3892,0)">
             <rect x="0" y="0" width="71" height="20" style="fill:rgb(85,85,85);"/>
@@ -12,8 +12,8 @@
     <g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="110">
         <text x="590" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="610">interrogate</text>
         <text x="590" y="140" transform="scale(.1)" textLength="610">interrogate</text>
-        <text x="1160" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="370" data-interrogate="result">45.5%</text>
-        <text x="1160" y="140" transform="scale(.1)" textLength="370" data-interrogate="result">45.5%</text>
+        <text x="1160" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="370" data-interrogate="result">46.9%</text>
+        <text x="1160" y="140" transform="scale(.1)" textLength="370" data-interrogate="result">46.9%</text>
     </g>
     <g id="logo-shadow" serif:id="logo shadow" transform="matrix(0.854876,0,0,0.854876,-6.73514,1.732)">
         <g transform="matrix(0.299012,0,0,0.299012,9.70229,-6.68582)">