Added post_treatment_variable_name parameter and sklearn model summary for difference-in-differences (DiD)

Rojan Shrestha · Rojan Shrestha · commit 4ebe1a704f24 · 2025-07-29T22:06:05.000+05:45
diff --git a/causalpy/experiments/diff_in_diff.py b/causalpy/experiments/diff_in_diff.py
@@ -26,7 +26,6 @@
 
 from causalpy.custom_exceptions import (
     DataException,
-    FormulaException,
 )
 from causalpy.plot_utils import plot_xY
 from causalpy.pymc_models import PyMCModel
@@ -84,6 +83,7 @@ def __init__(
         formula: str,
         time_variable_name: str,
         group_variable_name: str,
+        post_treatment_variable_name: str = "post_treatment",
         model=None,
         **kwargs,
     ) -> None:
@@ -95,6 +95,7 @@ def __init__(
         self.formula = formula
         self.time_variable_name = time_variable_name
         self.group_variable_name = group_variable_name
+        self.post_treatment_variable_name = post_treatment_variable_name
         self.input_validation()
 
         y, X = dmatrices(formula, self.data)
@@ -128,6 +129,12 @@ def __init__(
             }
             self.model.fit(X=self.X, y=self.y, coords=COORDS)
         elif isinstance(self.model, RegressorMixin):
+            # For scikit-learn models, automatically set fit_intercept=False.
+            # This keeps the intercept in the coefficients array instead of the separate intercept_ attribute;
+            # otherwise the intercept would be missing from the coefficients array and displayed as 0 in the model summary.
+            # TODO: handle this in ScikitLearnAdaptor itself rather than here.
+            if hasattr(self.model, "fit_intercept"):
+                self.model.fit_intercept = False
             self.model.fit(X=self.X, y=self.y)
         else:
             raise ValueError("Model type not recognized")
@@ -173,7 +180,7 @@ def __init__(
             # just the treated group
             .query(f"{self.group_variable_name} == 1")
             # just the treatment period(s)
-            .query("post_treatment == True")
+            .query(f"{self.post_treatment_variable_name} == True")
             # drop the outcome variable
             .drop(self.outcome_variable_name, axis=1)
             # We may have multiple units per time point, we only want one time point
@@ -189,7 +196,10 @@ def __init__(
         # INTERVENTION: set the interaction term between the group and the
         # post_treatment variable to zero. This is the counterfactual.
         for i, label in enumerate(self.labels):
-            if "post_treatment" in label and self.group_variable_name in label:
+            if (
+                self.post_treatment_variable_name in label
+                and self.group_variable_name in label
+            ):
                 new_x.iloc[:, i] = 0
         self.y_pred_counterfactual = self.model.predict(np.asarray(new_x))
 
@@ -198,32 +208,53 @@ def __init__(
             # This is the coefficient on the interaction term
             coeff_names = self.model.idata.posterior.coords["coeffs"].data
             for i, label in enumerate(coeff_names):
-                if "post_treatment" in label and self.group_variable_name in label:
+                if (
+                    self.post_treatment_variable_name in label
+                    and self.group_variable_name in label
+                ):
                     self.causal_impact = self.model.idata.posterior["beta"].isel(
                         {"coeffs": i}
                     )
         elif isinstance(self.model, RegressorMixin):
             # This is the coefficient on the interaction term
-            # TODO: CHECK FOR CORRECTNESS
-            self.causal_impact = (
-                self.y_pred_treatment[1] - self.y_pred_counterfactual[0]
-            ).item()
+            # Map each design-matrix label to its fitted coefficient: {label: value}
+            coef_map = dict(zip(self.labels, self.model.get_coeffs()))
+            # Build the "group:post_treatment" interaction name from the user-supplied variable names and find the label containing it
+            interaction_term = (
+                f"{self.group_variable_name}:{self.post_treatment_variable_name}"
+            )
+            matched_key = next((k for k in coef_map if interaction_term in k), None)
+            att = coef_map.get(matched_key)
+            self.causal_impact = att
         else:
             raise ValueError("Model type not recognized")
 
         return
 
     def input_validation(self):
         """Validate the input data and model formula for correctness"""
-        if "post_treatment" not in self.formula:
-            raise FormulaException(
-                "A predictor called `post_treatment` should be in the formula"
-            )
-
-        if "post_treatment" not in self.data.columns:
-            raise DataException(
-                "Require a boolean column labelling observations which are `treated`"
-            )
+        if (
+            self.post_treatment_variable_name not in self.formula
+            or self.post_treatment_variable_name not in self.data.columns
+        ):
+            if self.post_treatment_variable_name == "post_treatment":
+                # Default case - user didn't specify custom name, so guide them to use "post_treatment"
+                raise DataException(
+                    "Missing 'post_treatment' in formula or dataset.\n"
+                    "Note: post_treatment_variable_name might have been set to 'post_treatment' by default.\n"
+                    "1) Add 'post_treatment' to formula (e.g., 'y ~ 1 + group*post_treatment')\n"
+                    "2) and ensure dataset has boolean column 'post_treatment'.\n"
+                    "To use custom name, provide additional argument post_treatment_variable_name='your_post_treatment_variable_name'."
+                )
+            else:
+                # Custom case - user specified custom name, so remind them what they specified
+                raise DataException(
+                    f"Missing required variable '{self.post_treatment_variable_name}' in formula or dataset.\n\n"
+                    f"Since you specified post_treatment_variable_name='{self.post_treatment_variable_name}', "
+                    f"please ensure:\n"
+                    f"1) formula includes '{self.post_treatment_variable_name}'\n"
+                    f"2) dataset has boolean column named '{self.post_treatment_variable_name}'"
+                )
 
         if "unit" not in self.data.columns:
             raise DataException(
diff --git a/docs/source/_static/interrogate_badge.svg b/docs/source/_static/interrogate_badge.svg
@@ -1,19 +1,19 @@
 <svg width="140" height="20" viewBox="0 0 140 20" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xml:space="preserve" xmlns:serif="http://www.serif.com/" style="fill-rule:evenodd;clip-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:2;">
-    <title>interrogate: 95.5%</title>
+    <title>interrogate: 93.6%</title>
     <g transform="matrix(1,0,0,1,22,0)">
         <g id="backgrounds" transform="matrix(1.32789,0,0,1,-22.3892,0)">
             <rect x="0" y="0" width="71" height="20" style="fill:rgb(85,85,85);"/>
         </g>
-        <rect x="71" y="0" width="47" height="20" data-interrogate="color" style="fill:#4c1"/>
+        <rect x="71" y="0" width="47" height="20" data-interrogate="color" style="fill:#97CA00"/>
         <g transform="matrix(1.19746,0,0,1,-22.3744,-4.85723e-16)">
             <rect x="0" y="0" width="118" height="20" style="fill:url(#_Linear1);"/>
         </g>
     </g>
     <g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="110">
         <text x="590" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="610">interrogate</text>
         <text x="590" y="140" transform="scale(.1)" textLength="610">interrogate</text>
-        <text x="1160" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="370" data-interrogate="result">95.5%</text>
-        <text x="1160" y="140" transform="scale(.1)" textLength="370" data-interrogate="result">95.5%</text>
+        <text x="1160" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="370" data-interrogate="result">93.6%</text>
+        <text x="1160" y="140" transform="scale(.1)" textLength="370" data-interrogate="result">93.6%</text>
     </g>
     <g id="logo-shadow" serif:id="logo shadow" transform="matrix(0.854876,0,0,0.854876,-6.73514,1.732)">
         <g transform="matrix(0.299012,0,0,0.299012,9.70229,-6.68582)">