run pre-commit

NathanielF · NathanielF · commit 3385c57157b6 · 2024-03-17T14:39:17.000Z
Signed-off-by: Nathaniel <NathanielF@users.noreply.github.com>
diff --git a/causalpy/data/datasets.py b/causalpy/data/datasets.py
@@ -20,7 +20,7 @@
     "anova1": {"filename": "ancova_generated.csv"},
     "geolift1": {"filename": "geolift1.csv"},
     "risk": {"filename": "AJR2001.csv"},
-    "nhefs": {"filename": "nhefs.csv"}
+    "nhefs": {"filename": "nhefs.csv"},
 }
 
 
diff --git a/causalpy/data_validation.py b/causalpy/data_validation.py
@@ -142,7 +142,7 @@ def _input_validation(self):
         """Validate the input data and model formula for correctness"""
         treatment = self.formula.split("~")[0]
         test = treatment.strip() in self.data.columns
-        test  = test & (self.outcome_variable in self.data.columns)
+        test = test & (self.outcome_variable in self.data.columns)
         if not test:
             raise DataException(
                 f"""
@@ -158,4 +158,4 @@ def _input_validation(self):
             raise DataException(
                 """Warning. The treatment variable is not 0-1 Binary.
                 """
-            )
+            )
diff --git a/causalpy/pymc_experiments.py b/causalpy/pymc_experiments.py
@@ -30,7 +30,7 @@
     RegressionKinkDataValidator,
     PrePostNEGDDataValidator,
     IVDataValidator,
-    PropensityDataValidator
+    PropensityDataValidator,
 )
 from causalpy.plot_utils import plot_xY
 from causalpy.utils import round_num
@@ -1491,7 +1491,7 @@ class InversePropensityWeighting(ExperimentalDesign, PropensityDataValidator):
     :param outcome_variable
         A string denoting the outcome variable in datq to be reweighted
     :param weighting_scheme:
-        A string denoting which weighting scheme to use among: 'raw', 'robust', 
+        A string denoting which weighting scheme to use among: 'raw', 'robust',
         'doubly robust'
     :param model:
         A PyMC model
@@ -1543,17 +1543,15 @@ def __init__(
 
         COORDS = {"obs_ind": list(range(self.X.shape[0])), "coeffs": self.labels}
         self.coords = COORDS
-        self.model.fit(
-            X=self.X, t=self.t, coords=COORDS
-        )
+        self.model.fit(X=self.X, t=self.t, coords=COORDS)
 
     def make_robust_adjustments(self, ps):
         X = pd.DataFrame(self.X, columns=self.labels)
-        X['ps'] = ps
+        X["ps"] = ps
         X[self.outcome_variable] = self.y
         t = self.t.flatten()
         p_of_t = np.mean(t)
-        X["i_ps"] = np.where(t==1, (p_of_t / X["ps"]), (1 - p_of_t) / (1 - X["ps"]))
+        X["i_ps"] = np.where(t == 1, (p_of_t / X["ps"]), (1 - p_of_t) / (1 - X["ps"]))
         n_ntrt = X[t == 0].shape[0]
         n_trt = X[t == 1].shape[0]
         outcome_trt = X[t == 1][self.outcome_variable]
@@ -1564,10 +1562,9 @@ def make_robust_adjustments(self, ps):
         weighted_outcome0 = outcome_ntrt * i_propensity0
         return weighted_outcome0, weighted_outcome1, n_ntrt, n_trt
 
-
     def make_raw_adjustments(self, ps):
         X = pd.DataFrame(self.X, columns=self.labels)
-        X['ps'] = ps
+        X["ps"] = ps
         X[self.outcome_variable] = self.y
         t = self.t.flatten()
         X["ps"] = np.where(t, X["ps"], 1 - X["ps"])
@@ -1580,28 +1577,26 @@ def make_raw_adjustments(self, ps):
         weighted_outcome1 = outcome_trt * i_propensity1
         weighted_outcome0 = outcome_ntrt * i_propensity0
         return weighted_outcome0, weighted_outcome1, n_ntrt, n_trt
-    
 
     def make_overlap_adjustments(self, ps):
         X = pd.DataFrame(self.X, columns=self.labels)
-        X['ps'] = ps
+        X["ps"] = ps
         X[self.outcome_variable] = self.y
         t = self.t.flatten()
-        X["i_ps"] = np.where(t, (1-X["ps"])*t, X["ps"]*(1-t))
-        n_ntrt = (1-t[t == 0])*X[t == 0]['i_ps']
-        n_trt = t[t == 1]*X[t == 1]['i_ps']
+        X["i_ps"] = np.where(t, (1 - X["ps"]) * t, X["ps"] * (1 - t))
+        n_ntrt = (1 - t[t == 0]) * X[t == 0]["i_ps"]
+        n_trt = t[t == 1] * X[t == 1]["i_ps"]
         outcome_trt = X[t == 1][self.outcome_variable]
         outcome_ntrt = X[t == 0][self.outcome_variable]
         i_propensity0 = X[t == 0]["i_ps"]
         i_propensity1 = X[t == 1]["i_ps"]
-        weighted_outcome1 = t[t == 1]*outcome_trt * i_propensity1
-        weighted_outcome0 = (1-t[t == 0])*outcome_ntrt * i_propensity0
+        weighted_outcome1 = t[t == 1] * outcome_trt * i_propensity1
+        weighted_outcome0 = (1 - t[t == 0]) * outcome_ntrt * i_propensity0
         return weighted_outcome0, weighted_outcome1, n_ntrt, n_trt
 
-
     def make_doubly_robust_adjustment(self, ps):
         X = pd.DataFrame(self.X, columns=self.labels)
-        X['ps'] = ps
+        X["ps"] = ps
         t = self.t.flatten()
         m0 = sk_lin_reg().fit(X[t == 0].astype(float), self.y[t == 0])
         m1 = sk_lin_reg().fit(X[t == 1].astype(float), self.y[t == 1])
@@ -1611,50 +1606,72 @@ def make_doubly_robust_adjustment(self, ps):
         weighted_outcome0 = (1 - t) * (self.y - m0_pred) / (1 - X["ps"]) + m0_pred
         weighted_outcome1 = t * (self.y - m1_pred) / X["ps"] + m1_pred
         return weighted_outcome0, weighted_outcome1, None, None
-    
+
     def get_ate(self, i, idata, method="doubly_robust"):
         ### Post processing the sample posterior distribution for propensity scores
         ### One sample at a time.
         ps = idata["posterior"]["p"].stack(z=("chain", "draw"))[:, i].values
         if method == "robust":
-            weighted_outcome_ntrt, weighted_outcome_trt, n_ntrt, n_trt = self.make_robust_adjustments(ps)
+            (
+                weighted_outcome_ntrt,
+                weighted_outcome_trt,
+                n_ntrt,
+                n_trt,
+            ) = self.make_robust_adjustments(ps)
             ntrt = weighted_outcome_ntrt.sum() / n_ntrt
             trt = weighted_outcome_trt.sum() / n_trt
         elif method == "raw":
-            weighted_outcome_ntrt, weighted_outcome_trt, n_ntrt, n_trt = self.make_raw_adjustments(ps)
+            (
+                weighted_outcome_ntrt,
+                weighted_outcome_trt,
+                n_ntrt,
+                n_trt,
+            ) = self.make_raw_adjustments(ps)
             ntrt = weighted_outcome_ntrt.sum() / n_ntrt
             trt = weighted_outcome_trt.sum() / n_trt
         elif method == "overlap":
-            weighted_outcome_ntrt, weighted_outcome_trt, n_ntrt, n_trt = self.make_overlap_adjustments(ps)
-            ntrt = np.sum(weighted_outcome_ntrt) / np.sum(n_ntrt) 
+            (
+                weighted_outcome_ntrt,
+                weighted_outcome_trt,
+                n_ntrt,
+                n_trt,
+            ) = self.make_overlap_adjustments(ps)
+            ntrt = np.sum(weighted_outcome_ntrt) / np.sum(n_ntrt)
             trt = np.sum(weighted_outcome_trt) / np.sum(n_trt)
         else:
-            weighted_outcome_ntrt, weighted_outcome_trt, n_ntrt, n_trt = self.make_doubly_robust_adjustment(
-                ps
-            )
+            (
+                weighted_outcome_ntrt,
+                weighted_outcome_trt,
+                n_ntrt,
+                n_trt,
+            ) = self.make_doubly_robust_adjustment(ps)
             trt = np.mean(weighted_outcome_trt)
             ntrt = np.mean(weighted_outcome_ntrt)
         ate = trt - ntrt
         return [ate, trt, ntrt]
-    
+
     def plot_ATE(self, idata=None, method=None, prop_draws=100, ate_draws=300):
         if idata is None:
             idata = self.idata
-        if method is None: 
+        if method is None:
             method = self.weighting_scheme
-        
+
         def plot_weights(bins, top0, top1, ax):
             ax.axhline(0, c="gray", linewidth=1)
-            bars0 = ax.bar(bins[:-1] + 0.025, top0, width=0.04, facecolor="red", alpha=0.3)
-            bars1 = ax.bar(bins[:-1] + 0.025, -top1, width=0.04, facecolor="blue", alpha=0.3)
+            bars0 = ax.bar(
+                bins[:-1] + 0.025, top0, width=0.04, facecolor="red", alpha=0.3
+            )
+            bars1 = ax.bar(
+                bins[:-1] + 0.025, -top1, width=0.04, facecolor="blue", alpha=0.3
+            )
 
             for bars in (bars0, bars1):
                 for bar in bars:
                     bar.set_edgecolor("black")
 
         def make_hists(idata, i, axs):
-            p_i = az.extract(idata)['p'][:, i].values
-            bins = np.arange(0.025, .99, 0.005)
+            p_i = az.extract(idata)["p"][:, i].values
+            bins = np.arange(0.025, 0.99, 0.005)
             top0, _ = np.histogram(p_i[self.t.flatten() == 0], bins=bins)
             top1, _ = np.histogram(p_i[self.t.flatten() == 1], bins=bins)
             plot_weights(bins, top0, top1, axs[0])
@@ -1664,68 +1681,114 @@ def make_hists(idata, i, axs):
 
         fig, axs = plt.subplot_mosaic(mosaic, figsize=(20, 13))
         axs = [axs[k] for k in axs.keys()]
-        axs[0].axvline(0.1, linestyle='--', label='Low Extreme Propensity Scores', color='black')
-        axs[0].axvline(0.9, linestyle='--', label='Hi Extreme Propensity Scores', color='black')
-        axs[0].set_title("Draws from the Posterior \n  Propensity Scores Distribution", fontsize=20)
-
-        [make_hists(idata, i, axs) for i in range(prop_draws)];
-        ate_df = pd.DataFrame([self.get_ate(i, idata, method=method) for i in range(ate_draws)], columns=['ATE', 'Y(1)', 'Y(0)'])
-        axs[1].hist(ate_df['Y(1)'], label='E(Y(1))', ec='black', bins=10, alpha=0.8, color='blue');
-        axs[1].hist(ate_df['Y(0)'], label='E(Y(0))', ec='black', bins=10, alpha=0.8, color='red');
+        axs[0].axvline(
+            0.1, linestyle="--", label="Low Extreme Propensity Scores", color="black"
+        )
+        axs[0].axvline(
+            0.9, linestyle="--", label="Hi Extreme Propensity Scores", color="black"
+        )
+        axs[0].set_title(
+            "Draws from the Posterior \n  Propensity Scores Distribution", fontsize=20
+        )
+
+        [make_hists(idata, i, axs) for i in range(prop_draws)]
+        ate_df = pd.DataFrame(
+            [self.get_ate(i, idata, method=method) for i in range(ate_draws)],
+            columns=["ATE", "Y(1)", "Y(0)"],
+        )
+        axs[1].hist(
+            ate_df["Y(1)"],
+            label="E(Y(1))",
+            ec="black",
+            bins=10,
+            alpha=0.8,
+            color="blue",
+        )
+        axs[1].hist(
+            ate_df["Y(0)"], label="E(Y(0))", ec="black", bins=10, alpha=0.8, color="red"
+        )
         axs[1].legend()
-        axs[1].set_title(f'The Outcomes \n Under the {method} re-weighting scheme', fontsize=20)
-        axs[2].hist(ate_df['ATE'], label= 'ATE',  ec='black', bins=10, color='slateblue', alpha=0.6);
-        axs[2].axvline(ate_df['ATE'].mean(), label='E(ATE)')
+        axs[1].set_title(
+            f"The Outcomes \n Under the {method} re-weighting scheme", fontsize=20
+        )
+        axs[2].hist(
+            ate_df["ATE"],
+            label="ATE",
+            ec="black",
+            bins=10,
+            color="slateblue",
+            alpha=0.6,
+        )
+        axs[2].axvline(ate_df["ATE"].mean(), label="E(ATE)")
         axs[2].legend()
-        axs[2].set_title("Average Treatment Effect", fontsize=20);
-
+        axs[2].set_title("Average Treatment Effect", fontsize=20)
 
     def weighted_percentile(self, data, weights, perc):
         """
         perc : percentile in [0-1]!
         """
         ix = np.argsort(data)
-        data = data[ix] # sort data
-        weights = weights[ix] # sort weights
-        cdf = (np.cumsum(weights) - 0.5 * weights) / np.sum(weights) # 'like' a CDF function
+        data = data[ix]  # sort data
+        weights = weights[ix]  # sort weights
+        cdf = (np.cumsum(weights) - 0.5 * weights) / np.sum(
+            weights
+        )  # 'like' a CDF function
         return np.interp(perc, cdf, data)
-    
+
     def plot_balance_ecdf(self, covariate, idata=None, weighting_scheme=None):
         if idata is None:
             idata = self.idata
-        if weighting_scheme is None: 
+        if weighting_scheme is None:
             weighting_scheme = self.weighting_scheme
-        
-        ps = az.extract(idata)['p'].mean(dim='sample').values
+
+        ps = az.extract(idata)["p"].mean(dim="sample").values
         X = pd.DataFrame(self.X, columns=self.labels)
-        X['ps'] = ps
+        X["ps"] = ps
         t = self.t.flatten()
-        if weighting_scheme == 'raw': 
+        if weighting_scheme == "raw":
             w1 = 1 / ps[t == 1]
-            w0 = 1 / (1-ps[t == 0])
-        elif weighting_scheme == 'robust':
+            w0 = 1 / (1 - ps[t == 0])
+        elif weighting_scheme == "robust":
             p_of_t = np.mean(t)
-            w1 = p_of_t /  (ps[t == 1]) 
+            w1 = p_of_t / (ps[t == 1])
             w0 = (1 - p_of_t) / (1 - ps[t == 0])
         else:
-            w1 = (1-ps[t == 1])*t[t==1]
-            w0 = (ps[t == 0]*(1-t[t==0]))
+            w1 = (1 - ps[t == 1]) * t[t == 1]
+            w0 = ps[t == 0] * (1 - t[t == 0])
         fig, axs = plt.subplots(1, 2, figsize=(20, 6))
-        raw_trt = [self.weighted_percentile(X[t == 1][covariate].values, np.ones(len(X[t == 1])), p) for p in np.linspace(0, 1, 1000)]
-        raw_ntrt = [self.weighted_percentile(X[t == 0][covariate].values, np.ones(len(X[t == 0])), p) for p in np.linspace(0, 1, 1000)]
-        w_trt = [self.weighted_percentile(X[t == 1][covariate].values, w1, p) for p in np.linspace(0, 1, 1000)]
-        w_ntrt = [self.weighted_percentile(X[t == 0][covariate].values, w0, p) for p in np.linspace(0, 1, 1000)]
-        axs[0].plot(np.linspace(0, 1, 1000), raw_trt, color='blue', label='Raw Treated')
-        axs[0].plot(np.linspace(0, 1, 1000), raw_ntrt, color='red', label='Raw Control')
+        raw_trt = [
+            self.weighted_percentile(
+                X[t == 1][covariate].values, np.ones(len(X[t == 1])), p
+            )
+            for p in np.linspace(0, 1, 1000)
+        ]
+        raw_ntrt = [
+            self.weighted_percentile(
+                X[t == 0][covariate].values, np.ones(len(X[t == 0])), p
+            )
+            for p in np.linspace(0, 1, 1000)
+        ]
+        w_trt = [
+            self.weighted_percentile(X[t == 1][covariate].values, w1, p)
+            for p in np.linspace(0, 1, 1000)
+        ]
+        w_ntrt = [
+            self.weighted_percentile(X[t == 0][covariate].values, w0, p)
+            for p in np.linspace(0, 1, 1000)
+        ]
+        axs[0].plot(np.linspace(0, 1, 1000), raw_trt, color="blue", label="Raw Treated")
+        axs[0].plot(np.linspace(0, 1, 1000), raw_ntrt, color="red", label="Raw Control")
         axs[0].set_title(f"ECDF \n Raw: {covariate}")
-        axs[1].set_title(f"ECDF \n Weighted {weighting_scheme} adjustment for {covariate}")
-        axs[1].plot(np.linspace(0, 1, 1000), w_trt, color='blue', label='Reweighted Treated')
-        axs[1].plot(np.linspace(0, 1, 1000), w_ntrt, color='red', label='Reweighted Control')
+        axs[1].set_title(
+            f"ECDF \n Weighted {weighting_scheme} adjustment for {covariate}"
+        )
+        axs[1].plot(
+            np.linspace(0, 1, 1000), w_trt, color="blue", label="Reweighted Treated"
+        )
+        axs[1].plot(
+            np.linspace(0, 1, 1000), w_ntrt, color="red", label="Reweighted Control"
+        )
         axs[1].set_xlabel("Quantiles")
         axs[0].set_xlabel("Quantiles")
         axs[1].legend()
         axs[0].legend()
-
-
-        
-
diff --git a/causalpy/pymc_models.py b/causalpy/pymc_models.py
@@ -412,7 +412,6 @@ def build_model(self, X, t, coords):
             p = pm.Deterministic("p", pm.math.invlogit(mu))
             t_pred = pm.Bernoulli("t_pred", p=p, observed=t_data, dims="obs_ind")
 
-               
     def fit(self, X, t, coords):
         """Draw samples from posterior, prior predictive, and posterior predictive
         distributions.
@@ -424,4 +423,4 @@ def fit(self, X, t, coords):
             self.idata.extend(
                 pm.sample_posterior_predictive(self.idata, progressbar=False)
             )
-        return self.idata
+        return self.idata

Original file line number	Diff line number	Diff line change
`@@ -20,7 +20,7 @@`
`20`	`20`	`"anova1": {"filename": "ancova_generated.csv"},`
`21`	`21`	`"geolift1": {"filename": "geolift1.csv"},`
`22`	`22`	`"risk": {"filename": "AJR2001.csv"},`
`23`		`- "nhefs": {"filename": "nhefs.csv"}`
	`23`	`+ "nhefs": {"filename": "nhefs.csv"},`
`24`	`24`	`}`
`25`	`25`
`26`	`26`