update with Ben and Alex's feedback

NathanielF · NathanielF · commit a38e50a22342 · 2024-04-14T19:07:39.000+01:00
Signed-off-by: Nathaniel <NathanielF@users.noreply.github.com>
diff --git a/causalpy/data_validation.py b/causalpy/data_validation.py
@@ -136,7 +136,7 @@ def _input_validation(self):
 
 
 class PropensityDataValidator:
-    """Mixin class for validating the input data and model formula for IV experiments."""
+    """Mixin class for validating the input data and model formula for Propensity Weighting experiments."""
 
     def _input_validation(self):
         """Validate the input data and model formula for correctness"""
diff --git a/causalpy/pymc_experiments.py b/causalpy/pymc_experiments.py
@@ -1494,7 +1494,9 @@ class InversePropensityWeighting(ExperimentalDesign, PropensityDataValidator):
         A string denoting the outcome variable in datq to be reweighted
     :param weighting_scheme:
         A string denoting which weighting scheme to use among: 'raw', 'robust',
-        'doubly robust'
+        'doubly robust' or 'overlap'. See Aronow and Miller, "Foundations
+        of Agnostic Statistics", for discussion and computation of these
+        weighting schemes.
     :param model:
         A PyMC model
 
@@ -1548,6 +1550,9 @@ def __init__(
         self.model.fit(X=self.X, t=self.t, coords=COORDS)
 
     def make_robust_adjustments(self, ps):
+        """This estimator is discussed in Aronow and
+        Miller's book as being related to the
+        Horvitz-Thompson method."""
         X = pd.DataFrame(self.X, columns=self.labels)
         X["ps"] = ps
         X[self.outcome_variable] = self.y
@@ -1565,6 +1570,9 @@ def make_robust_adjustments(self, ps):
         return weighted_outcome0, weighted_outcome1, n_ntrt, n_trt
 
     def make_raw_adjustments(self, ps):
+        """This estimator is discussed in Aronow and
+        Miller as the simplest base form of
+        inverse propensity weighting schemes."""
         X = pd.DataFrame(self.X, columns=self.labels)
         X["ps"] = ps
         X[self.outcome_variable] = self.y
@@ -1581,6 +1589,10 @@ def make_raw_adjustments(self, ps):
         return weighted_outcome0, weighted_outcome1, n_ntrt, n_trt
 
     def make_overlap_adjustments(self, ps):
+        """This weighting scheme was adapted from
+        Lucy D’Agostino McGowan's blog on
+        Propensity Score Weights referenced in
+        the primary CausalPy explainer notebook."""
         X = pd.DataFrame(self.X, columns=self.labels)
         X["ps"] = ps
         X[self.outcome_variable] = self.y
@@ -1597,6 +1609,12 @@ def make_overlap_adjustments(self, ps):
         return weighted_outcome0, weighted_outcome1, n_ntrt, n_trt
 
     def make_doubly_robust_adjustment(self, ps):
+        """The doubly robust weighting scheme is also
+        discussed in Aronow and Miller, but a bit more generally
+        than our implementation here. Here we have specified
+        the outcome model to be a simple OLS model.
+        In this way, the compromise between the outcome model and
+        the propensity model is always done with OLS."""
         X = pd.DataFrame(self.X, columns=self.labels)
         X["ps"] = ps
         t = self.t.flatten()
@@ -1722,8 +1740,9 @@ def make_hists(idata, i, axs, method=method):
             0.9, linestyle="--", label="Hi Extreme Propensity Scores", color="black"
         )
         axs[0].set_title(
-            "Draws from the Posterior \n  Propensity Scores Distribution", fontsize=20
+            "Weighted and Unweighted Draws from the Posterior \n  Propensity Scores Distribution", fontsize=20
         )
+        axs[0].set_ylabel("Counts of Observations")
         axs[0].set_xlabel("Propensity Scores")
         custom_lines = [
             Line2D([0], [0], color="skyblue", lw=2),
diff --git a/docs/source/notebooks/inv_prop_pymc.ipynb b/docs/source/notebooks/inv_prop_pymc.ipynb
diff --git a/docs/source/references.bib b/docs/source/references.bib
@@ -59,6 +59,13 @@ @book{hansenEconometrics
   publisher={Princeton}
 }
 
+@book{aronowFoundations,
+  author={Aronow, P and Miller, B},
+  title={Foundations of Agnostic Statistics},
+  publisher={Cambridge University Press},
+  year={2019}
+}
+
 @article{acemoglu2001colonial,
   title={The Colonial Origins of Comparative Development: An Empirical Investigation},
   author={Acemoglu, D and Johnson, S and Robinson, J},
@@ -68,3 +75,12 @@ @article{acemoglu2001colonial
   pages={1369--1401},
   year={2001}
 }
+
+@incollection{forde2024nonparam,
+  author    = {Forde, Nathaniel},
+  title     = {Bayesian Non-parametric Causal Inference},
+  editor    = {PyMC Team},
+  booktitle = {PyMC examples},
+  doi       = {10.5281/zenodo.5654871},
+  year      = {2024}
+}