Skip to content

Commit 994799b

Browse files
committed
updated notebook and site map
Signed-off-by: Nathaniel <[email protected]>
1 parent 06a0d25 commit 994799b

File tree

7 files changed

+803
-430
lines changed

7 files changed

+803
-430
lines changed

causalpy/pymc_experiments.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1680,5 +1680,52 @@ def make_hists(idata, i, axs):
16801680
axs[2].set_title("Average Treatment Effect", fontsize=20);
16811681

16821682

1683+
def weighted_percentile(self, data, weights, perc):
    """
    Compute weighted percentile(s) of ``data`` by linear interpolation.

    Parameters
    ----------
    data : array-like
        Sample values. Accepts any array-like (coerced with ``np.asarray``).
    weights : array-like
        Non-negative weight for each observation in ``data``.
    perc : float or array-like
        Percentile(s) expressed as a fraction in [0, 1]
        (e.g. ``0.5`` for the weighted median).

    Returns
    -------
    float or np.ndarray
        Interpolated weighted percentile(s); an array if ``perc`` is an array.
    """
    # Coerce so plain Python lists work too (fancy indexing below
    # requires ndarrays).
    data = np.asarray(data)
    weights = np.asarray(weights)
    order = np.argsort(data)
    data = data[order]  # sort data
    weights = weights[order]  # keep weights aligned with sorted data
    # Midpoint plotting positions: behaves 'like' a CDF evaluated
    # between the jumps of the weighted empirical distribution.
    cdf = (np.cumsum(weights) - 0.5 * weights) / np.sum(weights)
    return np.interp(perc, cdf, data)
1692+
1693+
def plot_balance_ecdf(self, covariate, idata=None, weighting_scheme=None):
    """
    Plot empirical CDFs of ``covariate`` for the treated and control groups,
    raw (left panel) and after propensity-score re-weighting (right panel),
    to visually assess covariate balance.

    Parameters
    ----------
    covariate : str
        Column name (from ``self.labels``) to plot.
    idata : arviz.InferenceData, optional
        Posterior to extract propensity scores ``p`` from.
        Defaults to ``self.idata``.
    weighting_scheme : str, optional
        ``'raw'`` for classic inverse propensity weights, ``'robust'`` for
        weights stabilised by the marginal treatment probability; any other
        value falls through to overlap weights.
        Defaults to ``self.weighting_scheme``.

    Returns
    -------
    tuple
        ``(fig, axs)`` — the matplotlib figure and its two axes.
    """
    if idata is None:
        idata = self.idata
    if weighting_scheme is None:
        weighting_scheme = self.weighting_scheme

    # Posterior-mean propensity score for each observation
    ps = az.extract(idata)['p'].mean(dim='sample').values
    X = pd.DataFrame(self.X, columns=self.labels)
    X['ps'] = ps
    t = self.t.flatten()
    if weighting_scheme == 'raw':
        # Classic IPW: 1/p for treated, 1/(1-p) for control
        w1 = 1 / ps[t == 1]
        w0 = 1 / (1 - ps[t == 0])
    elif weighting_scheme == 'robust':
        # Stabilised weights using the marginal probability of treatment
        p_of_t = np.mean(t)
        w1 = p_of_t / (ps[t == 1])
        w0 = (1 - p_of_t) / (1 - ps[t == 0])
    else:
        # Overlap weights: (1-p) for treated, p for control. (The former
        # t[t == 1] and 1 - t[t == 0] factors were identically 1, so they
        # are dropped here.)
        w1 = 1 - ps[t == 1]
        w0 = ps[t == 0]

    # Hoist the shared quantile grid and per-group covariate values.
    # weighted_percentile is built on np.interp, which accepts the whole
    # grid at once, so each curve is a single vectorised call instead of
    # 1000 Python-level calls.
    quantiles = np.linspace(0, 1, 1000)
    x_trt = X[t == 1][covariate].values
    x_ntrt = X[t == 0][covariate].values
    raw_trt = self.weighted_percentile(x_trt, np.ones(len(x_trt)), quantiles)
    raw_ntrt = self.weighted_percentile(x_ntrt, np.ones(len(x_ntrt)), quantiles)
    w_trt = self.weighted_percentile(x_trt, w1, quantiles)
    w_ntrt = self.weighted_percentile(x_ntrt, w0, quantiles)

    fig, axs = plt.subplots(1, 2, figsize=(20, 6))
    axs[0].plot(quantiles, raw_trt, color='blue', label='Raw Treated')
    axs[0].plot(quantiles, raw_ntrt, color='red', label='Raw Control')
    axs[0].set_title(f"ECDF \n Raw: {covariate}")
    axs[1].set_title(f"ECDF \n Weighted {weighting_scheme} adjustment for {covariate}")
    axs[1].plot(quantiles, w_trt, color='blue', label='Reweighted Treated')
    axs[1].plot(quantiles, w_ntrt, color='red', label='Reweighted Control')
    axs[1].set_xlabel("Quantiles")
    axs[0].set_xlabel("Quantiles")
    axs[1].legend()
    axs[0].legend()
    return fig, axs
1728+
1729+
16831730

16841731

causalpy/pymc_models.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -374,7 +374,7 @@ class PropensityScore(ModelBuilder):
374374
Defines the PyMC model
375375
376376
.. math::
377-
\\beta &\sim \mathrm{Normal}(0, 50)
377+
\\beta &\sim \mathrm{Normal}(0, 1)
378378
379379
\sigma &\sim \mathrm{HalfNormal}(1)
380380
@@ -389,13 +389,13 @@ class PropensityScore(ModelBuilder):
389389
>>> import causalpy as cp
390390
>>> import numpy as np
391391
>>> from causalpy.pymc_models import PropensityScore
392-
>>> rd = cp.load_data("rd")
393-
>>> X = rd[["x", "treated"]]
394-
>>> y = np.asarray(rd["y"]).reshape((rd["y"].shape[0],1))
392+
>>> df = cp.load_data('nhefs')
393+
>>> X = df[["trt", "age", "race"]]
394+
>>> t = np.asarray(df["trt"]).reshape((df["trt"].shape[0],1))
395395
>>> ps = PropensityScore(sample_kwargs={"progressbar": False})
396396
>>> ps.fit(X, t, coords={
397-
... 'coeffs': ['x', 'treated'],
398-
... 'obs_indx': np.arange(rd.shape[0])
397+
... 'coeffs': ['trt', 'age', 'race'],
398+
... 'obs_indx': np.arange(df.shape[0])
399399
... },
400400
... )
401401
Inference...
270 KB
Loading

docs/source/examples.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,3 +68,11 @@ Instrumental Variables Regression
6868
:titlesonly:
6969

7070
notebooks/iv_pymc.ipynb
71+
72+
Inverse Propensity Score Weighting
73+
==================================
74+
75+
.. toctree::
76+
:titlesonly:
77+
78+
notebooks/inv_prop_pymc.ipynb

docs/source/glossary.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,9 @@ Glossary
9393
2SLS
9494
An estimation technique for estimating the parameters of an IV regression. It takes its name from the fact that it uses two OLS regressions - a first and second stage.
9595

96+
Propensity scores
97+
An estimate of the probability of adopting a treatment status. Used in re-weighting schemes to balance observational data.
98+
9699

97100
References
98101
----------

docs/source/index.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,12 @@ Instrumental Variable regression is an appropriate technique when you wish to es
122122

123123
.. image:: _static/iv_reg2.png
124124

125+
Inverse Propensity Score Weighting
126+
""""""""""""""""""""""""""""""""""
127+
Inverse Propensity Score Weighting is a technique used to correct selection effects in observational data by re-weighting observations to better reflect an as-if random allocation to treatment status. This helps recover unbiased causal effect estimates.
128+
129+
.. image:: _static/propensity_weight.png
130+
125131

126132
Support
127133
-------

docs/source/notebooks/inv_prop_pymc.ipynb

Lines changed: 733 additions & 424 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)