Skip to content

Commit 994799b

Browse files
committed
updated notebook and site map
Signed-off-by: Nathaniel <[email protected]>
1 parent 06a0d25 commit 994799b

File tree

7 files changed

+803
-430
lines changed

7 files changed

+803
-430
lines changed

causalpy/pymc_experiments.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1680,5 +1680,52 @@ def make_hists(idata, i, axs):
16801680
axs[2].set_title("Average Treatment Effect", fontsize=20);
16811681

16821682

1683+
def weighted_percentile(self, data, weights, perc):
    """
    Compute weighted percentile(s) of ``data`` by linear interpolation.

    Parameters
    ----------
    data : array-like
        Sample values. Accepts any array-like (coerced with ``np.asarray``).
    weights : array-like
        Non-negative weight for each observation in ``data``.
    perc : float or array-like
        Percentile(s) expressed as a fraction in [0, 1]
        (e.g. ``0.5`` for the weighted median).

    Returns
    -------
    float or np.ndarray
        Interpolated weighted percentile(s); an array if ``perc`` is an array.
    """
    # Coerce so plain Python lists work too (fancy indexing below
    # requires ndarrays).
    data = np.asarray(data)
    weights = np.asarray(weights)
    order = np.argsort(data)
    data = data[order]  # sort data
    weights = weights[order]  # keep weights aligned with sorted data
    # Midpoint plotting positions: behaves 'like' a CDF evaluated
    # between the jumps of the weighted empirical distribution.
    cdf = (np.cumsum(weights) - 0.5 * weights) / np.sum(weights)
    return np.interp(perc, cdf, data)
1692+
1693+
def plot_balance_ecdf(self, covariate, idata=None, weighting_scheme=None):
    """
    Plot empirical CDFs of ``covariate`` for the treated and control groups,
    raw (left panel) and after propensity-score re-weighting (right panel),
    to visually assess covariate balance.

    Parameters
    ----------
    covariate : str
        Column name (from ``self.labels``) to plot.
    idata : arviz.InferenceData, optional
        Posterior to extract propensity scores ``p`` from.
        Defaults to ``self.idata``.
    weighting_scheme : str, optional
        ``'raw'`` for classic inverse propensity weights, ``'robust'`` for
        weights stabilised by the marginal treatment probability; any other
        value falls through to overlap weights.
        Defaults to ``self.weighting_scheme``.

    Returns
    -------
    tuple
        ``(fig, axs)`` — the matplotlib figure and its two axes.
    """
    if idata is None:
        idata = self.idata
    if weighting_scheme is None:
        weighting_scheme = self.weighting_scheme

    # Posterior-mean propensity score for each observation
    ps = az.extract(idata)['p'].mean(dim='sample').values
    X = pd.DataFrame(self.X, columns=self.labels)
    X['ps'] = ps
    t = self.t.flatten()
    if weighting_scheme == 'raw':
        # Classic IPW: 1/p for treated, 1/(1-p) for control
        w1 = 1 / ps[t == 1]
        w0 = 1 / (1 - ps[t == 0])
    elif weighting_scheme == 'robust':
        # Stabilised weights using the marginal probability of treatment
        p_of_t = np.mean(t)
        w1 = p_of_t / (ps[t == 1])
        w0 = (1 - p_of_t) / (1 - ps[t == 0])
    else:
        # Overlap weights: (1-p) for treated, p for control. (The former
        # t[t == 1] and 1 - t[t == 0] factors were identically 1, so they
        # are dropped here.)
        w1 = 1 - ps[t == 1]
        w0 = ps[t == 0]

    # Hoist the shared quantile grid and per-group covariate values.
    # weighted_percentile is built on np.interp, which accepts the whole
    # grid at once, so each curve is a single vectorised call instead of
    # 1000 Python-level calls.
    quantiles = np.linspace(0, 1, 1000)
    x_trt = X[t == 1][covariate].values
    x_ntrt = X[t == 0][covariate].values
    raw_trt = self.weighted_percentile(x_trt, np.ones(len(x_trt)), quantiles)
    raw_ntrt = self.weighted_percentile(x_ntrt, np.ones(len(x_ntrt)), quantiles)
    w_trt = self.weighted_percentile(x_trt, w1, quantiles)
    w_ntrt = self.weighted_percentile(x_ntrt, w0, quantiles)

    fig, axs = plt.subplots(1, 2, figsize=(20, 6))
    axs[0].plot(quantiles, raw_trt, color='blue', label='Raw Treated')
    axs[0].plot(quantiles, raw_ntrt, color='red', label='Raw Control')
    axs[0].set_title(f"ECDF \n Raw: {covariate}")
    axs[1].set_title(f"ECDF \n Weighted {weighting_scheme} adjustment for {covariate}")
    axs[1].plot(quantiles, w_trt, color='blue', label='Reweighted Treated')
    axs[1].plot(quantiles, w_ntrt, color='red', label='Reweighted Control')
    axs[1].set_xlabel("Quantiles")
    axs[0].set_xlabel("Quantiles")
    axs[1].legend()
    axs[0].legend()
    return fig, axs
1728+
1729+
16831730

16841731

causalpy/pymc_models.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -374,7 +374,7 @@ class PropensityScore(ModelBuilder):
374374
Defines the PyMC model
375375
376376
.. math::
377-
\\beta &\sim \mathrm{Normal}(0, 50)
377+
\\beta &\sim \mathrm{Normal}(0, 1)
378378
379379
\sigma &\sim \mathrm{HalfNormal}(1)
380380
@@ -389,13 +389,13 @@ class PropensityScore(ModelBuilder):
389389
>>> import causalpy as cp
390390
>>> import numpy as np
391391
>>> from causalpy.pymc_models import PropensityScore
392-
>>> rd = cp.load_data("rd")
393-
>>> X = rd[["x", "treated"]]
394-
>>> y = np.asarray(rd["y"]).reshape((rd["y"].shape[0],1))
392+
>>> df = cp.load_data('nhefs')
393+
>>> X = df[["trt", "age", "race"]]
394+
>>> t = np.asarray(df["trt"]).reshape((df["trt"].shape[0],1))
395395
>>> ps = PropensityScore(sample_kwargs={"progressbar": False})
396396
>>> ps.fit(X, t, coords={
397-
... 'coeffs': ['x', 'treated'],
398-
... 'obs_indx': np.arange(rd.shape[0])
397+
... 'coeffs': ['trt', 'age', 'race'],
398+
... 'obs_indx': np.arange(df.shape[0])
399399
... },
400400
... )
401401
Inference...
270 KB
Loading

docs/source/examples.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,3 +68,11 @@ Instrumental Variables Regression
6868
:titlesonly:
6969

7070
notebooks/iv_pymc.ipynb
71+
72+
Inverse Propensity Score Weighting
73+
==================================
74+
75+
.. toctree::
76+
:titlesonly:
77+
78+
notebooks/inv_prop_pymc.ipynb

docs/source/glossary.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,9 @@ Glossary
9393
2SLS
9494
An estimation technique for estimating the parameters of an IV regression. It takes its name from the fact that it uses two OLS regressions - a first and second stage.
9595

96+
Propensity scores
97+
An estimate of the probability of adopting a treatment status. Used in re-weighting schemes to balance observational data.
98+
9699

97100
References
98101
----------

docs/source/index.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,12 @@ Instrumental Variable regression is an appropriate technique when you wish to es
122122

123123
.. image:: _static/iv_reg2.png
124124

125+
Inverse Propensity Score Weighting
126+
""""""""""""""""""""""""""""""""""
127+
Inverse Propensity Score Weighting is a technique used to correct selection effects in observational data by re-weighting observations to better reflect an as-if random allocation to treatment status. This helps recover unbiased causal effect estimates.
128+
129+
.. image:: _static/propensity_weight.png
130+
125131

126132
Support
127133
-------

docs/source/notebooks/inv_prop_pymc.ipynb

Lines changed: 733 additions & 424 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)