     RegressionKinkDataValidator,
     PrePostNEGDDataValidator,
     IVDataValidator,
+    PropensityDataValidator,
 )
 from causalpy.plot_utils import plot_xY
 from causalpy.utils import round_num
@@ -1477,3 +1478,207 @@ def get_naive_OLS_fit(self):
         beta_params.insert(0, ols_reg.intercept_[0])
         self.ols_beta_params = dict(zip(self._x_design_info.column_names, beta_params))
         self.ols_reg = ols_reg
+
+
+class InversePropensityWeighting(ExperimentalDesign, PropensityDataValidator):
+    """
+    A class to analyse inverse propensity weighting experiments.
+
+    :param data:
+        A pandas dataframe
+    :param formula:
+        A statistical model formula for the propensity model
+    :param outcome_variable:
+        A string denoting the outcome variable in the data to be reweighted
+    :param weighting_scheme:
+        A string denoting which weighting scheme to use among: 'raw',
+        'robust', 'overlap', 'doubly_robust'
+    :param model:
+        A PyMC model
+
+    Example
+    -------
+    >>> import causalpy as cp
+    >>> df = cp.load_data("rd")
+    >>> seed = 42
+    >>> result = cp.pymc_experiments.InversePropensityWeighting(
+    ...     df,
+    ...     formula="t ~ 1 + x",
+    ...     outcome_variable="y",
+    ...     weighting_scheme="robust",
+    ...     model=cp.pymc_models.PropensityScore(
+    ...         sample_kwargs={
+    ...             "draws": 100,
+    ...             "target_accept": 0.95,
+    ...             "random_seed": seed,
+    ...             "progressbar": False,
+    ...         },
+    ...     ),
+    ... )
+    """
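+
+    # A sketch of the per-unit weights implemented by the methods below
+    # (standard IPW formulas; p is the propensity score, t the treatment):
+    #   raw:           w = 1 / p (treated),        1 / (1 - p) (control)
+    #   robust:        w = P(t=1) / p (treated),   P(t=0) / (1 - p) (control)
+    #   overlap:       w = 1 - p (treated),        p (control)
+    #   doubly robust: outcome-model predictions corrected by the weighted
+    #                  residuals; see make_doubly_robust_adjustment.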
+
+    def __init__(
+        self,
+        data: pd.DataFrame,
+        formula: str,
+        outcome_variable: str,
+        weighting_scheme: str,
+        model=None,
+        **kwargs,
+    ):
+        super().__init__(model=model, **kwargs)
+        self.expt_type = "Inverse Propensity Score Weighting"
+        self.data = data
+        self.formula = formula
+        self.outcome_variable = outcome_variable
+        self.weighting_scheme = weighting_scheme
+        self._input_validation()
+
+        t, X = dmatrices(formula, self.data)
+        self._t_design_info = t.design_info
+        self._x_design_info = X.design_info
+        self.labels = X.design_info.column_names
+        self.t, self.X = np.asarray(t), np.asarray(X)
+        self.y = self.data[self.outcome_variable]
+
+        COORDS = {"obs_ind": list(range(self.X.shape[0])), "coeffs": self.labels}
+        self.coords = COORDS
+        self.model.fit(X=self.X, t=self.t, coords=COORDS)
+        # keep a handle on the posterior so that `self.idata`
+        # (used by plot_ATE below) is defined
+        self.idata = self.model.idata
+
+    def make_robust_adjustments(self, ps):
+        """Adjust the outcome by stabilised inverse propensity weights."""
+        X = pd.DataFrame(self.X, columns=self.labels)
+        X["ps"] = ps
+        X[self.outcome_variable] = self.y
+        t = self.t.flatten()
+        p_of_t = np.mean(t)
+        X["i_ps"] = np.where(t == 1, p_of_t / X["ps"], (1 - p_of_t) / (1 - X["ps"]))
+        n_ntrt = X[t == 0].shape[0]
+        n_trt = X[t == 1].shape[0]
+        outcome_trt = X[t == 1][self.outcome_variable]
+        outcome_ntrt = X[t == 0][self.outcome_variable]
+        i_propensity0 = X[t == 0]["i_ps"]
+        i_propensity1 = X[t == 1]["i_ps"]
+        weighted_outcome1 = outcome_trt * i_propensity1
+        weighted_outcome0 = outcome_ntrt * i_propensity0
+        return weighted_outcome0, weighted_outcome1, n_ntrt, n_trt
+
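+    # Minimal usage sketch for the adjustments above (assumes a fitted
+    # instance `result` as in the class docstring and one posterior draw of
+    # scores `ps`; mirrors what get_ate does below):
+    #
+    #   w0, w1, n0, n1 = result.make_robust_adjustments(ps)
+    #   ate = w1.sum() / n1 - w0.sum() / n0
+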
+    def make_raw_adjustments(self, ps):
+        """Adjust the outcome by raw inverse propensity weights."""
+        X = pd.DataFrame(self.X, columns=self.labels)
+        X["ps"] = ps
+        X[self.outcome_variable] = self.y
+        t = self.t.flatten()
+        X["ps"] = np.where(t, X["ps"], 1 - X["ps"])
+        X["i_ps"] = 1 / X["ps"]
+        n_ntrt = n_trt = len(X)
+        outcome_trt = X[t == 1][self.outcome_variable]
+        outcome_ntrt = X[t == 0][self.outcome_variable]
+        i_propensity0 = X[t == 0]["i_ps"]
+        i_propensity1 = X[t == 1]["i_ps"]
+        weighted_outcome1 = outcome_trt * i_propensity1
+        weighted_outcome0 = outcome_ntrt * i_propensity0
+        return weighted_outcome0, weighted_outcome1, n_ntrt, n_trt
+
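+    # Note on the raw scheme: both weighted sums are normalised by the full
+    # sample size (a Horvitz-Thompson style estimate), e.g. for one draw:
+    #
+    #   w0, w1, n0, n1 = result.make_raw_adjustments(ps)  # n0 == n1 == len(data)
+    #   ate = w1.sum() / n1 - w0.sum() / n0
+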
+    def make_overlap_adjustments(self, ps):
+        """Adjust the outcome by overlap (ATO) weights."""
+        X = pd.DataFrame(self.X, columns=self.labels)
+        X["ps"] = ps
+        X[self.outcome_variable] = self.y
+        t = self.t.flatten()
+        X["i_ps"] = np.where(t, 1 - X["ps"], X["ps"])
+        n_ntrt = X[t == 0]["i_ps"]
+        n_trt = X[t == 1]["i_ps"]
+        outcome_trt = X[t == 1][self.outcome_variable]
+        outcome_ntrt = X[t == 0][self.outcome_variable]
+        i_propensity0 = X[t == 0]["i_ps"]
+        i_propensity1 = X[t == 1]["i_ps"]
+        weighted_outcome1 = outcome_trt * i_propensity1
+        weighted_outcome0 = outcome_ntrt * i_propensity0
+        return weighted_outcome0, weighted_outcome1, n_ntrt, n_trt
+
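+    # Note on overlap weights (a standard result): weighting treated units by
+    # 1 - p and controls by p down-weights units with extreme scores, so the
+    # estimand is the effect in the region of overlap (ATO) rather than the
+    # ATE. E.g. a treated unit with p = 0.95 only contributes with weight 0.05.
+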
+    def make_doubly_robust_adjustment(self, ps):
+        """Combine outcome regression with the propensity score adjustment."""
+        X = pd.DataFrame(self.X, columns=self.labels)
+        X["ps"] = ps
+        t = self.t.flatten()
+        m0 = sk_lin_reg().fit(X[t == 0].astype(float), self.y[t == 0])
+        m1 = sk_lin_reg().fit(X[t == 1].astype(float), self.y[t == 1])
+        m0_pred = m0.predict(X)
+        m1_pred = m1.predict(X)
+        # Compromise between the outcome and treatment assignment models
+        weighted_outcome0 = (1 - t) * (self.y - m0_pred) / (1 - X["ps"]) + m0_pred
+        weighted_outcome1 = t * (self.y - m1_pred) / X["ps"] + m1_pred
+        return weighted_outcome0, weighted_outcome1, None, None
+
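+    # The method above is a sketch of the classic doubly robust estimator:
+    # for each posterior draw of the scores p,
+    #
+    #   E[Y(1)] ~ mean( t * (y - m1(X)) / p + m1(X) )
+    #   E[Y(0)] ~ mean( (1 - t) * (y - m0(X)) / (1 - p) + m0(X) )
+    #
+    # which remains consistent if either the outcome models (m0, m1) or the
+    # propensity model is correctly specified.
+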
+    def get_ate(self, i, idata, method="doubly_robust"):
+        # Post-process the posterior distribution over propensity scores,
+        # one sample (draw) at a time.
+        ps = idata["posterior"]["p"].stack(z=("chain", "draw"))[:, i].values
+        if method == "robust":
+            weighted_outcome_ntrt, weighted_outcome_trt, n_ntrt, n_trt = (
+                self.make_robust_adjustments(ps)
+            )
+            ntrt = weighted_outcome_ntrt.sum() / n_ntrt
+            trt = weighted_outcome_trt.sum() / n_trt
+        elif method == "raw":
+            weighted_outcome_ntrt, weighted_outcome_trt, n_ntrt, n_trt = (
+                self.make_raw_adjustments(ps)
+            )
+            ntrt = weighted_outcome_ntrt.sum() / n_ntrt
+            trt = weighted_outcome_trt.sum() / n_trt
+        elif method == "overlap":
+            weighted_outcome_ntrt, weighted_outcome_trt, n_ntrt, n_trt = (
+                self.make_overlap_adjustments(ps)
+            )
+            ntrt = np.sum(weighted_outcome_ntrt) / np.sum(n_ntrt)
+            trt = np.sum(weighted_outcome_trt) / np.sum(n_trt)
+        else:
+            weighted_outcome_ntrt, weighted_outcome_trt, n_ntrt, n_trt = (
+                self.make_doubly_robust_adjustment(ps)
+            )
+            trt = np.mean(weighted_outcome_trt)
+            ntrt = np.mean(weighted_outcome_ntrt)
+        ate = trt - ntrt
+        return [ate, trt, ntrt]
+
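+    # Usage sketch (hypothetical draw count): applying get_ate across many
+    # posterior draws yields a posterior distribution over the ATE.
+    #
+    #   ates = [result.get_ate(i, result.idata, method="robust")[0]
+    #           for i in range(300)]
+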
+    def plot_ATE(self, idata=None, method=None, prop_draws=100, ate_draws=300):
+        if idata is None:
+            idata = self.idata
+        if method is None:
+            method = self.weighting_scheme
+
+        def plot_weights(bins, top0, top1, ax):
+            ax.axhline(0, c="gray", linewidth=1)
+            bars0 = ax.bar(
+                bins[:-1] + 0.025, top0, width=0.04, facecolor="red", alpha=0.3
+            )
+            bars1 = ax.bar(
+                bins[:-1] + 0.025, -top1, width=0.04, facecolor="blue", alpha=0.3
+            )
+            for bars in (bars0, bars1):
+                for bar in bars:
+                    bar.set_edgecolor("black")
+
+        def make_hists(idata, i, axs):
+            p_i = az.extract(idata)["p"][:, i].values
+            bins = np.arange(0.025, 0.99, 0.005)
+            top0, _ = np.histogram(p_i[self.t.flatten() == 0], bins=bins)
+            top1, _ = np.histogram(p_i[self.t.flatten() == 1], bins=bins)
+            plot_weights(bins, top0, top1, axs[0])
+
+        mosaic = """AAAAAA
+                    BBBBCC"""
+        fig, axs = plt.subplot_mosaic(mosaic, figsize=(20, 13))
+        axs = [axs[k] for k in axs.keys()]
+        axs[0].axvline(
+            0.1, linestyle="--", color="black", label="Low Extreme Propensity Scores"
+        )
+        axs[0].axvline(
+            0.9, linestyle="--", color="black", label="High Extreme Propensity Scores"
+        )
+        axs[0].set_title(
+            "Draws from the Posterior \n Propensity Scores Distribution", fontsize=20
+        )
+        for i in range(prop_draws):
+            make_hists(idata, i, axs)
+        ate_df = pd.DataFrame(
+            [self.get_ate(i, idata, method=method) for i in range(ate_draws)],
+            columns=["ATE", "Y(1)", "Y(0)"],
+        )
+        axs[1].hist(
+            ate_df["Y(1)"], label="E(Y(1))", ec="black", bins=10, alpha=0.8, color="blue"
+        )
+        axs[1].hist(
+            ate_df["Y(0)"], label="E(Y(0))", ec="black", bins=10, alpha=0.8, color="red"
+        )
+        axs[1].legend()
+        axs[1].set_title(
+            f"The Outcomes \n Under the {method} re-weighting scheme", fontsize=20
+        )
+        axs[2].hist(
+            ate_df["ATE"], label="ATE", ec="black", bins=10, color="slateblue", alpha=0.6
+        )
+        axs[2].axvline(ate_df["ATE"].mean(), label="E(ATE)")
+        axs[2].legend()
+        axs[2].set_title("Average Treatment Effect", fontsize=20)
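+
+    # Usage sketch (assuming the fitted `result` from the class docstring):
+    #
+    #   result.plot_ATE(method="raw", prop_draws=50, ate_draws=200)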