Skip to content

Commit 152844a

Browse files
authored
Merge pull request #111 from pymc-labs/ancova
Add pretest/posttest nonequivalent group designs with ANCOVA analysis
2 parents bca7a25 + e108897 commit 152844a

File tree

4 files changed

+684
-0
lines changed

4 files changed

+684
-0
lines changed

causalpy/pymc_experiments.py

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -600,3 +600,148 @@ def summary(self):
600600
f"Discontinuity at threshold = {self.discontinuity_at_threshold.mean():.2f}"
601601
)
602602
self.print_coefficients()
603+
604+
605+
class PrePostNEGD(ExperimentalDesign):
606+
"""A class to analyse data from pretest/posttest designs"""
607+
608+
def __init__(
609+
self,
610+
data: pd.DataFrame,
611+
formula: str,
612+
group_variable_name: str,
613+
pretreatment_variable_name: str,
614+
prediction_model=None,
615+
**kwargs,
616+
):
617+
super().__init__(prediction_model=prediction_model, **kwargs)
618+
self.data = data
619+
self.expt_type = "Pretest/posttest Nonequivalent Group Design"
620+
self.formula = formula
621+
self.group_variable_name = group_variable_name
622+
self.pretreatment_variable_name = pretreatment_variable_name
623+
624+
y, X = dmatrices(formula, self.data)
625+
self._y_design_info = y.design_info
626+
self._x_design_info = X.design_info
627+
self.labels = X.design_info.column_names
628+
self.y, self.X = np.asarray(y), np.asarray(X)
629+
self.outcome_variable_name = y.design_info.column_names[0]
630+
631+
# Input validation ----------------------------------------------------
632+
# Check that `group_variable_name` has TWO levels, representing the
633+
# treated/untreated. But it does not matter what the actual names of
634+
# the levels are.
635+
assert (
636+
len(pd.Categorical(self.data[self.group_variable_name]).categories) == 2
637+
), f"""
638+
There must be 2 levels of the grouping variable {self.group_variable_name}
639+
.I.e. the treated and untreated.
640+
"""
641+
642+
# fit the model to the observed (pre-intervention) data
643+
COORDS = {"coeffs": self.labels, "obs_indx": np.arange(self.X.shape[0])}
644+
self.prediction_model.fit(X=self.X, y=self.y, coords=COORDS)
645+
646+
# Calculate the posterior predictive for the treatment and control for an
647+
# interpolated set of pretest values
648+
# get the model predictions of the observed data
649+
self.pred_xi = np.linspace(
650+
np.min(self.data[self.pretreatment_variable_name]),
651+
np.max(self.data[self.pretreatment_variable_name]),
652+
200,
653+
)
654+
# untreated
655+
x_pred_untreated = pd.DataFrame(
656+
{
657+
self.pretreatment_variable_name: self.pred_xi,
658+
self.group_variable_name: np.zeros(self.pred_xi.shape),
659+
}
660+
)
661+
(new_x,) = build_design_matrices([self._x_design_info], x_pred_untreated)
662+
self.pred_untreated = self.prediction_model.predict(X=np.asarray(new_x))
663+
# treated
664+
x_pred_untreated = pd.DataFrame(
665+
{
666+
self.pretreatment_variable_name: self.pred_xi,
667+
self.group_variable_name: np.ones(self.pred_xi.shape),
668+
}
669+
)
670+
(new_x,) = build_design_matrices([self._x_design_info], x_pred_untreated)
671+
self.pred_treated = self.prediction_model.predict(X=np.asarray(new_x))
672+
673+
# Evaluate causal impact as equal to the trestment effect
674+
self.causal_impact = self.prediction_model.idata.posterior["beta"].sel(
675+
{"coeffs": self._get_treatment_effect_coeff()}
676+
)
677+
678+
# ================================================================
679+
680+
def plot(self):
681+
"""Plot the results"""
682+
fig, ax = plt.subplots(
683+
2, 1, figsize=(7, 9), gridspec_kw={"height_ratios": [3, 1]}
684+
)
685+
686+
# Plot raw data
687+
sns.scatterplot(
688+
x="pre",
689+
y="post",
690+
hue="group",
691+
alpha=0.5,
692+
data=self.data,
693+
ax=ax[0],
694+
)
695+
ax[0].set(xlabel="Pretest", ylabel="Posttest")
696+
697+
# plot posterior predictive of untreated
698+
plot_xY(
699+
self.pred_xi,
700+
self.pred_untreated["posterior_predictive"].y_hat,
701+
ax=ax[0],
702+
plot_hdi_kwargs={"color": "C0"},
703+
)
704+
705+
# plot posterior predictive of treated
706+
plot_xY(
707+
self.pred_xi,
708+
self.pred_treated["posterior_predictive"].y_hat,
709+
ax=ax[0],
710+
plot_hdi_kwargs={"color": "C1"},
711+
)
712+
713+
ax[0].legend(fontsize=LEGEND_FONT_SIZE)
714+
715+
# Plot estimated caual impact / treatment effect
716+
az.plot_posterior(self.causal_impact, ref_val=0, ax=ax[1])
717+
ax[1].set(title="Estimated treatment effect")
718+
return fig, ax
719+
720+
def _causal_impact_summary_stat(self):
721+
percentiles = self.causal_impact.quantile([0.03, 1 - 0.03]).values
722+
ci = r"$CI_{94\%}$" + f"[{percentiles[0]:.2f}, {percentiles[1]:.2f}]"
723+
causal_impact = f"{self.causal_impact.mean():.2f}, "
724+
return f"Causal impact = {causal_impact + ci}"
725+
726+
def summary(self):
727+
"""Print text output summarising the results"""
728+
729+
print(f"{self.expt_type:=^80}")
730+
print(f"Formula: {self.formula}")
731+
print("\nResults:")
732+
# TODO: extra experiment specific outputs here
733+
print(self._causal_impact_summary_stat())
734+
self.print_coefficients()
735+
736+
def _get_treatment_effect_coeff(self) -> str:
737+
"""Find the beta regression coefficient corresponding to the
738+
group (i.e. treatment) effect.
739+
For example if self.group_variable_name is 'group' and
740+
the labels are `['Intercept', 'C(group)[T.1]', 'pre']`
741+
then we want `C(group)[T.1]`.
742+
"""
743+
for label in self.labels:
744+
if ("group" in label) & (":" not in label):
745+
return label
746+
747+
raise NameError("Unable to find coefficient name for the treatment effect")

docs/examples.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,12 @@
1+
Nonequivalent group designs
2+
===========================
3+
4+
.. toctree::
5+
:titlesonly:
6+
7+
notebooks/ancova_pymc.ipynb
8+
9+
110
Synthetic Control
211
=================
312

docs/glossary.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@
55
Some of the definitions have been copied from (or inspired by) various resources, including Reichardt (2019).
66
</div>
77

8+
9+
**ANCOVA:** Analysis of covariance is a simple linear model, typically with one continuous predictor (the covariate) and a catgeorical variable (which may correspond to treatment or control group). In the context of this package, ANCOVA could be useful in pretest-postdest designs, either with or without random assignment.
10+
811
**Average treatment effect (ATE):** The average treatement effect across all units.
912

1013
**Average treatment effect on the treated (ATT):** The average effect of the treatment on the units that recieved it. Also called Treatment on the treated.
@@ -23,6 +26,20 @@ Some of the definitions have been copied from (or inspired by) various resources
2326

2427
**Non-equivalent group designs:** A quasi-experimental design where units are assigned to conditions non-randomly, and not according to a running variable (see Regression discontinuity design).
2528

29+
The basic posttest-only design can be described as:
30+
31+
| NR: | X | O |
32+
|-----|---|---|
33+
| NR: | | 0 |
34+
35+
The pretest-posttest NEGD can be described as:
36+
37+
| NR: | O1 | X | O2 |
38+
|-----|----|---|----|
39+
| NR: | O1 | | O2 |
40+
41+
Data from pretest-posttest NEGD could be analysed using change score analysis or the difference in differences approach, or with ANCOVA.
42+
2643
**One-group posttest-only design:** A design where a single group is exposed to a treatment and assessed on an outcome measure. There is no pretest measure or comparison group.
2744

2845
**Panel data:** Time series data collected on multiple units where the same units are observed at each time point.

docs/notebooks/ancova_pymc.ipynb

Lines changed: 513 additions & 0 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)