Commit 51a6789

post useR fixes
1 parent 6fe3330 commit 51a6789

45 files changed: +392 -394 lines changed

exercises/15-bonus-ml-for-causal-exercises.qmd

Lines changed: 39 additions & 40 deletions
@@ -61,12 +61,12 @@ dagify(
   geom_dag_point() +
   geom_dag_label_repel(
     aes(x, y, label = label),
-    box.padding = 3.5,
+    box.padding = 3.5,
     inherit.aes = FALSE,
-    max.overlaps = Inf,
+    max.overlaps = Inf,
     family = "sans",
     seed = 1630,
-    label.size = NA,
+    label.size = NA,
     label.padding = 0.1,
     size = 14 / 3
   ) +
@@ -122,7 +122,7 @@ propensity_scores <- predict(propensity_model, type = "response")
 # Step 2: Calculate ATE weights
 # For treated: 1/PS, for control: 1/(1-PS)
 ate_weights <- wt_ate(
-  propensity_scores,
+  propensity_scores,
   seven_dwarfs$park_extra_magic_morning
 )
 
@@ -133,7 +133,7 @@ ipw_model <- lm(
   weights = ate_weights
 )
 
-# For proper inference, we need bootstrapping.
+# For proper inference, we need bootstrapping.
 # See the appendix at the bottom of this document.
 tidy(ipw_model)
 ```
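
The comment in this hunk states the ATE weight formula: 1/PS for treated days and 1/(1 - PS) for control days. A minimal sketch of that formula written out by hand, reusing `propensity_scores` and `seven_dwarfs` from the surrounding exercise code; it illustrates the formula, not the internals of `wt_ate()`:

```r
# illustrative only: the ATE weight formula from the comment above,
# assuming `propensity_scores` and `seven_dwarfs` exist as in the exercise
ate_weights_manual <- ifelse(
  seven_dwarfs$park_extra_magic_morning == 1,
  1 / propensity_scores,        # treated days get 1 / PS
  1 / (1 - propensity_scores)   # control days get 1 / (1 - PS)
)

summary(ate_weights_manual)
```
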
@@ -151,7 +151,7 @@ The algorithm is:
 ```{r}
 # Step 1: Fit outcome model with exposure and all confounders
 outcome_model <- lm(
-  wait_minutes_posted_avg ~ park_extra_magic_morning + park_ticket_season +
+  wait_minutes_posted_avg ~ park_extra_magic_morning + park_ticket_season +
     park_close + park_temperature_high,
   data = seven_dwarfs
 )
@@ -171,7 +171,7 @@ pred_treated <- predict(outcome_model, newdata = data_all_treated)
 pred_control <- predict(outcome_model, newdata = data_all_control)
 
 # Step 4: Calculate average treatment effect
-# For proper inference, we need bootstrapping.
+# For proper inference, we need bootstrapping.
 # See the appendix at the bottom of this document.
 ate_gcomp <- mean(pred_treated - pred_control)
 ate_gcomp
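
For reference, the g-computation steps in these hunks condense to a short recipe. A sketch under the same variable names used in the exercise; the cloned datasets are built the same way as in the SuperLearner section further down:

```r
# sketch: g-computation in one pass, reusing the exercise's variable names
outcome_model <- lm(
  wait_minutes_posted_avg ~ park_extra_magic_morning + park_ticket_season +
    park_close + park_temperature_high,
  data = seven_dwarfs
)

# clone the data, setting everyone to treated and then everyone to control
data_all_treated <- dplyr::mutate(seven_dwarfs, park_extra_magic_morning = 1)
data_all_control <- dplyr::mutate(seven_dwarfs, park_extra_magic_morning = 0)

# predict both counterfactual outcomes and average the difference
mean(
  predict(outcome_model, newdata = data_all_treated) -
    predict(outcome_model, newdata = data_all_control)
)
```
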
@@ -193,7 +193,7 @@ sl_library <- c(________, ________, ________, ________)
 
 exposure_sl <- __________(
   Y = seven_dwarfs$__________,
-  X = seven_dwarfs |>
+  X = seven_dwarfs |>
     select(__________, __________, __________) |>
     mutate(park_close = as.numeric(park_close)),
   family = binomial(),
@@ -205,7 +205,7 @@ exposure_sl
 
 outcome_sl <- __________(
   Y = seven_dwarfs$__________,
-  X = seven_dwarfs |>
+  X = seven_dwarfs |>
     select(__________, __________, __________, __________) |>
     mutate(park_close = as.numeric(park_close)),
   family = gaussian(),
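
These two chunks are fill-in-the-blank exercises, so the blanks are left as they are. As a reference for the call shape only, not an answer key, here is a hedged sketch of how `SuperLearner()` is typically called; the learner library and the exposure-model framing are illustrative assumptions:

```r
# sketch of the SuperLearner() call shape; learner choices are illustrative
library(SuperLearner)

exposure_fit <- SuperLearner(
  Y = seven_dwarfs$park_extra_magic_morning,   # a numeric 0/1 vector for binomial()
  X = seven_dwarfs |>
    dplyr::select(park_ticket_season, park_close, park_temperature_high) |>
    dplyr::mutate(park_close = as.numeric(park_close)),
  family = binomial(),
  SL.library = c("SL.glm", "SL.ranger")
)

# prints the cross-validated risk and ensemble weight for each learner
exposure_fit
```
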
@@ -232,7 +232,7 @@ exposure_results <- tibble(
   predicted = ______
 )
 
-# Need event_level = "second" because yardstick treats first level ("0")
+# Need event_level = "second" because yardstick treats first level ("0")
 # as event by default
 exposure_auc <- roc_auc(exposure_results, truth, predicted, event_level = "second")
 exposure_auc
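
The `event_level = "second"` comment deserves a standalone illustration: yardstick treats the first factor level as the event by default, so with 0/1 outcomes the AUC is reversed unless you say otherwise. A self-contained toy sketch, not exercise data:

```r
# toy data, not from the exercise
library(yardstick)
library(tibble)

toy <- tibble(
  truth = factor(c(0, 0, 1, 1)),      # first factor level is "0"
  predicted = c(0.2, 0.6, 0.7, 0.9)   # higher values should indicate "1"
)

# default event_level = "first" scores "0" as the event, so the AUC is reversed
roc_auc(toy, truth, predicted)

# event_level = "second" scores "1" as the event, which is what we want here
roc_auc(toy, truth, predicted, event_level = "second")
```
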
@@ -249,12 +249,12 @@ outcome_rmse
 
 ```{r}
 sl_library_extended <- c(
-  "SL.glm",
-  "SL.ranger",
+  "SL.glm",
+  "SL.ranger",
   "SL.earth",
   "SL.gam",
   "SL.glm.interaction",
-  "SL.mean",
+  "SL.mean",
   "SL.glmnet"
 )
 
@@ -312,15 +312,15 @@ tidy(ipw_model) |>
 # For SuperLearner prediction, we need only the columns used in the model
 
 # Dataset where everyone is treated, `park_extra_magic_morning` = 1
-data_all_treated <- seven_dwarfs |>
+data_all_treated <- seven_dwarfs |>
   select(park_extra_magic_morning, park_ticket_season, park_close, park_temperature_high) |>
   mutate(
     park_close = as.numeric(park_close),
     park_extra_magic_morning = ___
   )
 
 # Dataset where everyone is control, `park_extra_magic_morning` = 0
-data_all_control <- seven_dwarfs |>
+data_all_control <- seven_dwarfs |>
   select(park_extra_magic_morning, park_ticket_season, park_close, park_temperature_high) |>
   mutate(
     park_close = as.numeric(park_close),
@@ -353,7 +353,7 @@ y_bounded <- (seven_dwarfs$__________ - min_y) / (max_y - min_y)
 # For TMLE with continuous outcomes, we need to fit on the bounded Y
 outcome_sl_bounded <- SuperLearner(
   Y = __________,
-  X = seven_dwarfs |>
+  X = seven_dwarfs |>
     select(__________, park_ticket_season, park_close, park_temperature_high) |>
     mutate(park_close = as.numeric(park_close)),
   family = quasibinomial(),
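
The bounding step referenced in this hunk's header rescales the continuous outcome to [0, 1] so TMLE can model it with a quasibinomial fit, then rescales effects back at the end. A sketch, assuming the outcome is the posted wait time as elsewhere in the exercise:

```r
# sketch of the bounding transform; `wait_minutes_posted_avg` is assumed to be
# the outcome, as elsewhere in this exercise
y <- seven_dwarfs$wait_minutes_posted_avg
min_y <- min(y)
max_y <- max(y)

y_bounded <- (y - min_y) / (max_y - min_y)
range(y_bounded)  # now within [0, 1]

# an effect estimated on the bounded scale is multiplied by (max_y - min_y)
# to put it back on the minutes scale
```
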
@@ -368,8 +368,8 @@ initial_pred_control <- predict(__________, newdata = ______)$pred[, 1]
 # each observation gets the counterfactual prediction based on their actual treatment
 # this is the same as predicting on the original dataset, but since we already calculated these, we'll just put it together ourselves
 initial_pred_observed <- ifelse(
-  seven_dwarfs$park_extra_magic_morning == 1,
-  initial_pred_treated,
+  seven_dwarfs$park_extra_magic_morning == 1,
+  initial_pred_treated,
   initial_pred_control
 )
 ```
@@ -379,8 +379,8 @@ initial_pred_observed <- ifelse(
 ```{r}
 # Step 2: Create the "clever covariate": this is the key to TMLE
 # It weights observations based on their propensity scores to achieve balance
-# For treated units: 1 / propensity_scores
-# For control units: -1 / (1 - propensity_scores)
+# For treated units: 1 / propensity_scores
+# For control units: -1 / (1 - propensity_scores)
 # This is NOT the ATE weights; it's a component of the efficient influence function
 # But it IS related, as it is also a consequence of targeting the ATE
 clever_covariate <- ifelse(
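
The hunk cuts off at `clever_covariate <- ifelse(`. Based on the two comment lines above it, the full construction looks like the following sketch, assuming `propensity_scores` holds the exposure-model predictions:

```r
# sketch of the clever covariate described in the comments above
clever_covariate <- ifelse(
  seven_dwarfs$park_extra_magic_morning == 1,
  1 / propensity_scores,           # treated units
  -1 / (1 - propensity_scores)     # control units
)
```
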
@@ -391,7 +391,7 @@ clever_covariate <- ifelse(
 ```
 
 6. Fit a fluctuation model with the bounded outcome, using `qlogis(initial_pred_observed)` as an offset and the clever covariate as a predictor (with no intercept). Use binomial family for the model.
-7. Get the fluctuation parameter `epsilon` from the model coefficients; this is the coefficient for the clever covariate.
+7. Get the fluctuation parameter `epsilon` from the model coefficients; this is the coefficient for the clever covariate.
 
 ```{r}
 # Step 3: The targeting step - a small parametric fluctuation of initial estimates
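
Steps 6 and 7 describe the fluctuation model. A sketch of what that looks like with the objects defined earlier in the exercise: bounded outcome, logit of the initial predictions as an offset, the clever covariate as the only predictor, and no intercept:

```r
# sketch of steps 6-7; R will warn about non-integer "successes", which is
# expected when fitting a binomial model to a bounded continuous outcome
fluctuation_model <- glm(
  y_bounded ~ -1 + clever_covariate + offset(qlogis(initial_pred_observed)),
  family = binomial()
)

# epsilon is the coefficient on the clever covariate
epsilon <- coef(fluctuation_model)[["clever_covariate"]]
epsilon
```
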
@@ -412,7 +412,7 @@ epsilon <- __________
 epsilon
 ```
 
-8. Now that we've calculated the fluctuation parameter, we can update our predictions to obtain targeted predictions that minimize the bias-variance tradeoff for the average treatment effect.
+8. Now that we've calculated the fluctuation parameter, we can update our predictions to obtain targeted predictions that minimize the bias-variance tradeoff for the average treatment effect.
    - For treated units, we add `epsilon * (1 / propensity_scores)`.
    - For control units, we add `epsilon * (-1 / (1 - propensity_scores))`.
 
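
Step 8's update happens on the logit scale: shift the initial predictions by `epsilon` times the treatment-specific clever covariate term, then map back with `plogis()`. A sketch using the names from the exercise:

```r
# sketch of the targeting update in step 8
targeted_pred_treated <- plogis(
  qlogis(initial_pred_treated) + epsilon * (1 / propensity_scores)
)
targeted_pred_control <- plogis(
  qlogis(initial_pred_control) + epsilon * (-1 / (1 - propensity_scores))
)
```
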

@@ -432,13 +432,13 @@ targeted_pred_control <- plogis(__________)
 
 # we'll need this later for calculating the variance and confidence intervals
 targeted_pred_observed <- ifelse(
-  seven_dwarfs$park_extra_magic_morning == 1,
-  targeted_pred_treated,
+  seven_dwarfs$park_extra_magic_morning == 1,
+  targeted_pred_treated,
   targeted_pred_control
 )
 ```
 
-9. Let's visualize the initial vs targeted individual-level predictions for treated and control units. Set the x-axis to the initial predictions and the y-axis to the targeted predictions. For the first plot, use `initial_pred_treated` and `targeted_pred_treated`, and for the second plot, use `initial_pred_control` and `targeted_pred_control`.
+9. Let's visualize the initial vs targeted individual-level predictions for treated and control units. Set the x-axis to the initial predictions and the y-axis to the targeted predictions. For the first plot, use `initial_pred_treated` and `targeted_pred_treated`, and for the second plot, use `initial_pred_control` and `targeted_pred_control`.
 
 ```{r}
 # plot the initial vs targeted individual-level predictions for treated units
@@ -449,7 +449,7 @@ ggplot(seven_dwarfs, aes(x = __________, y = __________)) +
   labs(
     x = "Initial Predictions (Treated)",
     y = "Targeted Predictions (Treated)"
-  ) +
+  ) +
   theme_minimal()
 
 # plot the initial vs targeted individual-level predictions for control units
@@ -460,7 +460,7 @@ ggplot(seven_dwarfs, aes(x = __________, y = __________)) +
   labs(
     x = "Initial Predictions (Control)",
     y = "Targeted Predictions (Control)"
-  ) +
+  ) +
   theme_minimal()
 ```
 
@@ -491,7 +491,7 @@ targeted_ate
 # 2. (targeted_pred_treated - targeted_pred_control - tmle_ate): captures uncertainty in the treatment effect
 # Each observation's IC value represents its contribution to the overall uncertainty
 # Note: IC uses bounded outcomes and predictions
-ic <- clever_covariate * (y_bounded - targeted_pred_observed) +
+ic <- clever_covariate * (y_bounded - targeted_pred_observed) +
   targeted_pred_treated - targeted_pred_control - targeted_ate / (max_y - min_y)
 
 # Standard error is the standard deviation of IC values divided by sqrt(n)
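
Following the comments in this hunk, the influence curve values turn into a standard error and a Wald-style interval. A sketch, with the rescaling back to the minutes scale treated as an assumption about the exercise's convention:

```r
# sketch: IC-based standard error and 95% CI
n <- nrow(seven_dwarfs)

# sd of the IC over sqrt(n) is on the bounded scale; multiplying by
# (max_y - min_y) is assumed here to return it to the minutes scale
tmle_se <- sd(ic) / sqrt(n) * (max_y - min_y)

tmle_ci <- targeted_ate + c(-1.96, 1.96) * tmle_se
tmle_ci
```
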
@@ -518,7 +518,7 @@ tibble(
 R has several packages for TMLE: tmle, ltmle, and tmle3, all with slightly different designs and capabilities. We'll use the `tmle` package here, which is quite simple and works with SuperLearner.
 
 ```{r}
-confounders <- seven_dwarfs |>
+confounders <- seven_dwarfs |>
   select(park_ticket_season, park_close, park_temperature_high) |>
   mutate(park_close = as.numeric(park_close))
 
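
For readers who want the packaged version mentioned in this hunk, a schematic sketch of the `tmle::tmle()` call shape; the learner libraries are illustrative, not the exercise's choices:

```r
# schematic sketch of tmle::tmle(); learner libraries are illustrative
library(tmle)

tmle_fit <- tmle(
  Y = seven_dwarfs$wait_minutes_posted_avg,
  A = seven_dwarfs$park_extra_magic_morning,
  W = confounders,                            # built in the chunk above
  Q.SL.library = c("SL.glm", "SL.ranger"),    # outcome model learners
  g.SL.library = c("SL.glm", "SL.ranger")     # exposure model learners
)

# point estimate, variance, CI, and p-value for the ATE
tmle_fit$estimates$ATE
```
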
@@ -559,21 +559,21 @@ set.seed(1234)
 
 fit_ipw <- function(split, ...) {
   .df <- as.data.frame(split)
-
+
   # Fit propensity score model
   ps_model <- glm(
     park_extra_magic_morning ~ park_ticket_season + park_close + park_temperature_high,
     data = .df,
     family = binomial()
   )
-
+
   # Calculate weights
   ps <- augment(ps_model, type.predict = "response", data = .df)$.fitted
   weights <- wt_ate(ps, .df$park_extra_magic_morning, exposure_type = "binary")
-
+
   # Fit weighted outcome model
-  lm(wait_minutes_posted_avg ~ park_extra_magic_morning,
-    data = .df,
+  lm(wait_minutes_posted_avg ~ park_extra_magic_morning,
+    data = .df,
     weights = weights) |>
     tidy()
 }
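
The next hunk picks up at `int_bca(ipw_boot, results, .fn = fit_ipw)`, so the intermediate bootstrap object is not shown in this diff. A sketch of how `fit_ipw()` would typically plug into an rsample bootstrap, mirroring the g-computation version below; the `ipw_boot` construction here is an assumption, not the file's exact code:

```r
# sketch only: mirrors the g-computation bootstrap shown further down
library(rsample)
library(dplyr)
library(purrr)

ipw_boot <- bootstraps(seven_dwarfs, 1000, apparent = TRUE) |>
  mutate(results = map(splits, fit_ipw))

# bias-corrected and accelerated (BCa) bootstrap confidence intervals
int_bca(ipw_boot, results, .fn = fit_ipw)
```
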
@@ -591,21 +591,21 @@ int_bca(ipw_boot, results, .fn = fit_ipw) |>
 ```{r}
 fit_gcomp <- function(split, ...) {
   .df <- as.data.frame(split)
-
+
   # Fit outcome model
   mod <- lm(
-    wait_minutes_posted_avg ~ park_extra_magic_morning + park_ticket_season +
+    wait_minutes_posted_avg ~ park_extra_magic_morning + park_ticket_season +
       park_close + park_temperature_high,
     data = .df
   )
-
+
   # Clone and predict
   df_treat <- .df |> mutate(park_extra_magic_morning = 1)
   df_control <- .df |> mutate(park_extra_magic_morning = 0)
-
+
   pred_treat <- augment(mod, newdata = df_treat)$.fitted
   pred_control <- augment(mod, newdata = df_control)$.fitted
-
+
   # Return results
   tibble(
     term = "ate",
@@ -618,4 +618,3 @@ gcomp_boot <- bootstraps(seven_dwarfs, 1000, apparent = TRUE) |>
 
 int_bca(gcomp_boot, results, .fn = fit_gcomp)
 ```
-
slides/pdf/00-intro.pdf: -6 Bytes (binary file not shown)
-10 Bytes (binary file not shown)
0 Bytes (binary file not shown)
0 Bytes (binary file not shown)

slides/pdf/04-dags.pdf: -7 Bytes (binary file not shown)

slides/pdf/05-quartets.pdf: -25 Bytes (binary file not shown)

slides/pdf/06-pscores.pdf: -3 Bytes (binary file not shown)

slides/pdf/07-using-pscores.pdf: 0 Bytes (binary file not shown)
0 Bytes (binary file not shown)
