Skip to content

Commit 427ad28

Browse files
Merge pull request #61 from r-causal/misc-cleanup
2 parents da93969 + 64c22ea commit 427ad28

14 files changed

+342
-98
lines changed

exercises/06-intro-pscores-exercises.qmd

Lines changed: 43 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ library(tidyverse)
99
library(broom)
1010
library(touringplans)
1111
library(ggdag)
12+
library(ggokabeito)
1213
```
1314

1415
For Your Turn, we'll be looking at an example using Walt Disney World ride data from the touringplans package.
@@ -25,39 +26,63 @@ Below is a proposed DAG for this question.
2526

2627
```{r}
2728
set.seed(1234)
28-
# set up DAG coordinates
29+
2930
coord_dag <- list(
30-
x = c(season = 0, close = 0, weather_wdwhigh = -1, x = 1, y = 2),
31-
y = c(season = -1, close = 1, weather_wdwhigh = 0.25, x = 0, y = 0)
31+
x = c(Season = 0, close = 0, weather = -1, x = 1, y = 2),
32+
y = c(Season = -1, close = 1, weather = 0, x = 0, y = 0)
3233
)
3334
34-
# nicer labels for the nodes
3535
labels <- c(
3636
x = "Extra Magic Morning",
3737
y = "Average wait",
38-
season = "Ticket Season",
39-
weather_wdwhigh = "Historic high temperature",
38+
Season = "Ticket Season",
39+
weather = "Historic high temperature",
4040
close = "Time park closed"
4141
)
4242
43-
# visualize the dag
4443
dagify(
45-
y ~ x + close + season + weather_wdwhigh,
46-
x ~ weather_wdwhigh + close + season,
44+
y ~ x + close + Season + weather,
45+
x ~ weather + close + Season,
4746
coords = coord_dag,
48-
labels = labels
47+
labels = labels,
48+
exposure = "x",
49+
outcome = "y"
4950
) |>
50-
ggdag(use_labels = "label", text = FALSE) +
51-
theme_void() +
51+
tidy_dagitty() |>
52+
node_status() |>
53+
ggplot(
54+
aes(x, y, xend = xend, yend = yend, color = status)
55+
) +
56+
geom_dag_edges_arc(curvature = c(rep(0, 5), .3, 0)) +
57+
geom_dag_point() +
58+
geom_dag_label_repel(
59+
aes(x, y, label = label),
60+
box.padding = 3.5,
61+
inherit.aes = FALSE,
62+
max.overlaps = Inf,
63+
family = "sans",
64+
seed = 1630,
65+
label.size = NA,
66+
label.padding = 0.1,
67+
size = 14 / 3
68+
) +
69+
scale_color_okabe_ito(na.value = "grey90") +
70+
theme_dag() +
71+
theme(
72+
legend.position = "none",
73+
axis.text.x = element_text()
74+
) +
75+
coord_cartesian(clip = "off") +
5276
scale_x_continuous(
53-
limits = c(-1.25, 2.25),
54-
breaks = c(-1, 0, 1, 2),
77+
limits = c(-1.25, 2.25),
78+
breaks = c(-1, 0, 1, 2),
5579
labels = c(
56-
"\n(one year ago)", "\n(6 months ago)",
57-
"\n(3 months ago)", "9am - 10am\n(Today)"
80+
"\n(one year ago)",
81+
"\n(6 months ago)",
82+
"\n(3 months ago)",
83+
"5pm - 6pm\n(Today)"
5884
)
59-
) +
60-
theme(axis.text.x = element_text())
85+
)
6186
```
6287

6388
Here we are proposing that there are three confounders: the historic high temperature on the day, the time the park closed, and the ticket season: value, regular, or peak.

exercises/09-outcome-model-exercises.qmd

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,27 @@ Bootstrap this result 1000 times.
4545
set.seed(1234)
4646
4747
ipw_results <- ____(___, 1000, apparent = TRUE) |>
48-
mutate(results = map(splits, _____))
48+
mutate(boot_fits = map(splits, _____))
4949
```
5050

51+
Check out the distribution of estimates (**no need to change this code**)
52+
53+
```{r}
54+
#| eval: false
55+
ipw_results |>
56+
mutate(
57+
estimate = map_dbl(
58+
boot_fits,
59+
# pull the `estimate` for `qsmk` for each fit
60+
\(.fit) .fit |>
61+
filter(term == "qsmk") |>
62+
pull(estimate)
63+
)
64+
) |>
65+
ggplot(aes(estimate)) +
66+
geom_histogram(fill = "#D55E00FF", color = "white", alpha = 0.8) +
67+
theme_minimal()
68+
```
5169

5270
Calculate the confidence interval
5371

exercises/10-continuous-g-computation-exercises.qmd

Lines changed: 52 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -20,37 +20,67 @@ In the touringplans data set, we have information about the posted waiting times
2020
#| message: false
2121
#| warning: false
2222
library(ggdag)
23+
library(ggokabeito)
2324
2425
coord_dag <- list(
25-
x = c(wdw_ticket_season = -1, close = -1, weather_wdwhigh = -2, extra_magic_morning = 0, avg_spostmin = 1, avg_sactmin = 2),
26-
y = c(wdw_ticket_season = -1, close = 1, weather_wdwhigh = 0.25, extra_magic_morning = 0, avg_spostmin = 0, avg_sactmin = 0)
26+
x = c(Season = -1, close = -1, weather = -2, extra = 0, x = 1, y = 2),
27+
y = c(Season = -1, close = 1, weather = 0, extra = 0, x = 0, y = 0)
2728
)
2829
2930
labels <- c(
30-
avg_sactmin = "Average actual wait",
31-
avg_spostmin = "Average posted wait ",
32-
extra_magic_morning = "Extra Magic Morning",
33-
wdw_ticket_season = "Ticket Season",
34-
weather_wdwhigh = "Historic high temperature",
31+
extra = "Extra Magic Morning",
32+
x = "Average posted wait ",
33+
y = "Average acutal wait",
34+
Season = "Ticket Season",
35+
weather = "Historic high temperature",
3536
close = "Time park closed"
3637
)
3738
38-
wait_time_dag <- dagify(
39-
avg_sactmin ~ avg_spostmin + close + wdw_ticket_season + weather_wdwhigh + extra_magic_morning,
40-
avg_spostmin ~ weather_wdwhigh + close + wdw_ticket_season + extra_magic_morning,
39+
dagify(
40+
y ~ x + close + Season + weather + extra,
41+
x ~ weather + close + Season + extra,
42+
extra ~ weather + close + Season,
4143
coords = coord_dag,
42-
labels = labels
43-
)
44-
45-
wait_time_dag |>
46-
ggdag(use_labels = "label", text = FALSE) +
47-
theme_void() +
48-
scale_x_continuous(
49-
limits = c(-2.25, 2.25),
50-
breaks = c(-2, -1, 0, 1, 2),
51-
labels = c("\n(one year ago)", "\n(6 months ago)", "\n(3 months ago)", "8am-9am\n(Today)", "9am-10am\n(Today)")
44+
labels = labels,
45+
exposure = "x",
46+
outcome = "y"
47+
) |>
48+
tidy_dagitty() |>
49+
node_status() |>
50+
ggplot(
51+
aes(x, y, xend = xend, yend = yend, color = status)
52+
) +
53+
geom_dag_edges_arc(curvature = c(rep(0, 7), .2, 0, .2, .2, 0), edge_colour = "grey70") +
54+
geom_dag_point() +
55+
geom_dag_label_repel(
56+
aes(x, y, label = label),
57+
box.padding = 3.5,
58+
inherit.aes = FALSE,
59+
max.overlaps = Inf,
60+
family = "sans",
61+
seed = 1602,
62+
label.size = NA,
63+
label.padding = 0.1,
64+
size = 14 / 3
65+
) +
66+
scale_color_okabe_ito(na.value = "grey90") +
67+
theme_dag() +
68+
theme(
69+
legend.position = "none",
70+
axis.text.x = element_text()
5271
) +
53-
theme(axis.text.x = element_text())
72+
coord_cartesian(clip = "off") +
73+
scale_x_continuous(
74+
limits = c(-2.25, 2.25),
75+
breaks = c(-2, -1, 0, 1, 2),
76+
labels = c(
77+
"\n(one year ago)",
78+
"\n(6 months ago)",
79+
"\n(3 months ago)",
80+
"8am-9am\n(Today)",
81+
"9am-10am\n(Today)"
82+
)
83+
)
5484
```
5585

5686
First, let’s wrangle our data to address our question: do posted wait times at 8 affect actual weight times at 9? We’ll join the baseline data (all covariates and posted wait time at 8) with the outcome (average actual time). We also have a lot of missingness for `avg_sactmin`, so we’ll drop unobserved values for now.
@@ -151,7 +181,7 @@ fit_gcomp <- function(split, ...) {
151181
152182
153183
# predict actual wait time for each cloned dataset
154-
184+
155185
156186
# calculate ATE
157187
bind_cols(predicted_yes, predicted_no) |>

exercises/13-bonus-selection-bias-exercises.qmd

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ format: html
88
library(tidyverse)
99
library(broom)
1010
library(propensity)
11+
library(causaldata)
1112
```
1213

1314
In this example, we'll consider loss to follow-up in the NHEFS study. We'll use the binary exposure we used earlier in the workshop: does quitting smoking (`smk`) increase weight (`wt82_71`)? This time, however, we'll adjust for loss to followup (people who dropped out of the study between observation periods) using inverse probability of censoring weights.

exercises/14-bonus-continuous-pscores-exercises.qmd

Lines changed: 51 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -20,37 +20,67 @@ In the touringplans data set, we have information about the posted waiting times
2020
#| message: false
2121
#| warning: false
2222
library(ggdag)
23+
library(ggokabeito)
2324
2425
coord_dag <- list(
25-
x = c(wdw_ticket_season = -1, close = -1, weather_wdwhigh = -2, extra_magic_morning = 0, avg_spostmin = 1, avg_sactmin = 2),
26-
y = c(wdw_ticket_season = -1, close = 1, weather_wdwhigh = 0.25, extra_magic_morning = 0, avg_spostmin = 0, avg_sactmin = 0)
26+
x = c(Season = -1, close = -1, weather = -2, extra = 0, x = 1, y = 2),
27+
y = c(Season = -1, close = 1, weather = 0, extra = 0, x = 0, y = 0)
2728
)
2829
2930
labels <- c(
30-
avg_sactmin = "Average actual wait",
31-
avg_spostmin = "Average posted wait ",
32-
extra_magic_morning = "Extra Magic Morning",
33-
wdw_ticket_season = "Ticket Season",
34-
weather_wdwhigh = "Historic high temperature",
31+
extra = "Extra Magic Morning",
32+
x = "Average posted wait ",
33+
y = "Average acutal wait",
34+
Season = "Ticket Season",
35+
weather = "Historic high temperature",
3536
close = "Time park closed"
3637
)
3738
38-
wait_time_dag <- dagify(
39-
avg_sactmin ~ avg_spostmin + close + wdw_ticket_season + weather_wdwhigh + extra_magic_morning,
40-
avg_spostmin ~ weather_wdwhigh + close + wdw_ticket_season + extra_magic_morning,
39+
dagify(
40+
y ~ x + close + Season + weather + extra,
41+
x ~ weather + close + Season + extra,
42+
extra ~ weather + close + Season,
4143
coords = coord_dag,
42-
labels = labels
43-
)
44-
45-
wait_time_dag |>
46-
ggdag(use_labels = "label", text = FALSE) +
47-
theme_void() +
48-
scale_x_continuous(
49-
limits = c(-2.25, 2.25),
50-
breaks = c(-2, -1, 0, 1, 2),
51-
labels = c("\n(one year ago)", "\n(6 months ago)", "\n(3 months ago)", "8am-9am\n(Today)", "9am-10am\n(Today)")
44+
labels = labels,
45+
exposure = "x",
46+
outcome = "y"
47+
) |>
48+
tidy_dagitty() |>
49+
node_status() |>
50+
ggplot(
51+
aes(x, y, xend = xend, yend = yend, color = status)
5252
) +
53-
theme(axis.text.x = element_text())
53+
geom_dag_edges_arc(curvature = c(rep(0, 7), .2, 0, .2, .2, 0), edge_colour = "grey70") +
54+
geom_dag_point() +
55+
geom_dag_label_repel(
56+
aes(x, y, label = label),
57+
box.padding = 3.5,
58+
inherit.aes = FALSE,
59+
max.overlaps = Inf,
60+
family = "sans",
61+
seed = 1602,
62+
label.size = NA,
63+
label.padding = 0.1,
64+
size = 14 / 3
65+
) +
66+
scale_color_okabe_ito(na.value = "grey90") +
67+
theme_dag() +
68+
theme(
69+
legend.position = "none",
70+
axis.text.x = element_text()
71+
) +
72+
coord_cartesian(clip = "off") +
73+
scale_x_continuous(
74+
limits = c(-2.25, 2.25),
75+
breaks = c(-2, -1, 0, 1, 2),
76+
labels = c(
77+
"\n(one year ago)",
78+
"\n(6 months ago)",
79+
"\n(3 months ago)",
80+
"8am-9am\n(Today)",
81+
"9am-10am\n(Today)"
82+
)
83+
)
5484
```
5585

5686
First, let’s wrangle our data to address our question: do posted wait times at 8 affect actual weight times at 9? We’ll join the baseline data (all covariates and posted wait time at 8) with the outcome (average actual time). We also have a lot of missingness for `avg_sactmin`, so we’ll drop unobserved values for now.

slides/raw/01-causal_modeling_whole_game.qmd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -511,7 +511,7 @@ fit_ipw <- function(split, ...) {
511511
#| cache: true
512512
#| code-line-numbers: "|2-3"
513513
# fit ipw model to bootstrapped samples
514-
ipw_results <- bootstraps(nhefs_complete, 1000, apparent = TRUE) |>
514+
ipw_results <- bootstraps(nhefs_complete_uc, 1000, apparent = TRUE) |>
515515
mutate(results = map(splits, fit_ipw))
516516
```
517517

slides/raw/06-pscores.qmd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ dagify(
164164
ggplot(
165165
aes(x, y, xend = xend, yend = yend, color = status)
166166
) +
167-
geom_dag_edges_arc(curvature = c(rep(0, 5), .3)) +
167+
geom_dag_edges_arc(curvature = c(rep(0, 5), .3, 0)) +
168168
geom_dag_point() +
169169
geom_dag_label_repel(seed = 1630) +
170170
scale_color_okabe_ito(na.value = "grey90") +

0 commit comments

Comments
 (0)