Skip to content

Commit f809dac

Browse files
Merge pull request #63 from r-causal/touringplans
2 parents d1edc41 + 321c22e commit f809dac

10 files changed

+65
-65
lines changed

exercises/06-intro-pscores-exercises.qmd

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ dagify(
8080
"\n(one year ago)",
8181
"\n(6 months ago)",
8282
"\n(3 months ago)",
83-
"5pm - 6pm\n(Today)"
83+
"9am - 10am\n(Today)"
8484
)
8585
)
8686
```
@@ -93,24 +93,24 @@ First we need to subset the data to only include average wait times between 9 an
9393

9494
```{r}
9595
seven_dwarfs <- seven_dwarfs_train_2018 |>
96-
filter(hour == 9)
96+
filter(wait_hour == 9)
9797
```
9898

9999
Here's a data dictionary of the variables we need in the `seven_dwarfs` data set:
100100

101101
| Variable | Column in `seven_dwarfs` |
102102
|--------------------------------|--------------------------|
103-
| Posted Wait Time (outcome) | `avg_spostmin` |
104-
| Extra Magic Morning (exposure) | `extra_magic_morning` |
105-
| Ticket Season | `wdw_ticket_season` |
106-
| Closing Time | `close` |
107-
| Historic Temperature | `weather_wdwhigh` |
103+
| Posted Wait Time (outcome) | `wait_minutes_posted_avg`|
104+
| Extra Magic Morning (exposure) | `park_extra_magic_morning` |
105+
| Ticket Season | `park_ticket_season` |
106+
| Closing Time | `park_close` |
107+
| Historic Temperature | `park_temperature_high` |
108108

109109
## Your Turn
110110

111111
*After updating the code chunks below, change `eval: true` before rendering*
112112

113-
Now, fit a propensity score model for `extra_magic_morning` using the above proposed confounders.
113+
Now, fit a propensity score model for `park_extra_magic_morning` using the above proposed confounders.
114114

115115
```{r}
116116
#| eval: false
@@ -131,7 +131,7 @@ df <- propensity_model |>
131131

132132
Stretch Goal 1:
133133

134-
Examine two histograms of the propensity scores, one days with Extra Magic Morning (`extra_magic_morning == 1`) and one for days without it (`extra_magic_morning == 0`).
134+
Examine two histograms of the propensity scores, one days with Extra Magic Morning (`park_extra_magic_morning == 1`) and one for days without it (`park_extra_magic_morning == 0`).
135135
How do these compare?
136136

137137
```{r}

exercises/07-pscores-using-exercises.qmd

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,10 @@ Below is the propensity score model you created in the previous exercise.
1818

1919
```{r}
2020
seven_dwarfs <- seven_dwarfs_train_2018 |>
21-
filter(hour == 9)
21+
filter(wait_hour == 9)
2222
2323
propensity_model <- glm(
24-
extra_magic_morning ~ wdw_ticket_season + close + weather_wdwhigh,
24+
park_extra_magic_morning ~ park_ticket_season + park_close + park_temperature_high,
2525
data = seven_dwarfs,
2626
family = binomial()
2727
)
@@ -82,7 +82,7 @@ Update the code below to examine the distribution of the weighted sample. **HINT
8282
#| warning: false
8383
ggplot(
8484
seven_dwarfs_prop,
85-
aes(.fitted, fill = factor(extra_magic_morning))
85+
aes(.fitted, fill = factor(park_extra_magic_morning))
8686
) +
8787
geom_mirror_histogram(bins = 50, alpha = .5) +
8888
geom_mirror_histogram(aes(weight = ____), alpha = .5, bins = 50) +

exercises/08-pscores-diagnostics-exercises.qmd

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,17 +19,17 @@ Below is the propensity score model and weights you created in the previous exer
1919

2020
```{r}
2121
seven_dwarfs <- seven_dwarfs_train_2018 |>
22-
filter(hour == 9)
22+
filter(wait_hour == 9)
2323
2424
propensity_model <- glm(
25-
extra_magic_morning ~ wdw_ticket_season + close + weather_wdwhigh,
25+
park_extra_magic_morning ~ park_ticket_season + park_close + park_temperature_high,
2626
data = seven_dwarfs,
2727
family = binomial()
2828
)
2929
3030
seven_dwarfs_ps <- propensity_model |>
3131
augment(type.predict = "response", data = seven_dwarfs) |>
32-
mutate(w_ate = wt_ate(.fitted, extra_magic_morning))
32+
mutate(w_ate = wt_ate(.fitted, park_extra_magic_morning))
3333
```
3434

3535
## Your Turn 1
@@ -41,7 +41,7 @@ Calculate the standardized mean differences with and without weights
4141
```{r}
4242
#| eval: false
4343
smds <- seven_dwarfs_ps |>
44-
mutate(close = as.numeric(close)) |>
44+
mutate(park_close = as.numeric(park_close)) |>
4545
tidy_smd(
4646
.vars = ____,
4747
.group = ____,
@@ -62,7 +62,7 @@ ggplot(
6262

6363
## Your Turn 2
6464

65-
Create an unweighted ECDF for `weather_wdwhigh` by whether or not the day had Extra Magic Hours.
65+
Create an unweighted ECDF for `park_temperature_high` by whether or not the day had Extra Magic Hours.
6666

6767
```{r}
6868
#| eval: false
@@ -77,7 +77,7 @@ ggplot(seven_dwarfs_ps, aes(x = ____, group = ____, color = factor(____))) +
7777
ylab("Proportion <= x")
7878
```
7979

80-
Create an weighted ECDF for `weather_wdwhigh` by whether or not the day had Extra Magic Hours.
80+
Create an weighted ECDF for `park_temperature_high` by whether or not the day had Extra Magic Hours.
8181

8282
```{r}
8383
#| eval: false

exercises/09-outcome-model-exercises.qmd

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ library(rsample)
1313
library(propensity)
1414
1515
seven_dwarfs <- seven_dwarfs_train_2018 |>
16-
filter(hour == 9)
16+
filter(wait_hour == 9)
1717
```
1818

1919
We are interested in examining the relationship between whether there were "Extra Magic Hours" in the morning (the **exposure**) and the average wait time for the Seven Dwarfs Mine Train the same day between 9am and 10am (the **outcome**).
@@ -57,9 +57,9 @@ ipw_results |>
5757
mutate(
5858
estimate = map_dbl(
5959
boot_fits,
60-
# pull the `estimate` for `extra_magic_morning` for each fit
60+
# pull the `estimate` for `park_extra_magic_morning` for each fit
6161
\(.fit) .fit |>
62-
filter(term == "extra_magic_morning") |>
62+
filter(term == "park_extra_magic_morning") |>
6363
pull(estimate)
6464
)
6565
) |>

exercises/10-continuous-g-computation-exercises.qmd

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ library(splines)
1313

1414
For this set of exercises, we'll use g-computation to calculate a causal effect for continuous exposures.
1515

16-
In the touringplans data set, we have information about the posted waiting times for rides. We also have a limited amount of data on the observed, actual times. The question that we will consider is this: Do posted wait times (`avg_spostmin`) for the Seven Dwarves Mine Train at 8 am affect actual wait times (`avg_sactmin`) at 9 am? Here’s our DAG:
16+
In the touringplans data set, we have information about the posted waiting times for rides. We also have a limited amount of data on the observed, actual times. The question that we will consider is this: Do posted wait times (`wait_minutes_posted_avg`) for the Seven Dwarves Mine Train at 8 am affect actual wait times (`wait_minutes_actual_avg`) at 9 am? Here’s our DAG:
1717

1818
```{r}
1919
#| echo: false
@@ -83,29 +83,29 @@ dagify(
8383
)
8484
```
8585

86-
First, let’s wrangle our data to address our question: do posted wait times at 8 affect actual weight times at 9? We’ll join the baseline data (all covariates and posted wait time at 8) with the outcome (average actual time). We also have a lot of missingness for `avg_sactmin`, so we’ll drop unobserved values for now.
86+
First, let’s wrangle our data to address our question: do posted wait times at 8 affect actual weight times at 9? We’ll join the baseline data (all covariates and posted wait time at 8) with the outcome (average actual time). We also have a lot of missingness for `wait_minutes_actual_avg`, so we’ll drop unobserved values for now.
8787

8888
You don't need to update any code here, so just run this.
8989

9090
```{r}
9191
eight <- seven_dwarfs_train_2018 |>
92-
filter(hour == 8) |>
93-
select(-avg_sactmin)
92+
filter(wait_hour == 8) |>
93+
select(-wait_minutes_actual_avg)
9494
9595
nine <- seven_dwarfs_train_2018 |>
96-
filter(hour == 9) |>
97-
select(date, avg_sactmin)
96+
filter(wait_hour == 9) |>
97+
select(park_date, wait_minutes_actual_avg)
9898
9999
wait_times <- eight |>
100-
left_join(nine, by = "date") |>
101-
drop_na(avg_sactmin)
100+
left_join(nine, by = "park_date") |>
101+
drop_na(wait_minutes_actual_avg)
102102
```
103103

104104
# Your Turn 1
105105

106-
For the parametric G-formula, we'll use a single model to fit a causal model of Posted Waiting Times (`avg_spostmin`) on Actual Waiting Times (`avg_sactmin`) where we include all covariates, much as we normally fit regression models. However, instead of interpreting the coefficients, we'll calculate the estimate by predicting on cloned data sets.
106+
For the parametric G-formula, we'll use a single model to fit a causal model of Posted Waiting Times (`wait_minutes_posted_avg`) on Actual Waiting Times (`wait_minutes_actual_avg`) where we include all covariates, much as we normally fit regression models. However, instead of interpreting the coefficients, we'll calculate the estimate by predicting on cloned data sets.
107107

108-
Two additional differences in our model: we'll use a natural cubic spline on the exposure, `avg_spostmin`, using `ns()` from the splines package, and we'll include an interaction term between `avg_spostmin` and `extra_magic_mornin g`. These complicate the interpretation of the coefficient of the model in normal regression but have virtually no downside (as long as we have a reasonable sample size) in g-computation, because we still get an easily interpretable result.
108+
Two additional differences in our model: we'll use a natural cubic spline on the exposure, `wait_minutes_posted_avg`, using `ns()` from the splines package, and we'll include an interaction term between `wait_minutes_posted_avg` and `park_extra_magic_morning`. These complicate the interpretation of the coefficient of the model in normal regression but have virtually no downside (as long as we have a reasonable sample size) in g-computation, because we still get an easily interpretable result.
109109

110110
First, let's fit the model.
111111

@@ -114,14 +114,14 @@ First, let's fit the model.
114114

115115
```{r}
116116
_______ ___ _______(
117-
avg_sactmin ~ ns(_______, df = 5)*extra_magic_morning + _______ + _______ + _______,
117+
wait_minutes_actual_avg ~ ns(_______, df = 5)*park_extra_magic_morning + _______ + _______ + _______,
118118
data = seven_dwarfs
119119
)
120120
```
121121

122122
# Your Turn 2
123123

124-
Now that we've fit a model, we need to clone our data set. To do this, we'll simply mutate it so that in one set, all participants have `avg_spostmin` set to 30 minutes and in another, all participants have `avg_spostmin` set to 60 minutes.
124+
Now that we've fit a model, we need to clone our data set. To do this, we'll simply mutate it so that in one set, all participants have `wait_minutes_posted_avg` set to 30 minutes and in another, all participants have `wait_minutes_posted_avg` set to 60 minutes.
125125

126126
1. Create the cloned data sets, called `thirty` and `sixty`.
127127
2. For both data sets, use `standardized_model` and `augment()` to get the predicted values. Use the `newdata` argument in `augment()` with the relevant cloned data set. Then, select only the fitted value. Rename `.fitted` to either `thirty_posted_minutes` or `sixty_posted_minutes` (use the pattern `select(new_name = old_name)`).

exercises/14-bonus-continuous-pscores-exercises.qmd

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ library(propensity)
1313

1414
For this set of exercises, we'll use propensity scores for continuous exposures.
1515

16-
In the touringplans data set, we have information about the posted waiting times for rides. We also have a limited amount of data on the observed, actual times. The question that we will consider is this: Do posted wait times (`avg_spostmin`) for the Seven Dwarves Mine Train at 8 am affect actual wait times (`avg_sactmin`) at 9 am? Here’s our DAG:
16+
In the touringplans data set, we have information about the posted waiting times for rides. We also have a limited amount of data on the observed, actual times. The question that we will consider is this: Do posted wait times (`wait_minutes_posted_avg`) for the Seven Dwarves Mine Train at 8 am affect actual wait times (`wait_minutes_actual_avg`) at 9 am? Here’s our DAG:
1717

1818
```{r}
1919
#| echo: false
@@ -83,31 +83,31 @@ dagify(
8383
)
8484
```
8585

86-
First, let’s wrangle our data to address our question: do posted wait times at 8 affect actual weight times at 9? We’ll join the baseline data (all covariates and posted wait time at 8) with the outcome (average actual time). We also have a lot of missingness for `avg_sactmin`, so we’ll drop unobserved values for now.
86+
First, let’s wrangle our data to address our question: do posted wait times at 8 affect actual weight times at 9? We’ll join the baseline data (all covariates and posted wait time at 8) with the outcome (average actual time). We also have a lot of missingness for `wait_minutes_actual_avg`, so we’ll drop unobserved values for now.
8787

8888
You don't need to update any code here, so just run this.
8989

9090
```{r}
9191
eight <- seven_dwarfs_train_2018 |>
92-
filter(hour == 8) |>
93-
select(-avg_sactmin)
92+
filter(wait_hour == 8) |>
93+
select(-wait_minutes_actual_avg)
9494
9595
nine <- seven_dwarfs_train_2018 |>
96-
filter(hour == 9) |>
97-
select(date, avg_sactmin)
96+
filter(wait_hour == 9) |>
97+
select(park_date, wait_minutes_actual_avg)
9898
9999
wait_times <- eight |>
100-
left_join(nine, by = "date") |>
101-
drop_na(avg_sactmin)
100+
left_join(nine, by = "park_date") |>
101+
drop_na(wait_minutes_actual_avg)
102102
```
103103

104104
# Your Turn 1
105105

106-
First, let’s calculate the propensity score model, which will be the denominator in our stabilized weights (more to come on that soon). We’ll fit a model using `lm()` for `avg_spostmin` with our covariates, then use the fitted predictions of `avg_spostmin` (`.fitted`, `.sigma`) to calculate the density using `dnorm()`.
106+
First, let’s calculate the propensity score model, which will be the denominator in our stabilized weights (more to come on that soon). We’ll fit a model using `lm()` for `wait_minutes_posted_avg` with our covariates, then use the fitted predictions of `wait_minutes_posted_avg` (`.fitted`, `.sigma`) to calculate the density using `dnorm()`.
107107

108-
1. Fit a model using `lm()` with `avg_spostmin` as the outcome and the confounders identified in the DAG.
108+
1. Fit a model using `lm()` with `wait_minutes_posted_avg` as the outcome and the confounders identified in the DAG.
109109
2. Use `augment()` to add model predictions to the data frame.
110-
3. In `wt_ate()`, calculate the weights using `avg_postmin`, `.fitted`, and `.sigma`.
110+
3. In `wt_ate()`, calculate the weights using `wait_minutes_posted_avg`, `.fitted`, and `.sigma`.
111111

112112
```{r}
113113
post_time_model <- lm(
@@ -169,7 +169,7 @@ Now, let's fit the outcome model!
169169
```{r}
170170
lm(___ ~ ___, weights = ___, data = wait_times_swts) |>
171171
tidy() |>
172-
filter(term == "avg_spostmin") |>
172+
filter(term == "wait_minutes_posted_avg") |>
173173
mutate(estimate = estimate * 10)
174174
```
175175

slides/raw/06-pscores.qmd

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ dagify(
181181
"\n(one year ago)",
182182
"\n(6 months ago)",
183183
"\n(3 months ago)",
184-
"5pm - 6pm\n(Today)"
184+
"9am - 10am\n(Today)"
185185
)
186186
)
187187
```
@@ -190,7 +190,7 @@ dagify(
190190

191191
`r countdown::countdown(minutes = 10)`
192192

193-
### Using the **confounders** identified in the previous DAG, fit a propensity score model for `extra_magic_morning`
193+
### Using the **confounders** identified in the previous DAG, fit a propensity score model for `park_extra_magic_morning`
194194
### *Stretch*: Create two histograms, one of the propensity scores for days with extra morning magic hours and one for those without
195195

196196

slides/raw/08-pscore-diagnostics.qmd

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -178,8 +178,8 @@ ggplot(df, aes(x = wt71, color = factor(qsmk))) +
178178

179179
`r countdown::countdown(minutes = 10)`
180180

181-
### Create an unweighted ECDF examining the `weather_wdwhigh` confounder by whether or not the day had Extra Magic Hours.
182-
### Create a weighted ECDF examining the `weather_wdwhigh` confounder
181+
### Create an unweighted ECDF examining the `park_temperature_high` confounder by whether or not the day had Extra Magic Hours.
182+
### Create a weighted ECDF examining the `park_temperature_high` confounder
183183

184184

185185
## {background-color="#23373B" .center .huge}

slides/raw/09-outcome-model.qmd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ boot_estimate <- int_t(ipw_results, boot_fits) |>
143143

144144
`r countdown::countdown(minutes = 12)`
145145

146-
### Create a function called `ipw_fit` that fits the propensity score model and the weighted outcome model for the effect between `extra_magic_morning` and `avg_spostmin`
146+
### Create a function called `ipw_fit` that fits the propensity score model and the weighted outcome model for the effect between `park_extra_magic_morning` and `wait_minutes_posted_avg`
147147

148148
### Using the `bootstraps()` and `int_t()` functions to estimate the final effect.
149149

slides/raw/14-bonus-continuous-pscores.qmd

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -192,9 +192,9 @@ dagify(
192192

193193
## *Your Turn 1*
194194

195-
### Fit a model using `lm()` with `avg_spostmin` as the outcome and the confounders identified in the DAG.
195+
### Fit a model using `lm()` with `wait_minutes_posted_avg` as the outcome and the confounders identified in the DAG.
196196
### Use `augment()` to add model predictions to the data frame
197-
### In `wt_ate()`, calculate the weights using `avg_postmin`, `.fitted`, and `.sigma`
197+
### In `wt_ate()`, calculate the weights using `wait_minutes_posted_avg`, `.fitted`, and `.sigma`
198198

199199
`r countdown::countdown(minutes = 5)`
200200

@@ -203,23 +203,23 @@ dagify(
203203
```{r}
204204
#| include: false
205205
eight <- seven_dwarfs_train_2018 |>
206-
filter(hour == 8) |>
207-
select(-avg_sactmin)
206+
filter(wait_hour == 8) |>
207+
select(-wait_minutes_posted_avg)
208208
209209
nine <- seven_dwarfs_train_2018 |>
210-
filter(hour == 9) |>
211-
select(date, avg_sactmin)
210+
filter(wait_hour == 9) |>
211+
select(park_date, wait_minutes_posted_avg)
212212
213213
wait_times <- eight |>
214-
left_join(nine, by = "date") |>
215-
drop_na(avg_sactmin)
214+
left_join(nine, by = "park_date") |>
215+
drop_na(wait_minutes_posted_avg)
216216
```
217217

218218
```{r}
219219
post_time_model <- lm(
220-
avg_spostmin ~
221-
close + extra_magic_morning +
222-
weather_wdwhigh + wdw_ticket_season,
220+
wait_minutes_posted_avg ~
221+
park_close + park_extra_magic_morning +
222+
park_temperature_high + park_ticket_season,
223223
data = wait_times
224224
)
225225
```
@@ -230,7 +230,7 @@ post_time_model <- lm(
230230
wait_times_wts <- post_time_model |>
231231
augment(data = wait_times) |>
232232
mutate(wts = wt_ate(
233-
avg_spostmin, .fitted, .sigma = .sigma
233+
wait_minutes_posted_avg, .fitted, .sigma = .sigma
234234
))
235235
```
236236

@@ -289,7 +289,7 @@ ggplot(nhefs_swts, aes(swts)) +
289289
wait_times_swts <- post_time_model |>
290290
augment(data = wait_times) |>
291291
mutate(swts = wt_ate(
292-
avg_spostmin,
292+
wait_minutes_posted_avg,
293293
.fitted,
294294
.sigma = .sigma,
295295
stabilize = TRUE
@@ -324,12 +324,12 @@ lm(
324324

325325
```{r}
326326
lm(
327-
avg_sactmin ~ avg_spostmin,
327+
wait_minutes_actual_avg ~ wait_minutes_posted_avg,
328328
weights = swts,
329329
data = wait_times_swts
330330
) |>
331331
tidy() |>
332-
filter(term == "avg_spostmin") |>
332+
filter(term == "wait_minutes_posted_avg") |>
333333
mutate(estimate = estimate * 10)
334334
```
335335

0 commit comments

Comments
 (0)