
Commit 8670b76

Merge pull request #74 from r-causal/clean_up
Spring cleaning
2 parents e6bbc97 + ae71362 commit 8670b76

143 files changed: +2776 additions, -2272 deletions


exercises/05-quartets-exercises.qmd
Lines changed: 7 additions & 3 deletions

@@ -11,19 +11,21 @@ library(quartets)
 
 ## Your turn 1
 
-For each of the following 4 datasets, look at the correlation between `exposure` and `covariate`:
+For each of the following 4 datasets, create a scatterplot looking at the relationship between `exposure` and `outcome`:
 
 * `causal_collider`
 * `causal_confounding`
 * `causal_mediator`
 * `causal_m_bias`
 
+(Alternatively, you can work with `causal_quartet`, which has all four datasets stacked on top of one another.)
 
 ```{r}
 
 ```
 
-For each of the above 4 datasets, create a scatterplot looking at the relationship between `exposure` and `outcome`
+For each of the above 4 datasets, look at the correlation between `exposure` and `covariate`
+
 
 ```{r}
@@ -37,13 +39,15 @@ For each of the above 4 datasets, fit a linear model to examine the relationship
 
 ## Your turn 2
 
-For each of the following 4 datasets, fit a linear linear model examining the relationship between `outcome_followup` and `exposure_baseline` adjusting for `covariate_baseline`:
+For each of the following 4 datasets, fit a linear model examining the relationship between `outcome_followup` and `exposure_baseline` adjusting for `covariate_baseline`:
 
 * `causal_collider_time`
 * `causal_confounding_time`
 * `causal_mediator_time`
 * `causal_m_bias_time`
 
+(Alternatively, you can work with `causal_quartet_time`, which has all four datasets stacked on top of one another.)
+
 ```{r}
 
 ```
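For context on the reordered tasks above, a possible completion might look like the following sketch (not part of the commit; it assumes the `quartets` package's `causal_quartet` data with `dataset`, `exposure`, `outcome`, and `covariate` columns):

```r
# Hypothetical completion of the exercise, not part of this commit
library(quartets)
library(dplyr)
library(ggplot2)

# Scatterplots of outcome vs. exposure, one panel per dataset
causal_quartet |>
  ggplot(aes(exposure, outcome)) +
  geom_point() +
  facet_wrap(~ dataset)

# Correlations between exposure and covariate in each dataset
causal_quartet |>
  group_by(dataset) |>
  summarise(correlation = cor(exposure, covariate))
```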

exercises/06-intro-pscores-exercises.qmd
Lines changed: 10 additions & 14 deletions

@@ -28,32 +28,32 @@ Below is a proposed DAG for this question.
 set.seed(1234)
 
 coord_dag <- list(
-  x = c(Season = 0, close = 0, weather = -1, x = 1, y = 2),
-  y = c(Season = -1, close = 1, weather = 0, x = 0, y = 0)
+  x = c(season = 0, close = 0, weather = -1, emm = 1, wait_posted = 2),
+  y = c(season = -1, close = 1, weather = 0, emm = 0, wait_posted = 0)
 )
 
 labels <- c(
-  x = "Extra Magic Morning",
-  y = "Average wait",
-  Season = "Ticket Season",
+  emm = "Extra Magic Morning",
+  wait_posted = "Average wait",
+  season = "Ticket Season",
   weather = "Historic high temperature",
   close = "Time park closed"
 )
 
 dagify(
-  y ~ x + close + Season + weather,
-  x ~ weather + close + Season,
+  wait_posted ~ emm + close + season + weather,
+  emm ~ weather + close + season,
   coords = coord_dag,
   labels = labels,
-  exposure = "x",
-  outcome = "y"
+  exposure = "emm",
+  outcome = "wait_posted"
 ) |>
   tidy_dagitty() |>
   node_status() |>
   ggplot(
     aes(x, y, xend = xend, yend = yend, color = status)
   ) +
-  geom_dag_edges_arc(curvature = c(rep(0, 5), .3, 0)) +
+  geom_dag_edges_arc(curvature = c(rep(0, 6), .3)) +
   geom_dag_point() +
   geom_dag_label_repel(
     aes(x, y, label = label),
@@ -108,12 +108,9 @@ Here's a data dictionary of the variables we need in the `seven_dwarfs` data set
 
 ## Your Turn
 
-*After updating the code chunks below, change `eval: true` before rendering*
-
 Now, fit a propensity score model for `park_extra_magic_morning` using the above proposed confounders.
 
 ```{r}
-#| eval: false
 propensity_model <- ___(
   ___ ~ ___,
   data = seven_dwarfs,
@@ -124,7 +121,6 @@ propensity_model <- ___(
 Add the propensity scores to the `seven_dwarfs` data set, call this new dataset `df`.
 
 ```{r}
-#| eval: false
 df <- propensity_model |>
   ____(type.predict = ____, data = ____)
 ```
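One way the blanks in this exercise might be filled in, shown only as a hedged sketch: the confounders follow the DAG above, and `glm()`/`broom::augment()` are assumptions about the intended tools, not confirmed by the diff.

```r
# Hypothetical completion, not part of this commit
library(broom)        # augment()
library(touringplans) # seven_dwarfs_train_2018
library(dplyr)

seven_dwarfs <- seven_dwarfs_train_2018 |>
  filter(wait_hour == 9)

# Logistic regression propensity score model for the exposure,
# using the three DAG confounders
propensity_model <- glm(
  park_extra_magic_morning ~ park_ticket_season + park_close + park_temperature_high,
  data = seven_dwarfs,
  family = binomial()
)

# Attach the propensity scores (in `.fitted`) to the data
df <- propensity_model |>
  augment(type.predict = "response", data = seven_dwarfs)
```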

exercises/07-pscores-using-exercises.qmd
Lines changed: 20 additions & 16 deletions

@@ -10,6 +10,7 @@ library(tidyverse)
 library(broom)
 library(touringplans)
 library(propensity)
+library(halfmoon)
 ```
 
 We are interested in examining the relationship between whether there were "Extra Magic Hours" in the morning (the **exposure**) and the average wait time for the Seven Dwarfs Mine Train the same day between 9am and 10am (the **outcome**).
@@ -32,12 +33,13 @@ seven_dwarfs_prop <- propensity_model |>
 
 ## Your Turn 1 (Matching)
 
-*After updating the code chunks below, change `eval: true` before rendering*
+Create a "matched" data set using the same propensity score model as above and a caliper of 0.2.
 
-Create a "matched" data set using the same propensity score model as above and a caliper of 0.2.
+1. Provide `matchit()` the formula for the propensity score.
+2. Using the `link` and `caliper` arguments, create a caliper of 0.2 SDs on the linear logit scale.
+3. Extract the matched datasets into a new data frame called `matched_df`.
 
 ```{r}
-#| eval: false
 library(MatchIt)
 matched_dwarfs <- matchit(
   ___,
@@ -51,12 +53,9 @@ matched_df <- ___(matched_dwarfs)
 
 ## Your Turn 2 (Weighting)
 
-*After updating the code chunks below, change `eval: true` before rendering*
-
 Add the ATE weights to the data frame, `seven_dwarfs_prop`
 
 ```{r}
-#| eval: false
 seven_dwarfs_prop <- seven_dwarfs_prop |>
   mutate(w_ate = ___)
 ```
@@ -67,7 +66,6 @@ Stretch Goal 1:
 Add ATM weights to the data frame, `seven_dwarfs_prop`
 
 ```{r}
-#| eval: false
 seven_dwarfs_prop <- seven_dwarfs_prop |>
   mutate(w_atm = ___)
 ```
@@ -78,18 +76,24 @@ Update the code below to examine the distribution of the weighted sample. **HINT
 
 
 ```{r}
-#| eval: false
 #| warning: false
-ggplot(
-  seven_dwarfs_prop,
-  aes(.fitted, fill = factor(park_extra_magic_morning))
-) +
-  geom_mirror_histogram(bins = 50, alpha = .5) +
-  geom_mirror_histogram(aes(weight = ____), alpha = .5, bins = 50) +
+seven_dwarfs_prop |>
+  mutate(
+    park_extra_magic_morning = factor(park_extra_magic_morning)
+  ) |>
+  ggplot(aes(.fitted)) +
+  geom_mirror_histogram(
+    aes(group = park_extra_magic_morning),
+    bins = 30
+  ) +
+  geom_mirror_histogram(
+    aes(fill = park_extra_magic_morning, weight = ______),
+    alpha = .5,
+    bins = 30
+  ) +
   geom_hline(yintercept = 0, lwd = 0.5) +
   theme_minimal() +
   scale_y_continuous(labels = abs) +
   scale_fill_manual(values = c("blue", "green")) +
-  labs(x = "p", fill = "Extra Magic Morning") +
-  xlim(0, 1)
+  labs(x = "p", fill = "Extra Magic Morning")
 ```
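The matching and weighting steps above might be completed as follows. This is a sketch under the assumption that `seven_dwarfs_prop` carries the propensity scores in `.fitted` (as in the earlier chunk) and that `propensity::wt_ate()` is the intended weighting helper; it is not a definitive solution.

```r
# Hypothetical completion, not part of this commit
library(MatchIt)
library(propensity)
library(dplyr)

# Your Turn 1: nearest-neighbor matching with a 0.2-SD caliper
# on the linear logit scale (assumed confounder set)
matched_dwarfs <- matchit(
  park_extra_magic_morning ~ park_ticket_season + park_close + park_temperature_high,
  data = seven_dwarfs_prop,
  link = "linear.logit",
  caliper = 0.2
)
matched_df <- get_matches(matched_dwarfs)

# Your Turn 2: ATE weights computed from the propensity scores
seven_dwarfs_prop <- seven_dwarfs_prop |>
  mutate(w_ate = wt_ate(.fitted, park_extra_magic_morning))
```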

exercises/08-pscores-diagnostics-exercises.qmd
Lines changed: 30 additions & 7 deletions

@@ -34,12 +34,9 @@ seven_dwarfs_ps <- propensity_model |>
 
 ## Your Turn 1
 
-*After updating the code chunks below, change `eval: true` before rendering*
-
 Calculate the standardized mean differences with and without weights
 
 ```{r}
-#| eval: false
 smds <- seven_dwarfs_ps |>
   mutate(park_close = as.numeric(park_close)) |>
   tidy_smd(
@@ -52,20 +49,20 @@ smds <- seven_dwarfs_ps |>
 Create the Love Plot using ggplot and halfmoon
 
 ```{r}
-#| eval: false
 ggplot(
   data = ____,
   aes(x = abs(____), y = ____, group = ____, color = ____)
 ) +
   geom_love()
 ```
 
+Stretch goal: Create a Love Plot using `make_dummy_vars = TRUE` and sorted by `abs(smd)` for the SMD values for the rows representing the observed data.
+
 ## Your Turn 2
 
 Create an unweighted ECDF for `park_temperature_high` by whether or not the day had Extra Magic Hours.
 
 ```{r}
-#| eval: false
 ggplot(seven_dwarfs_ps, aes(x = ____, group = ____, color = factor(____))) +
   ____() +
   scale_color_manual(
@@ -80,15 +77,41 @@ ggplot(seven_dwarfs_ps, aes(x = ____, group = ____, color = factor(____))) +
 Create a weighted ECDF for `park_temperature_high` by whether or not the day had Extra Magic Hours.
 
 ```{r}
-#| eval: false
 ggplot(seven_dwarfs_ps, aes(x = ____, color = factor(____))) +
   ____(aes(weights = ____)) +
   scale_color_manual(
     "Extra Magic Hours",
     values = c("#5154B8", "#5DB854"),
     labels = c("Yes", "No")
   ) +
-  xlab(____) +
+  xlab("Historic Temperature") +
   ylab("Proportion <= x (Weighted)")
 ```
 
+## Bonus Your Turn: Weighted Tables
+
+Create a weighted table for the seven dwarfs dataset given your weights
+
+1. Create a survey design object using `svydesign()` that specifies the weights as `w_ate`
+2. Use `tbl_svysummary()` by `park_extra_magic_morning` to specify a weighted table
+3. Use `add_difference()` to add `"smd"` differences for every variable
+
+
+```{r}
+library(survey)
+library(gtsummary)
+seven_dwarfs_ps |>
+  select(park_extra_magic_morning, park_ticket_season, park_close, park_temperature_high, w_ate) |>
+  ______(
+    ids = ~ 1,
+    data = _,
+    weights = ~ ____
+  ) |>
+  ______(
+    by = ____,
+    include = -w_ate
+  ) |>
+  add_difference(everything() ~ "____")
+```
+
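Following the three numbered steps, the bonus chunk's blanks might be filled in like this (a sketch: `svydesign()`, `tbl_svysummary()`, and `"smd"` are named in the exercise's instructions; everything else is an assumption):

```r
# Hypothetical completion, not part of this commit
library(survey)
library(gtsummary)
library(dplyr)

seven_dwarfs_ps |>
  select(park_extra_magic_morning, park_ticket_season, park_close, park_temperature_high, w_ate) |>
  svydesign(
    ids = ~ 1,
    data = _,        # base-pipe placeholder (requires R >= 4.2)
    weights = ~ w_ate
  ) |>
  tbl_svysummary(
    by = park_extra_magic_morning,
    include = -w_ate
  ) |>
  add_difference(everything() ~ "smd")
```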
exercises/09-outcome-model-exercises.qmd
Lines changed: 0 additions & 6 deletions

@@ -20,12 +20,9 @@ We are interested in examining the relationship between whether there were "Extr
 
 ## Your turn
 
-*After updating the code chunks below, change `eval: true` before rendering*
-
 Create a function called `fit_ipw` that fits the propensity score model from Exercise 03, incorporates the ATE weights calculated in Exercise 04, and fits a weighted outcome model.
 
 ```{r}
-#| eval: false
 fit_ipw <- function(split, ...) {
   .df <- ____
 
@@ -42,7 +39,6 @@ fit_ipw <- function(split, ...) {
 Bootstrap this result 1000 times.
 
 ```{r}
-#| eval: false
 set.seed(1234)
 
 ipw_results <- ____(___, 1000, apparent = TRUE) |>
@@ -52,7 +48,6 @@ ipw_results <- ____(___, 1000, apparent = TRUE) |>
 Check out the distribution of estimates (**no need to change this code**)
 
 ```{r}
-#| eval: false
 ipw_results |>
   mutate(
     estimate = map_dbl(
@@ -71,7 +66,6 @@ ipw_results |>
 Calculate the confidence interval
 
 ```{r}
-#| eval: false
 boot_estimate <- ____(____, ____) |>
   filter(term == ____)
 
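The bootstrap workflow in this exercise might be sketched as follows, assuming `fit_ipw()` has been completed as described; `rsample::bootstraps()` and `rsample::int_t()` fit the shapes of the blanks, but the exercise leaves the function names open.

```r
# Hypothetical completion, not part of this commit
library(rsample)
library(purrr)
library(dplyr)

set.seed(1234)

# Resample the data 1000 times and fit the IPW model on each split
ipw_results <- bootstraps(seven_dwarfs, 1000, apparent = TRUE) |>
  mutate(results = map(splits, fit_ipw))

# Bootstrap t confidence interval for the exposure term
boot_estimate <- int_t(ipw_results, results) |>
  filter(term == "park_extra_magic_morning")
```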
exercises/10-continuous-g-computation-exercises.qmd
Lines changed: 7 additions & 10 deletions

@@ -9,9 +9,6 @@ library(tidyverse)
 library(broom)
 library(touringplans)
 library(splines)
-
-seven_dwarfs <- seven_dwarfs_train_2018 |>
-  filter(wait_hour == 9)
 ```
 
 For this set of exercises, we'll use g-computation to calculate a causal effect for continuous exposures.
@@ -117,8 +114,8 @@ First, let's fit the model.
 
 ```{r}
 _______ ___ _______(
-  wait_minutes_actual_avg ~ ns(_______, df = 5)*park_extra_magic_morning + _______ + _______ + _______,
-  data = seven_dwarfs
+  wait_minutes_actual_avg ~ ns(_______, df = 2)*park_extra_magic_morning + _______ + _______ + _______,
+  data = wait_times
 )
 ```
 
@@ -131,10 +128,10 @@ Now that we've fit a model, we need to clone our data set. To do this, we'll sim
 3. Save the predicted data sets as `predicted_thirty` and `predicted_sixty`.
 
 ```{r}
-_______ <- seven_dwarfs |>
+_______ <- wait_times |>
   _______
 
-_______ <- seven_dwarfs |>
+_______ <- wait_times |>
   _______
 
 predicted_thirty <- standardized_model |>
@@ -177,10 +174,10 @@ library(rsample)
 fit_gcomp <- function(split, ...) {
   .df <- analysis(split)
 
-  # fit outcome model. remember to model using `.df` instead of `seven_dwarfs`
+  # fit outcome model. remember to model using `.df` instead of `wait_times`
 
 
-  # clone datasets. remember to clone `.df` instead of `seven_dwarfs`
+  # clone datasets. remember to clone `.df` instead of `wait_times`
 
 
   # predict actual wait time for each cloned dataset
@@ -197,7 +194,7 @@ fit_gcomp <- function(split, ...) {
     pivot_longer(everything(), names_to = "term", values_to = "estimate")
 }
 
-gcomp_results <- bootstraps(seven_dwarfs, 1000, apparent = TRUE) |>
+gcomp_results <- bootstraps(wait_times, 1000, apparent = TRUE) |>
   mutate(results = map(splits, ______))
 
 # using bias-corrected confidence intervals
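The clone-and-predict step this file describes might look like the sketch below; the exposure column name (`wait_minutes_posted_avg`) and the model object name (`standardized_model`) are assumptions drawn from the surrounding blanks, not confirmed by the diff.

```r
# Hypothetical completion, not part of this commit
library(dplyr)
library(broom)

# Clone the data, setting the (assumed) exposure column to 30 and 60 minutes
thirty <- wait_times |>
  mutate(wait_minutes_posted_avg = 30)

sixty <- wait_times |>
  mutate(wait_minutes_posted_avg = 60)

# Predict the actual wait time under each intervention
predicted_thirty <- standardized_model |>
  augment(newdata = thirty)

predicted_sixty <- standardized_model |>
  augment(newdata = sixty)

# G-computation estimate: difference in mean predicted outcomes
mean(predicted_sixty$.fitted) - mean(predicted_thirty$.fitted)
```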

exercises/12-whole-game-2-exercises.qmd
Lines changed: 2 additions & 1 deletion

@@ -119,7 +119,8 @@ mosquito_dag |>
   theme_dag(base_size = 14) +
   theme(legend.position = "none") +
   labs(caption = "Thanks to Andrew Heiss for the data!") +
-  coord_cartesian(clip = "off")
+  coord_cartesian(clip = "off") +
+  ggokabeito::scale_color_okabe_ito(na.value = "grey90")
 ```
 
 # Your Turn

slides/pdf/00-intro.pdf

3.14 MB (binary file not shown)
51.5 KB (binary file not shown)
-6.26 KB (binary file not shown)