r-causal
diff --git a/‎exercises/06-intro-pscores-exercises.qmd‎
Lines changed: 43 additions & 18 deletions b/‎exercises/06-intro-pscores-exercises.qmd‎
Lines changed: 43 additions & 18 deletions
diff --git a/‎exercises/09-outcome-model-exercises.qmd‎
Lines changed: 19 additions & 1 deletion b/‎exercises/09-outcome-model-exercises.qmd‎
Lines changed: 19 additions & 1 deletion
diff --git a/‎exercises/10-continuous-g-computation-exercises.qmd‎
Lines changed: 52 additions & 22 deletions b/‎exercises/10-continuous-g-computation-exercises.qmd‎
Lines changed: 52 additions & 22 deletions
diff --git a/‎exercises/13-bonus-selection-bias-exercises.qmd‎
Lines changed: 1 addition & 0 deletions b/‎exercises/13-bonus-selection-bias-exercises.qmd‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎exercises/14-bonus-continuous-pscores-exercises.qmd‎
Lines changed: 51 additions & 21 deletions b/‎exercises/14-bonus-continuous-pscores-exercises.qmd‎
Lines changed: 51 additions & 21 deletions
diff --git a/‎slides/raw/01-causal_modeling_whole_game.qmd‎
Lines changed: 1 addition & 1 deletion b/‎slides/raw/01-causal_modeling_whole_game.qmd‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎slides/raw/06-pscores.qmd‎
Lines changed: 1 addition & 1 deletion b/‎slides/raw/06-pscores.qmd‎
Lines changed: 1 addition & 1 deletion
@@ -9,6 +9,7 @@ library(tidyverse)
 library(broom)
 library(touringplans)
 library(ggdag)
+library(ggokabeito)
 ```
 
 For Your Turn, we'll be looking at an example using Walt Disney World ride data from the touringplans package.
@@ -25,39 +26,63 @@ Below is a proposed DAG for this question.
 
 ```{r}
 set.seed(1234)
-# set up DAG coordinates
+
 coord_dag <- list(
-  x = c(season = 0, close = 0, weather_wdwhigh = -1, x = 1, y = 2),
-  y = c(season = -1, close = 1, weather_wdwhigh = 0.25, x = 0, y = 0)
+  x = c(Season = 0, close = 0, weather = -1, x = 1, y = 2),
+  y = c(Season = -1, close = 1, weather = 0, x = 0, y = 0)
 )
 
-# nicer labels for the nodes
 labels <- c(
   x = "Extra Magic Morning",
   y = "Average wait",
-  season = "Ticket Season",
-  weather_wdwhigh = "Historic high temperature",
+  Season = "Ticket Season",
+  weather = "Historic high temperature",
   close = "Time park closed"
 )
 
-# visualize the dag
 dagify(
-  y ~ x + close + season + weather_wdwhigh,
-  x ~ weather_wdwhigh + close + season,
+  y ~ x + close + Season + weather,
+  x ~ weather + close + Season,
   coords = coord_dag,
-  labels = labels
+  labels = labels,
+  exposure = "x",
+  outcome = "y"
 ) |>
-  ggdag(use_labels = "label", text = FALSE) +
-  theme_void() +
+  tidy_dagitty() |>
+  node_status() |>
+  ggplot(
+    aes(x, y, xend = xend, yend = yend, color = status)
+  ) +
+  geom_dag_edges_arc(curvature = c(rep(0, 5), .3, 0)) +
+  geom_dag_point() +
+  geom_dag_label_repel(
+    aes(x, y, label = label),
+    box.padding = 3.5, 
+    inherit.aes = FALSE,
+    max.overlaps = Inf, 
+    family = "sans",
+    seed = 1630,
+    label.size = NA, 
+    label.padding = 0.1,
+    size = 14 / 3
+  ) +
+  scale_color_okabe_ito(na.value = "grey90") +
+  theme_dag() +
+  theme(
+    legend.position = "none",
+    axis.text.x = element_text()
+  ) +
+  coord_cartesian(clip = "off") +
   scale_x_continuous(
-    limits = c(-1.25, 2.25), 
-    breaks = c(-1, 0, 1, 2), 
+    limits = c(-1.25, 2.25),
+    breaks = c(-1, 0, 1, 2),
     labels = c(
-      "\n(one year ago)", "\n(6 months ago)",
-      "\n(3 months ago)", "9am - 10am\n(Today)"
+      "\n(one year ago)",
+      "\n(6 months ago)",
+      "\n(3 months ago)",
+      "5pm - 6pm\n(Today)"
     )
-  ) +
-  theme(axis.text.x = element_text())
+  )
 ```
 
 Here we are proposing that there are three confounders: the historic high temperature on the day, the time the park closed, and the ticket season (value, regular, or peak).
 
@@ -45,9 +45,27 @@ Bootstrap this result 1000 times.
 set.seed(1234)
 
 ipw_results <- ____(___, 1000, apparent = TRUE) |>
-  mutate(results = map(splits, _____)) 
+  mutate(boot_fits = map(splits, _____)) 
 ```
 
+Check out the distribution of estimates (**no need to change this code**)
+
+```{r}
+#| eval: false
+ipw_results |>
+  mutate(
+    estimate = map_dbl(
+      boot_fits,
+      # pull the `estimate` for `qsmk` for each fit
+      \(.fit) .fit |>
+        filter(term == "qsmk") |>
+        pull(estimate)
+    )
+  ) |>
+  ggplot(aes(estimate)) +
+  geom_histogram(fill = "#D55E00FF", color = "white", alpha = 0.8) + 
+  theme_minimal()
+```
 
 Calculate the confidence interval
 
 
@@ -20,37 +20,67 @@ In the touringplans data set, we have information about the posted waiting times
 #| message: false
 #| warning: false
 library(ggdag)
+library(ggokabeito)
 
 coord_dag <- list(
-  x = c(wdw_ticket_season = -1, close = -1, weather_wdwhigh = -2, extra_magic_morning = 0, avg_spostmin = 1, avg_sactmin = 2),
-  y = c(wdw_ticket_season = -1, close = 1, weather_wdwhigh = 0.25, extra_magic_morning = 0, avg_spostmin = 0, avg_sactmin = 0)
+  x = c(Season = -1, close = -1, weather = -2, extra = 0, x = 1, y = 2),
+  y = c(Season = -1, close = 1, weather = 0, extra = 0, x = 0, y = 0)
 )
 
 labels <- c(
-  avg_sactmin = "Average actual wait",
-  avg_spostmin = "Average posted wait ",
-  extra_magic_morning = "Extra Magic Morning",
-  wdw_ticket_season = "Ticket Season",
-  weather_wdwhigh = "Historic high temperature",
+  extra = "Extra Magic Morning",
+  x = "Average posted wait ",
+  y = "Average actual wait",
+  Season = "Ticket Season",
+  weather = "Historic high temperature",
   close = "Time park closed"
 )
 
-wait_time_dag <- dagify(
-  avg_sactmin ~ avg_spostmin + close + wdw_ticket_season + weather_wdwhigh + extra_magic_morning,
-  avg_spostmin ~ weather_wdwhigh + close + wdw_ticket_season + extra_magic_morning,
+dagify(
+  y ~ x + close + Season + weather + extra,
+  x ~ weather + close + Season + extra,
+  extra ~ weather + close + Season,
   coords = coord_dag,
-  labels = labels
-)
-
-wait_time_dag |>
-  ggdag(use_labels = "label", text = FALSE) +
-  theme_void() +
-  scale_x_continuous(
-    limits = c(-2.25, 2.25), 
-    breaks = c(-2, -1, 0, 1, 2), 
-    labels = c("\n(one year ago)", "\n(6 months ago)", "\n(3 months ago)", "8am-9am\n(Today)", "9am-10am\n(Today)")
+  labels = labels,
+  exposure = "x",
+  outcome = "y"
+) |>
+  tidy_dagitty() |>
+  node_status() |>
+  ggplot(
+    aes(x, y, xend = xend, yend = yend, color = status)
+  ) +
+  geom_dag_edges_arc(curvature = c(rep(0, 7), .2, 0, .2, .2, 0), edge_colour = "grey70") +
+  geom_dag_point() +
+  geom_dag_label_repel(
+    aes(x, y, label = label),
+    box.padding = 3.5, 
+    inherit.aes = FALSE,
+    max.overlaps = Inf, 
+    family = "sans",
+    seed = 1602,
+    label.size = NA, 
+    label.padding = 0.1,
+    size = 14 / 3
+  )  + 
+  scale_color_okabe_ito(na.value = "grey90") +
+  theme_dag() +
+  theme(
+    legend.position = "none",
+    axis.text.x = element_text()
   ) +
-  theme(axis.text.x = element_text())
+  coord_cartesian(clip = "off") +
+  scale_x_continuous(
+    limits = c(-2.25, 2.25),
+    breaks = c(-2, -1, 0, 1, 2),
+    labels = c(
+      "\n(one year ago)",
+      "\n(6 months ago)",
+      "\n(3 months ago)",
+      "8am-9am\n(Today)",
+      "9am-10am\n(Today)"
+    )
+  )
 ```
 
 First, let’s wrangle our data to address our question: do posted wait times at 8 affect actual wait times at 9? We’ll join the baseline data (all covariates and posted wait time at 8) with the outcome (average actual time). We also have a lot of missingness for `avg_sactmin`, so we’ll drop unobserved values for now.
@@ -151,7 +181,7 @@ fit_gcomp <- function(split, ...) {
   
   
   # predict actual wait time for each cloned dataset
-
+  
   
   # calculate ATE
   bind_cols(predicted_yes, predicted_no) |>
 
@@ -8,6 +8,7 @@ format: html
 library(tidyverse)
 library(broom)
 library(propensity)
+library(causaldata)
 ```
 
 In this example, we'll consider loss to follow-up in the NHEFS study. We'll use the binary exposure we used earlier in the workshop: does quitting smoking (`qsmk`) increase weight (`wt82_71`)? This time, however, we'll adjust for loss to follow-up (people who dropped out of the study between observation periods) using inverse probability of censoring weights.
 
@@ -20,37 +20,67 @@ In the touringplans data set, we have information about the posted waiting times
 #| message: false
 #| warning: false
 library(ggdag)
+library(ggokabeito)
 
 coord_dag <- list(
-  x = c(wdw_ticket_season = -1, close = -1, weather_wdwhigh = -2, extra_magic_morning = 0, avg_spostmin = 1, avg_sactmin = 2),
-  y = c(wdw_ticket_season = -1, close = 1, weather_wdwhigh = 0.25, extra_magic_morning = 0, avg_spostmin = 0, avg_sactmin = 0)
+  x = c(Season = -1, close = -1, weather = -2, extra = 0, x = 1, y = 2),
+  y = c(Season = -1, close = 1, weather = 0, extra = 0, x = 0, y = 0)
 )
 
 labels <- c(
-  avg_sactmin = "Average actual wait",
-  avg_spostmin = "Average posted wait ",
-  extra_magic_morning = "Extra Magic Morning",
-  wdw_ticket_season = "Ticket Season",
-  weather_wdwhigh = "Historic high temperature",
+  extra = "Extra Magic Morning",
+  x = "Average posted wait ",
+  y = "Average actual wait",
+  Season = "Ticket Season",
+  weather = "Historic high temperature",
   close = "Time park closed"
 )
 
-wait_time_dag <- dagify(
-  avg_sactmin ~ avg_spostmin + close + wdw_ticket_season + weather_wdwhigh + extra_magic_morning,
-  avg_spostmin ~ weather_wdwhigh + close + wdw_ticket_season + extra_magic_morning,
+dagify(
+  y ~ x + close + Season + weather + extra,
+  x ~ weather + close + Season + extra,
+  extra ~ weather + close + Season,
   coords = coord_dag,
-  labels = labels
-)
-
-wait_time_dag |>
-  ggdag(use_labels = "label", text = FALSE) +
-  theme_void() +
-  scale_x_continuous(
-    limits = c(-2.25, 2.25), 
-    breaks = c(-2, -1, 0, 1, 2), 
-    labels = c("\n(one year ago)", "\n(6 months ago)", "\n(3 months ago)", "8am-9am\n(Today)", "9am-10am\n(Today)")
+  labels = labels,
+  exposure = "x",
+  outcome = "y"
+) |>
+  tidy_dagitty() |>
+  node_status() |>
+  ggplot(
+    aes(x, y, xend = xend, yend = yend, color = status)
   ) +
-  theme(axis.text.x = element_text())
+  geom_dag_edges_arc(curvature = c(rep(0, 7), .2, 0, .2, .2, 0), edge_colour = "grey70") +
+  geom_dag_point() +
+  geom_dag_label_repel(
+    aes(x, y, label = label),
+    box.padding = 3.5, 
+    inherit.aes = FALSE,
+    max.overlaps = Inf, 
+    family = "sans",
+    seed = 1602,
+    label.size = NA, 
+    label.padding = 0.1,
+    size = 14 / 3
+  )  + 
+  scale_color_okabe_ito(na.value = "grey90") +
+  theme_dag() +
+  theme(
+    legend.position = "none",
+    axis.text.x = element_text()
+  ) +
+  coord_cartesian(clip = "off") +
+  scale_x_continuous(
+    limits = c(-2.25, 2.25),
+    breaks = c(-2, -1, 0, 1, 2),
+    labels = c(
+      "\n(one year ago)",
+      "\n(6 months ago)",
+      "\n(3 months ago)",
+      "8am-9am\n(Today)",
+      "9am-10am\n(Today)"
+    )
+  )
 ```
 
 First, let’s wrangle our data to address our question: do posted wait times at 8 affect actual wait times at 9? We’ll join the baseline data (all covariates and posted wait time at 8) with the outcome (average actual time). We also have a lot of missingness for `avg_sactmin`, so we’ll drop unobserved values for now.
 
@@ -511,7 +511,7 @@ fit_ipw <- function(split, ...) {
 #| cache: true
 #| code-line-numbers: "|2-3"
 # fit ipw model to bootstrapped samples
-ipw_results <- bootstraps(nhefs_complete, 1000, apparent = TRUE) |>
+ipw_results <- bootstraps(nhefs_complete_uc, 1000, apparent = TRUE) |>
   mutate(results = map(splits, fit_ipw)) 
 ```
 
 
@@ -164,7 +164,7 @@ dagify(
   ggplot(
     aes(x, y, xend = xend, yend = yend, color = status)
   ) +
-  geom_dag_edges_arc(curvature = c(rep(0, 5), .3)) +
+  geom_dag_edges_arc(curvature = c(rep(0, 5), .3, 0)) +
   geom_dag_point() +
   geom_dag_label_repel(seed = 1630) +
   scale_color_okabe_ito(na.value = "grey90") +