epiforecasts · kaitejohnson · Jan 9, 2026 · Dec 1, 2025 · Dec 29, 2025 · Dec 29, 2025
diff --git a/R/EDA_plots.R b/R/EDA_plots.R
@@ -266,13 +266,13 @@ get_bar_chart_overall_scores <- function(scores) {
 #' @returns ggplot object
 #' @autoglobal
 get_plot_scores_by_date <- function(scores) {
-  scores_by_loc <- scores |>
+  scores_by_date <- scores |>
     summarise_scores(by = c(
       "model", "include_ww",
       "hosp_data_real_time", "forecast_date"
     )) |>
     mutate(model_ww = glue::glue("{model}-{include_ww}-{hosp_data_real_time}"))
-  p <- ggplot(scores_by_loc) +
+  p <- ggplot(scores_by_date) +
     geom_bar(
       aes(
         x = forecast_date,
@@ -287,3 +287,36 @@ get_plot_scores_by_date <- function(scores) {
     ggtitle("Scores across all locations by forecast dates")
   return(p)
 }
+
+#' Get scatterplot of scores by forecast date and location
+#'
+#' @param scores Data.frame of scores from across locations and forecast dates
+#' @importFrom dplyr filter rename
+#' @importFrom ggplot2 ggplot aes geom_point geom_line
+#' @importFrom scoringutils summarise_scores
+#' @importFrom tidyr pivot_wider
+#' @returns ggplot object
+#' @autoglobal
+get_scatterplot_scores <- function(scores) {
+  scores_by_forecast <- scores |>
+    summarise_scores(by = c(
+      "model", "include_ww",
+      "hosp_data_real_time", "forecast_date",
+      "location"
+    )) |>
+    filter(model == "wwinference") |>
+    pivot_wider(
+      names_from = include_ww,
+      values_from = wis,
+      id_cols = c(forecast_date, location)
+    ) |>
+    # include_ww (assumed logical) yields columns named "TRUE"/"FALSE";
+    # they must be quoted because tidyselect rejects bare logical values
+    rename(ww_plus_hosp = "TRUE", hosp_only = "FALSE")
+
+  # Points compare the two WIS columns; the dashed line marks y = x
+  p <- ggplot(scores_by_forecast) +
+    geom_point(aes(x = hosp_only, y = ww_plus_hosp)) +
+    geom_line(aes(x = hosp_only, y = hosp_only), linetype = "dashed")
+  return(p)
+}
diff --git a/R/convert_to_su_object.R b/R/convert_to_su_object.R
@@ -1,20 +1,20 @@
 #' Convert the scores to a scoringutils object
 #'
-#' @param scores_data Data.frame from Variant Nowcast Hub GitHub
+#' @param scores_raw Data.frame of scores
 #' @importFrom data.table setattr as.data.table
-#' @importFrom rlang arg_match
 #' @importFrom dplyr rename select
 #' @returns scoringutils object
-convert_to_su_object <- function(scores_data) {
-  scores2 <- data.table::as.data.table(scores_data)
-  class(scores2) <- c("scores", class(scores2))
+#' @autoglobal
+convert_to_su_object <- function(scores_raw) {
+  scores <- data.table::as.data.table(scores_raw)
+  class(scores) <- c("scores", class(scores))
   scores_su <- data.table::setattr(
-    scores2,
+    scores,
     "metrics",
     c(
-      "wis", "underprediction", "overprediction", "dispersion",
-      "bias", "interval_coverage_50", "interval_coverage_90",
-      "ae_median"
+      "wis", "overprediction", "underprediction",
+      "dispersion", "bias", "interval_coverage_50",
+      "interval_coverage_90", "ae_median"
     )
   )
   return(scores_su)

diff --git a/R/globals.R b/R/globals.R
@@ -31,6 +31,13 @@ utils::globalVariables(c(
   "forecast_date", # <get_plot_scores_by_date>
   "wis", # <get_plot_scores_by_date>
   "model_ww", # <get_plot_scores_by_date>
+  "model", # <get_scatterplot_scores>
+  "include_ww", # <get_scatterplot_scores>
+  "wis", # <get_scatterplot_scores>
+  "forecast_date", # <get_scatterplot_scores>
+  "location", # <get_scatterplot_scores>
+  "hosp_only", # <get_scatterplot_scores>
+  "ww_plus_hosp", # <get_scatterplot_scores>
   "site", # <fit_wwinference_wrapper>
   "lab", # <fit_wwinference_wrapper>
   "log_genome_copies_per_ml", # <fit_wwinference_wrapper>

diff --git a/R/prep_scores_to_model.R b/R/prep_scores_to_model.R
@@ -0,0 +1,9 @@
+prep_scores_to_model <- function(scores_long,
+                                 ww_metadata) {
+  # TODO: pivot scores from long to wide (not implemented yet)
+
+  # TODO: join wastewater metadata (not implemented yet)
+
+  # Placeholder: neither argument is used yet; the stub always returns NULL
+  return(NULL)
+}
diff --git a/_targets.R b/_targets.R
@@ -1,5 +1,13 @@
-# Targets script for generating forecasts and performing immediate
-# post-processing (quantiling and scoring)
+# Targets script for analysing forecasts and scores
+# This pipeline assumes that the `output/` folder contains:
+# - overall_data_all_runs/scores.csv: a single file with all of the scores
+# for all forecasts for the 3 models (wwinference with and without ww and
+# baseline ARIMA)
+# - individual_forecasts_all_runs/{forecast_date}/{location}/data: hospital
+# admissions quantiles for wwinference with and without wastewater, R(t)
+# estimates for the location with and without wastewater, and predicted
+# quantiled wastewater concentrations
+
 
 # The pipeline can be run using `tar_make()`
 
@@ -28,9 +36,6 @@ functions <- list.files(here("R"), full.names = TRUE)
 walk(functions, source)
 rm("functions")
 
-n_workers <- as.integer(floor(future::availableCores() / 4))
-plan(multisession, workers = n_workers)
-
 # load target modules
 targets <- list.files(here("targets"), full.names = TRUE)
 targets <- grep("*\\.R", targets, value = TRUE)
@@ -59,45 +64,41 @@ tar_option_set(
   error = "continue"
 )
 
-## Set up the date:location:model:ww+/-:right-trunc+/- permutations
-set_up <- list(
-  create_permutations_targets
+# Analysis config
+analysis_config <- list(
+  # Full set of date, location and model combinations that
+  # were run
+  create_permutations_targets,
+  # Set of dates and locations to focus on in example figures +
+  # specifications of any post-processing model outputs
+  analysis_config_targets
 )
 
-
-## Iterate over all permutations. For each:
-# - extract the necessary data
-# - pre-process the data based on the model's requirements
-# - fit the model
-# - extract posterior hospital admissions (calibration and forecast)
-# - score the forecasts using CRPS and extract
-# - quantile the calibration and forecasted admissions and extract
-# - extract input data (hosp and/or ww)
-# - extract model diagnostics
-
-# Current set up: uses the `scenarios` tibble to do dynamic branching within
-# each function via pattern = map(ind_data_created, scenarios)
-load_data <- list(
-  # Load data for each location/forecast date combination
-  load_data_targets,
-  load_baseline_data_targets
-)
+# Wastewater metadata
 get_metadata <- list(
   get_metadata_targets
 )
-fit_models <- list(
-  fit_model_targets,
-  fit_baseline_model_targets
+
+# Secondary outputs
+secondary_outputs <- list(
+  # GAM meta-model on scores
+  run_gam_targets
+  # compute coverage metrics (?)
 )
 
-scoring <- list(
-  scoring_targets
+# Figures
+plot_targets <- list(
+  analysis_EDA_plot_targets
+  # Fig 1: visual comparison for a single forecast date
+  # Fig 2: visual comparison + scores across forecast dates
+  # Fig 3: overall, by horizon, by location, by forecast date
+  # by location and forecast date
+  # Fig 4: Model-based evaluation results
 )
 
 list(
-  set_up,
-  load_data,
+  analysis_config,
   get_metadata,
-  fit_models,
-  scoring
+  secondary_outputs,
+  plot_targets
 )
diff --git a/_targets_model_run.R b/_targets_model_run.R
@@ -0,0 +1,100 @@
+# Targets script for generating forecasts and performing immediate
+# post-processing (quantiling and scoring)
+
+# The pipeline can be run using `tar_make(script = "_targets_model_run.R")`
+
+library(targets)
+library(jsonlite)
+library(httr)
+library(tarchetypes)
+library(wwinference)
+library(dplyr)
+library(ggplot2)
+library(readr)
+library(here)
+library(purrr)
+library(lubridate)
+library(tidyr)
+library(glue)
+library(fs)
+library(rlang)
+library(scoringutils)
+library(forecast)
+library(future)
+library(future.callr)
+
+# load functions
+functions <- list.files(here("R"), full.names = TRUE)
+walk(functions, source)
+rm("functions")
+
+n_workers <- max(1L, future::availableCores() %/% 4L) # never 0 workers
+plan(multisession, workers = n_workers)
+
+# load target modules
+targets <- list.files(here("targets"), full.names = TRUE)
+targets <- grep("*\\.R", targets, value = TRUE)
+purrr::walk(targets, source)
+
+tar_option_set(
+  packages = c(
+    "wwinference",
+    "tibble",
+    "dplyr",
+    "ggplot2",
+    "readr",
+    "lubridate",
+    "tidyr",
+    "glue",
+    "forecast",
+    "jsonlite",
+    "httr"
+  ),
+  workspace_on_error = TRUE,
+  storage = "worker",
+  retrieval = "worker",
+  memory = "transient",
+  garbage_collection = TRUE,
+  format = "parquet", # default storage format
+  error = "continue"
+)
+
+## Set up the date:location:model:ww+/-:right-trunc+/- permutations
+set_up <- list(
+  create_permutations_targets
+)
+
+
+## Iterate over all permutations. For each:
+# - extract the necessary data
+# - pre-process the data based on the model's requirements
+# - fit the model
+# - extract posterior hospital admissions (calibration and forecast)
+# - score the forecasts using CRPS and extract
+# - quantile the calibration and forecasted admissions and extract
+# - extract input data (hosp and/or ww)
+# - extract model diagnostics
+
+# Current set up: uses the `scenarios` tibble to do dynamic branching within
+# each function via pattern = map(ind_data_created, scenarios)
+load_data <- list(
+  # Load data for each location/forecast date combination
+  load_data_targets,
+  load_baseline_data_targets
+)
+
+fit_models <- list(
+  fit_model_targets,
+  fit_baseline_model_targets
+)
+
+scoring <- list(
+  scoring_targets
+)
+
+list(
+  set_up,
+  load_data,
+  fit_models,
+  scoring
+)
diff --git a/man/convert_to_su_object.Rd b/man/convert_to_su_object.Rd
diff --git a/man/get_scatterplot_scores.Rd b/man/get_scatterplot_scores.Rd
diff --git a/scratch/explore_results.R b/scratch/explore_results.R
@@ -0,0 +1,33 @@
+scores <- read_csv(file.path("output", "overall_data", "scores.csv"))
+
+scores_overall <- scores |>
+  group_by(model, include_ww) |>
+  summarise(wis = mean(wis)) |>
-  summarise(wis = mean(wis)) |>
+  summarise(wis = mean(wis, na.rm = TRUE)) |>
-  summarise(wis = mean(wis)) |>
+  summarise(wis = mean(wis, na.rm = TRUE)) |>
+  ungroup() |>
+  mutate(model_ww = glue::glue("{model}-{include_ww}"))
+
+
+ggplot(scores_overall) +
+  geom_bar(aes(x = model_ww, y = wis, fill = model_ww),
+    stat = "identity", position = "stack"
+  )
+
+rwis <- scores_overall$wis / scores_overall$wis[2]
+
+# Mean WIS by date; na.rm = TRUE for consistency with `scores_overall`
+scores_by_date <- scores |>
+  group_by(model, include_ww, forecast_date) |>
+  summarise(wis = mean(wis, na.rm = TRUE), .groups = "drop") |>
+  mutate(model_ww = glue::glue("{model}-{include_ww}"))
+
+ggplot(scores_by_date) +
+  geom_line(aes(x = forecast_date, y = wis, color = model_ww))
+
+# Mean WIS by location; na.rm = TRUE for consistency with `scores_overall`
+scores_by_loc <- scores |>
+  group_by(model, include_ww, location) |>
+  summarise(wis = mean(wis, na.rm = TRUE), .groups = "drop") |>
+  mutate(model_ww = glue::glue("{model}-{include_ww}"))
+
+ggplot(scores_by_loc) +
+  geom_point(aes(x = location, y = wis, color = model_ww))
diff --git a/targets/analysis_EDA_plot_targets.R b/targets/analysis_EDA_plot_targets.R
@@ -0,0 +1,10 @@
+analysis_EDA_plot_targets <- list(
+  tar_target(
+    name = plot_scores_by_date,
+    command = get_plot_scores_by_date(scores)
+  ),
+  tar_target(
+    name = scatterplot_scores,
+    command = get_scatterplot_scores(scores)
+  )
+)
diff --git a/targets/analysis_config_targets.R b/targets/analysis_config_targets.R
@@ -0,0 +1,24 @@
+# Targets configuring the analysis pipeline: wastewater data as of each
+# forecast date and the path to the combined scores file.
+analysis_config_targets <- list(
+  # Dynamic branching: one branch per element of the `scenarios` target
+  tar_target(
+    ww_data_post,
+    get_ww_as_of_forecast_date(
+      forecast_date = scenarios$forecast_date,
+      location_name = scenarios$location_name,
+      location_abbr = scenarios$location_abbr,
+      calibration_period = calibration_period_wwinference,
+      path_to_lod_vals = path_to_lod_vals
+    ),
+    pattern = map(scenarios)
+  ),
+  # The command returns a path string. A default storage format of
+  # "parquet" (as set in `_targets_model_run.R`) expects a data frame, so
+  # use format = "file", which stores the path and tracks the file itself.
+  tar_target(
+    name = scores_fp,
+    command = file.path("output", "overall_data_all_runs", "scores.csv"),
+    format = "file"
+  )
+)