epiforecasts
diff --git a/‎R/fig_overall_scores.R‎
Lines changed: 48 additions & 5 deletions b/‎R/fig_overall_scores.R‎
Lines changed: 48 additions & 5 deletions
diff --git a/‎R/globals.R‎
Lines changed: 0 additions & 1 deletion b/‎R/globals.R‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎docs/supplement.docx‎
284 KB b/‎docs/supplement.docx‎
284 KB
diff --git a/‎docs/supplement.qmd‎
Lines changed: 14 additions & 5 deletions b/‎docs/supplement.qmd‎
Lines changed: 14 additions & 5 deletions
diff --git a/‎man/get_plot_avg_rel_skill_by_loc.Rd‎
Lines changed: 6 additions & 0 deletions b/‎man/get_plot_avg_rel_skill_by_loc.Rd‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎man/get_plot_avg_rel_skill_by_t.Rd‎
Lines changed: 7 additions & 2 deletions b/‎man/get_plot_avg_rel_skill_by_t.Rd‎
Lines changed: 7 additions & 2 deletions
diff --git a/‎man/get_plot_avg_rel_skill_overall.Rd‎
Lines changed: 7 additions & 1 deletion b/‎man/get_plot_avg_rel_skill_overall.Rd‎
Lines changed: 7 additions & 1 deletion
diff --git a/‎targets/fig_overall_targets.R‎
Lines changed: 26 additions & 12 deletions b/‎targets/fig_overall_targets.R‎
Lines changed: 26 additions & 12 deletions
@@ -983,6 +983,8 @@ get_scores_by_nowcast_date <- function(a, b, c, d, e, f, g, h, i, j, k, l,
 #'
 #' @param scores_obj Scoringutils scores object
 #' @param seq_counts_by_loc Total sequences for each location
+#' @param plot_name File name (without extension) for the saved PNG
+#' @param output_fp directory to save figures
 #' @param score_type Character string indicating which score metric to use
 #' @param remove_legend Boolean indicating whether to keep legend, default
 #'   is TRUE.
@@ -993,6 +995,10 @@ get_scores_by_nowcast_date <- function(a, b, c, d, e, f, g, h, i, j, k, l,
 #' @autoglobal
 get_plot_avg_rel_skill_by_loc <- function(scores_obj,
                                           seq_counts_by_loc,
+                                          plot_name,
+                                          output_fp = file.path(
+                                            "output", "figs", "supp"
+                                          ),
                                           score_type = c(
                                             "brier_score",
                                             "energy_score"
@@ -1067,13 +1073,23 @@ get_plot_avg_rel_skill_by_loc <- function(scores_obj,
       shape = "none"
     )
   }
+  ggsave(
+    file.path(output_fp, glue::glue("{plot_name}.png")),
+    plot = p,
+    width = 10,
+    height = 6,
+    dpi = 300
+  )
+
   return(p)
 }
 
 #' Brier/Energy Relative averaged across locations by nowcast date
 #'
 #' @param scores_obj Scoringutils scores object
 #' @param score_type Character string indicating which score metric to use
+#' @param plot_name File name (without extension) for the saved PNG
+#' @param output_fp directory to save figures
 #' @param remove_legend Boolean indicating whether to keep legend, default
 #'   is FALSE.
 #' @param title Character string indicating title, default is NULL.
@@ -1083,12 +1099,16 @@ get_plot_avg_rel_skill_by_loc <- function(scores_obj,
 #' @returns ggplot object
 #' @autoglobal
 get_plot_avg_rel_skill_by_t <- function(scores_obj,
+                                        plot_name,
+                                        output_fp = file.path(
+                                          "output", "figs", "supp"
+                                        ),
                                         score_type = c(
                                           "brier_score",
                                           "energy_score"
                                         ),
                                         rel_skill_plot = TRUE,
-                                        remove_legend = TRUE,
+                                        remove_legend = FALSE,
                                         title = NULL) {
   score_type <- rlang::arg_match(score_type)
   plot_components_list <- plot_components()
@@ -1150,7 +1170,6 @@ get_plot_avg_rel_skill_by_t <- function(scores_obj,
     scale_y_continuous(trans = "log10") +
     coord_cartesian(ylim = c(1 / 3, 3)) +
     theme(
-      axis.text.x = element_blank(),
       axis.title.x = element_text(size = 12)
     )
 
@@ -1164,13 +1183,23 @@ get_plot_avg_rel_skill_by_t <- function(scores_obj,
   if (!is.null(title)) {
     p <- p + ggtitle(glue::glue("{title}"))
   }
+
+  ggsave(
+    file.path(output_fp, glue::glue("{plot_name}.png")),
+    plot = p,
+    width = 10,
+    height = 6,
+    dpi = 300
+  )
   return(p)
 }
 
 #' Brier/Energy Relative skill averaged by model
 #'
 #' @param scores_obj Scoringutils scores object
 #' @param score_type Character string indicating which score metric to use
+#' @param plot_name File name (without extension) for the saved PNG
+#' @param output_fp directory to save figures
 #' @param remove_legend Boolean indicating whether to keep legend, default
 #'   is FALSE.
 #' @param add_shape Boolean indicating whether to add the shape legend,
@@ -1182,11 +1211,15 @@ get_plot_avg_rel_skill_by_t <- function(scores_obj,
 #' @returns ggplot object
 #' @autoglobal
 get_plot_avg_rel_skill_overall <- function(scores_obj,
+                                           plot_name,
+                                           output_fp = file.path(
+                                             "output", "figs", "supp"
+                                           ),
                                            score_type = c(
                                              "brier_score",
                                              "energy_score"
                                            ),
-                                           remove_legend = TRUE,
+                                           remove_legend = FALSE,
                                            add_shape = FALSE,
                                            title = NULL) {
   score_type <- rlang::arg_match(score_type)
@@ -1243,12 +1276,14 @@ get_plot_avg_rel_skill_overall <- function(scores_obj,
     coord_cartesian(ylim = c(1 / 1.8, 1.8)) +
     guides(
       color = guide_legend(
+        position = "top",
         title.position = "top",
-        nrow = 1
+        nrow = 3
       ),
       shape = guide_legend(
+        position = "top",
         title.position = "top",
-        nrow = 1
+        nrow = 3
       )
     )
 
@@ -1272,5 +1307,13 @@ get_plot_avg_rel_skill_overall <- function(scores_obj,
   if (!is.null(title)) {
     p <- p + ggtitle(glue::glue("{title}"))
   }
+
+  ggsave(
+    file.path(output_fp, glue::glue("{plot_name}.png")),
+    plot = p,
+    width = 8,
+    height = 10,
+    dpi = 300
+  )
   return(p)
 }
@@ -132,7 +132,6 @@ utils::globalVariables(c(
   "nowcast_date", # <get_plot_avg_rel_skill_by_t>
   "scaled_rel_skill", # <get_plot_avg_rel_skill_by_t>
   "model", # <get_plot_avg_rel_skill_overall>
-  "nowcast_date", # <get_plot_avg_rel_skill_overall>
   "scaled_rel_skill", # <get_plot_avg_rel_skill_overall>
   "model_id", # <prepare_submission_data>
   "location", # <prepare_submission_data>
 
@@ -16,7 +16,7 @@ output:
 The following models began submissions following the initial assessment period (October 9th, 2024 to June 4th, 2025).
 
 | Model name | Description | Citation | Data Sources | Locations | Output Type | Ensemble? |
-|----|----|----|----|----|----|----|
+|-----------|-----------|-----------|-----------|-----------|-----------|-----------|
 | `open_hier_mlr` | A Bayesian hierarchical multinomial logistic regression (MLR) model for nowcasting COVID variants using variant counts based on INSDC sequences. Regression coefficients are modeled hierarchically across locations. | Abousamra E, Figgins M, Bedford T (2024) Fitness models provide accurate short-term forecasts of SARS-CoV-2 variant frequency. PLOS Computational Biology 20(9): e1012443. https://doi.org/10.1371/journal.pcbi.1012443 | INSDC | All | Point and probabilistic | No |
 | `gisaid_hier_mlr` | A Bayesian hierarchical multinomial logistic regression (MLR) model for nowcasting COVID variants using variant counts based on GISAID sequences. Regression coefficients are modeled hierarchically across locations. | Abousamra E, Figgins M, Bedford T (2024) Fitness models provide accurate short-term forecasts of SARS-CoV-2 variant frequency. PLOS Computational Biology 20(9): e1012443. https://doi.org/10.1371/journal.pcbi.1012443 | GISAID | All | Point and probabilistic | No |
 | `ensemble` | An ensemble of the hub forecasts, created by taking an equally weighted sample of all forecasts that submit samples for a given week, using the function linear_pool from the hubEnsembles package. | https://github.com/hubverse-org/hubEnsembles/tree/main | Other model submission files | All | Point and probabilistic | No |
@@ -54,10 +54,19 @@ The following models began submissions following the initial assessment period (
 
 ## Additional results
 
-![Fig. S12 Comparison of the observed clade proportions by sequence collection week across nowcast dates (columns) in the California specific data source for sequencing called California COVIDNet (top row) and the NCBI GenBank data provided by the Hub and used by most other models (bottom row). Colors indicate clade, dashed line indicates the nowcast date. In most weeks, the California specifc dataset, California COVIDNet, has observed clade proportions for a more recent week than the NCBI GenBank dataset does for California.](../output/figs/supp/CA_source_seq_props.png)
+![Fig. S12 Average of relative scaled skill scores on the Brier score in the US excluding California. In this figure, we compute the relative scaled skill for each horizon, location, and nowcast date and then average across the relative scaled skill scores, which has the effect of weighting each horizon, location, and nowcast date equally rather than weighting by the number of observed sequences.](../output/figs/supp/avg_rel_skill_brier.png)
 
-![Fig. S13 Ratio of the number of sequences available in the NCBI GenBank dataset for California compared to the California-specific dataset across collection dates for a subset of nowcast dates (colums). Patterns indicate that even further back in time, the GenBank dataset at most makes up between 50% and 90% of the California specific data volume, with a particular drop off closer to the nowcast date.](../output/figs/supp/CA_source_seq_level_comparison.png)
+![Fig. S13 Average of relative scaled skill scores on the energy score in the US excluding California. In this figure, we compute the relative scaled skill for each horizon, location, and nowcast date and then average across the relative scaled skill scores, which has the effect of weighting each horizon, location, and nowcast date equally rather than weighting by the number of observed sequences.](../output/figs/supp/avg_rel_skill_energy.png)
 
-![Fig. S14 Absolute Brier (top) and energy (bottom) in the U.S. excluding California (left) and California (right).](../output/figs/overall_scores/supp/absolute_scores_by_horizon.png)
+![Fig. S14 Average of relative scaled skill scores on the Brier score in California. In this figure, we compute the relative scaled skill for each horizon, location, and nowcast date and then average across the relative scaled skill scores, which has the effect of weighting each horizon, location, and nowcast date equally rather than weighting by the number of observed sequences.](../output/figs/supp/avg_rel_skill_brier_ca.png)
 
-![Fig. S15 Bias over time for three example states in the US during the 25A emergence.](../output/figs/zoom_25A/supp/bias_over_time_25A.png)
+![Fig. S15 Average of relative scaled skill scores on the energy score in California. In this figure, we compute the relative scaled skill for each horizon, location, and nowcast date and then average across the relative scaled skill scores, which has the effect of weighting each horizon, location, and nowcast date equally rather than weighting by the number of observed sequences.](../output/figs/supp/avg_rel_skill_energy_ca.png)
+
+
+![Fig. S16 Comparison of the observed clade proportions by sequence collection week across nowcast dates (columns) in the California-specific data source for sequencing called California COVIDNet (top row) and the NCBI GenBank data provided by the Hub and used by most other models (bottom row). Colors indicate clade, dashed line indicates the nowcast date. In most weeks, the California-specific dataset, California COVIDNet, has observed clade proportions for a more recent week than the NCBI GenBank dataset does for California.](../output/figs/supp/CA_source_seq_props.png)
+
+![Fig. S17 Ratio of the number of sequences available in the NCBI GenBank dataset for California compared to the California-specific dataset across collection dates for a subset of nowcast dates (columns). Patterns indicate that even further back in time, the GenBank dataset at most makes up between 50% and 90% of the California-specific data volume, with a particular drop off closer to the nowcast date.](../output/figs/supp/CA_source_seq_level_comparison.png)
+
+![Fig. S18 Absolute Brier (top) and energy (bottom) in the U.S. excluding California (left) and California (right) by horizon.](../output/figs/overall_scores/supp/absolute_scores_by_horizon.png)
+
+![Fig. S19 Bias over time for three example states in the US during the 25A emergence.](../output/figs/zoom_25A/supp/bias_over_time_25A.png)
@@ -371,44 +371,51 @@ fig_overall_targets <- list(
     command = get_plot_avg_rel_skill_overall(
       scores_obj = su_scores_ep,
       score_type = "brier_score",
-      title = "US minus CA"
+      title = "US excluding CA",
+      plot_name = "avg_rel_skill_brier"
     )
   ),
   tar_target(
     name = plot_avg_overall_rel_skill_energy,
     command = get_plot_avg_rel_skill_overall(
       scores_obj = su_scores_ep,
-      score_type = "energy_score"
+      score_type = "energy_score",
+      title = "US excluding CA",
+      plot_name = "avg_rel_skill_energy"
     )
   ),
   tar_target(
     name = plot_avg_rel_skill_brier_by_loc,
     command = get_plot_avg_rel_skill_by_loc(
       scores = su_scores_ep,
       seq_counts_by_loc = seq_counts_by_loc,
-      score_type = "brier_score"
+      score_type = "brier_score",
+      plot_name = "avg_rel_skill_brier_loc"
     )
   ),
   tar_target(
     name = plot_avg_rel_skill_brier_by_t,
     command = get_plot_avg_rel_skill_by_t(
       scores = su_scores_ep,
-      score_type = "brier_score"
+      score_type = "brier_score",
+      plot_name = "avg_rel_skill_brier_t"
     )
   ),
   tar_target(
     name = plot_avg_rel_skill_energy_by_loc,
     command = get_plot_avg_rel_skill_by_loc(
       scores = su_scores_ep,
       seq_counts_by_loc = seq_counts_by_loc,
-      score_type = "energy_score"
+      score_type = "energy_score",
+      plot_name = "avg_rel_skill_energy_loc"
     )
   ),
   tar_target(
     name = plot_avg_rel_skill_energy_by_t,
     command = get_plot_avg_rel_skill_by_t(
       scores = su_scores_ep,
-      score_type = "energy_score"
+      score_type = "energy_score",
+      plot_name = "avg_rel_skill_energy_t"
     )
   ),
   ## CA ---------------------------------------------------------
@@ -417,44 +424,51 @@ fig_overall_targets <- list(
     command = get_plot_avg_rel_skill_overall(
       scores_obj = su_scores_ca,
       score_type = "brier_score",
-      title = "US minus CA"
+      title = "CA",
+      plot_name = "avg_rel_skill_brier_ca"
     )
   ),
   tar_target(
     name = plot_avg_overall_rel_skill_energy_ca,
     command = get_plot_avg_rel_skill_overall(
       scores_obj = su_scores_ca,
-      score_type = "energy_score"
+      score_type = "energy_score",
+      title = "CA",
+      plot_name = "avg_rel_skill_energy_ca"
     )
   ),
   tar_target(
     name = plot_avg_rel_skill_brier_by_loc_ca,
     command = get_plot_avg_rel_skill_by_loc(
       scores = su_scores_ca,
       seq_counts_by_loc = seq_counts_by_loc,
-      score_type = "brier_score"
+      score_type = "brier_score",
+      plot_name = "avg_rel_skill_brier_loc_ca"
     )
   ),
   tar_target(
     name = plot_avg_rel_skill_brier_by_t_ca,
     command = get_plot_avg_rel_skill_by_t(
       scores = su_scores_ca,
-      score_type = "brier_score"
+      score_type = "brier_score",
+      plot_name = "avg_rel_skill_brier_t_ca"
     )
   ),
   tar_target(
     name = plot_avg_rel_skill_energy_by_loc_ca,
     command = get_plot_avg_rel_skill_by_loc(
       scores = su_scores_ca,
       seq_counts_by_loc = seq_counts_by_loc,
-      score_type = "energy_score"
+      score_type = "energy_score",
+      plot_name = "avg_rel_skill_energy_loc_ca"
     )
   ),
   tar_target(
     name = plot_avg_rel_skill_energy_by_t_ca,
     command = get_plot_avg_rel_skill_by_t(
       scores = su_scores_ca,
-      score_type = "energy_score"
+      score_type = "energy_score",
+      plot_name = "avg_rel_skill_energy_t_ca"
     )
   )
 )