Skip to content

Commit cced149

Browse files
author
Kaitlyn Johnson
committed
fix figures and add them to supplement
1 parent 41be0d4 commit cced149

File tree

8 files changed

+108
-26
lines changed

8 files changed

+108
-26
lines changed

R/fig_overall_scores.R

Lines changed: 48 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -983,6 +983,8 @@ get_scores_by_nowcast_date <- function(a, b, c, d, e, f, g, h, i, j, k, l,
983983
#'
984984
#' @param scores_obj Scoringutils scores object
985985
#' @param seq_counts_by_loc Total sequences for each location
986+
#' @param plot_name Name of plot, used as the file name of the saved PNG
987+
#' @param output_fp directory to save figures
986988
#' @param score_type Character string indicating which score metric to use
987989
#' @param remove_legend Boolean indicating whether to keep legend, default
988990
#' is TRUE.
@@ -993,6 +995,10 @@ get_scores_by_nowcast_date <- function(a, b, c, d, e, f, g, h, i, j, k, l,
993995
#' @autoglobal
994996
get_plot_avg_rel_skill_by_loc <- function(scores_obj,
995997
seq_counts_by_loc,
998+
plot_name,
999+
output_fp = file.path(
1000+
"output", "figs", "supp"
1001+
),
9961002
score_type = c(
9971003
"brier_score",
9981004
"energy_score"
@@ -1067,13 +1073,23 @@ get_plot_avg_rel_skill_by_loc <- function(scores_obj,
10671073
shape = "none"
10681074
)
10691075
}
1076+
ggsave(
1077+
file.path(output_fp, glue::glue("{plot_name}.png")),
1078+
plot = p,
1079+
width = 10,
1080+
height = 6,
1081+
dpi = 300
1082+
)
1083+
10701084
return(p)
10711085
}
10721086

10731087
#' Brier/Energy Relative averaged across locations by nowcast date
10741088
#'
10751089
#' @param scores_obj Scoringutils scores object
10761090
#' @param score_type Character string indicating which score metric to use
1091+
#' @param plot_name Name of plot, used as the file name of the saved PNG
1092+
#' @param output_fp directory to save figures
10771093
#' @param remove_legend Boolean indicating whether to remove the legend,
10781094
#' default is FALSE.
10791095
#' @param title Character string indicating title, default is NULL.
@@ -1083,12 +1099,16 @@ get_plot_avg_rel_skill_by_loc <- function(scores_obj,
10831099
#' @returns ggplot object
10841100
#' @autoglobal
10851101
get_plot_avg_rel_skill_by_t <- function(scores_obj,
1102+
plot_name,
1103+
output_fp = file.path(
1104+
"output", "figs", "supp"
1105+
),
10861106
score_type = c(
10871107
"brier_score",
10881108
"energy_score"
10891109
),
10901110
rel_skill_plot = TRUE,
1091-
remove_legend = TRUE,
1111+
remove_legend = FALSE,
10921112
title = NULL) {
10931113
score_type <- rlang::arg_match(score_type)
10941114
plot_components_list <- plot_components()
@@ -1150,7 +1170,6 @@ get_plot_avg_rel_skill_by_t <- function(scores_obj,
11501170
scale_y_continuous(trans = "log10") +
11511171
coord_cartesian(ylim = c(1 / 3, 3)) +
11521172
theme(
1153-
axis.text.x = element_blank(),
11541173
axis.title.x = element_text(size = 12)
11551174
)
11561175

@@ -1164,13 +1183,23 @@ get_plot_avg_rel_skill_by_t <- function(scores_obj,
11641183
if (!is.null(title)) {
11651184
p <- p + ggtitle(glue::glue("{title}"))
11661185
}
1186+
1187+
ggsave(
1188+
file.path(output_fp, glue::glue("{plot_name}.png")),
1189+
plot = p,
1190+
width = 10,
1191+
height = 6,
1192+
dpi = 300
1193+
)
11671194
return(p)
11681195
}
11691196

11701197
#' Brier/Energy Relative skill averaged by model
11711198
#'
11721199
#' @param scores_obj Scoringutils scores object
11731200
#' @param score_type Character string indicating which score metric to use
1201+
#' @param plot_name Name of plot, used as the file name of the saved PNG
1202+
#' @param output_fp directory to save figures
11741203
#' @param remove_legend Boolean indicating whether to remove the legend,
11751204
#' default is FALSE.
11761205
#' @param add_shape Boolean indicating whether to add the shape legend,
@@ -1182,11 +1211,15 @@ get_plot_avg_rel_skill_by_t <- function(scores_obj,
11821211
#' @returns ggplot object
11831212
#' @autoglobal
11841213
get_plot_avg_rel_skill_overall <- function(scores_obj,
1214+
plot_name,
1215+
output_fp = file.path(
1216+
"output", "figs", "supp"
1217+
),
11851218
score_type = c(
11861219
"brier_score",
11871220
"energy_score"
11881221
),
1189-
remove_legend = TRUE,
1222+
remove_legend = FALSE,
11901223
add_shape = FALSE,
11911224
title = NULL) {
11921225
score_type <- rlang::arg_match(score_type)
@@ -1243,12 +1276,14 @@ get_plot_avg_rel_skill_overall <- function(scores_obj,
12431276
coord_cartesian(ylim = c(1 / 1.8, 1.8)) +
12441277
guides(
12451278
color = guide_legend(
1279+
position = "top",
12461280
title.position = "top",
1247-
nrow = 1
1281+
nrow = 3
12481282
),
12491283
shape = guide_legend(
1284+
position = "top",
12501285
title.position = "top",
1251-
nrow = 1
1286+
nrow = 3
12521287
)
12531288
)
12541289

@@ -1272,5 +1307,13 @@ get_plot_avg_rel_skill_overall <- function(scores_obj,
12721307
if (!is.null(title)) {
12731308
p <- p + ggtitle(glue::glue("{title}"))
12741309
}
1310+
1311+
ggsave(
1312+
file.path(output_fp, glue::glue("{plot_name}.png")),
1313+
plot = p,
1314+
width = 8,
1315+
height = 10,
1316+
dpi = 300
1317+
)
12751318
return(p)
12761319
}

R/globals.R

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,6 @@ utils::globalVariables(c(
132132
"nowcast_date", # <get_plot_avg_rel_skill_by_t>
133133
"scaled_rel_skill", # <get_plot_avg_rel_skill_by_t>
134134
"model", # <get_plot_avg_rel_skill_overall>
135-
"nowcast_date", # <get_plot_avg_rel_skill_overall>
136135
"scaled_rel_skill", # <get_plot_avg_rel_skill_overall>
137136
"model_id", # <prepare_submission_data>
138137
"location", # <prepare_submission_data>

docs/supplement.docx

284 KB
Binary file not shown.

docs/supplement.qmd

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ output:
1616
The following models began submissions following the initial assessment period (October 9th, 2024 to June 4th, 2025).
1717

1818
| Model name | Description | Citation | Data Sources | Locations | Output Type | Ensemble? |
19-
|----|----|----|----|----|----|----|
19+
|-----------|-----------|-----------|-----------|-----------|-----------|-----------|
2020
| `open_hier_mlr` | A Bayesian hierarchical multinomial logistic regression (MLR) model for nowcasting COVID variants using variant counts based on GISAID sequences. Regression coefficients are modeled hierarchically across locations. | Abousamra E, Figgins M, Bedford T (2024) Fitness models provide accurate short-term forecasts of SARS-CoV-2 variant frequency. PLOS Computational Biology 20(9): e1012443. https://doi.org/10.1371/journal.pcbi.1012443 | GISAID | All | Point and probabilistic | No |
2121
| `gisaid_hier_mlr` | A Bayesian hierarchical multinomial logistic regression (MLR) model for nowcasting COVID variants using variant counts based on INSDC sequences. Regression coefficients are modeled hierarchically across locations. | Abousamra E, Figgins M, Bedford T (2024) Fitness models provide accurate short-term forecasts of SARS-CoV-2 variant frequency. PLOS Computational Biology 20(9): e1012443. https://doi.org/10.1371/journal.pcbi.1012443 | INSDC | All | Point and probabilistic | No |
2222
| `ensemble` | An ensemble of the hub forecasts, created by taking an equally weighted sample of all forecasts that submit samples for a given week, using the function linear_pool from the hubEnsembles package. | https://github.com/hubverse-org/hubEnsembles/tree/main | Other model submission files | All | Point and probabilistic | No |
@@ -54,10 +54,19 @@ The following models began submissions following the initial assessment period (
5454

5555
## Additional results
5656

57-
![Fig. S12 Comparison of the observed clade proportions by sequence collection week across nowcast dates (columns) in the California specific data source for sequencing called California COVIDNet (top row) and the NCBI GenBank data provided by the Hub and used by most other models (bottom row). Colors indicate clade, dashed line indicates the nowcast date. In most weeks, the California specifc dataset, California COVIDNet, has observed clade proportions for a more recent week than the NCBI GenBank dataset does for California.](../output/figs/supp/CA_source_seq_props.png)
57+
![Fig. S12 Average of relative scaled skill scores on the Brier score in the US excluding California. In this figure, we compute the relative scaled skill for each horizon, location, and nowcast date and then average across the relative scaled skill scores, which has the effect of weighting each horizon, location, and nowcast date equally rather than weighting by the number of observed sequences.](../output/figs/supp/avg_rel_skill_brier.png)
5858

59-
![Fig. S13 Ratio of the number of sequences available in the NCBI GenBank dataset for California compared to the California-specific dataset across collection dates for a subset of nowcast dates (colums). Patterns indicate that even further back in time, the GenBank dataset at most makes up between 50% and 90% of the California specific data volume, with a particular drop off closer to the nowcast date.](../output/figs/supp/CA_source_seq_level_comparison.png)
59+
![Fig. S13 Average of relative scaled skill scores on the energy score in the US excluding California. In this figure, we compute the relative scaled skill for each horizon, location, and nowcast date and then average across the relative scaled skill scores, which has the effect of weighting each horizon, location, and nowcast date equally rather than weighting by the number of observed sequences.](../output/figs/supp/avg_rel_skill_energy.png)
6060

61-
![Fig. S14 Absolute Brier (top) and energy (bottom) in the U.S. excluding California (left) and California (right).](../output/figs/overall_scores/supp/absolute_scores_by_horizon.png)
61+
![Fig. S14 Average of relative scaled skill scores on the Brier score in California. In this figure, we compute the relative scaled skill for each horizon, location, and nowcast date and then average across the relative scaled skill scores, which has the effect of weighting each horizon, location, and nowcast date equally rather than weighting by the number of observed sequences.](../output/figs/supp/avg_rel_skill_brier_ca.png)
6262

63-
![Fig. S15 Bias over time for three example states in the US during the 25A emergence.](../output/figs/zoom_25A/supp/bias_over_time_25A.png)
63+
![Fig. S15 Average of relative scaled skill scores on the energy score in California. In this figure, we compute the relative scaled skill for each horizon, location, and nowcast date and then average across the relative scaled skill scores, which has the effect of weighting each horizon, location, and nowcast date equally rather than weighting by the number of observed sequences.](../output/figs/supp/avg_rel_skill_energy_ca.png)
64+
65+
66+
![Fig. S16 Comparison of the observed clade proportions by sequence collection week across nowcast dates (columns) in the California-specific data source for sequencing called California COVIDNet (top row) and the NCBI GenBank data provided by the Hub and used by most other models (bottom row). Colors indicate clade, dashed line indicates the nowcast date. In most weeks, the California-specific dataset, California COVIDNet, has observed clade proportions for a more recent week than the NCBI GenBank dataset does for California.](../output/figs/supp/CA_source_seq_props.png)
67+
68+
![Fig. S17 Ratio of the number of sequences available in the NCBI GenBank dataset for California compared to the California-specific dataset across collection dates for a subset of nowcast dates (columns). Patterns indicate that even further back in time, the GenBank dataset at most makes up between 50% and 90% of the California-specific data volume, with a particular drop off closer to the nowcast date.](../output/figs/supp/CA_source_seq_level_comparison.png)
69+
70+
![Fig. S18 Absolute Brier (top) and energy (bottom) in the U.S. excluding California (left) and California (right) by horizon.](../output/figs/overall_scores/supp/absolute_scores_by_horizon.png)
71+
72+
![Fig. S19 Bias over time for three example states in the US during the 25A emergence.](../output/figs/zoom_25A/supp/bias_over_time_25A.png)

man/get_plot_avg_rel_skill_by_loc.Rd

Lines changed: 6 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/get_plot_avg_rel_skill_by_t.Rd

Lines changed: 7 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/get_plot_avg_rel_skill_overall.Rd

Lines changed: 7 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

targets/fig_overall_targets.R

Lines changed: 26 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -371,44 +371,51 @@ fig_overall_targets <- list(
371371
command = get_plot_avg_rel_skill_overall(
372372
scores_obj = su_scores_ep,
373373
score_type = "brier_score",
374-
title = "US minus CA"
374+
title = "US excluding CA",
375+
plot_name = "avg_rel_skill_brier"
375376
)
376377
),
377378
tar_target(
378379
name = plot_avg_overall_rel_skill_energy,
379380
command = get_plot_avg_rel_skill_overall(
380381
scores_obj = su_scores_ep,
381-
score_type = "energy_score"
382+
score_type = "energy_score",
383+
title = "US excluding CA",
384+
plot_name = "avg_rel_skill_energy"
382385
)
383386
),
384387
tar_target(
385388
name = plot_avg_rel_skill_brier_by_loc,
386389
command = get_plot_avg_rel_skill_by_loc(
387390
scores = su_scores_ep,
388391
seq_counts_by_loc = seq_counts_by_loc,
389-
score_type = "brier_score"
392+
score_type = "brier_score",
393+
plot_name = "avg_rel_skill_brier_loc"
390394
)
391395
),
392396
tar_target(
393397
name = plot_avg_rel_skill_brier_by_t,
394398
command = get_plot_avg_rel_skill_by_t(
395399
scores = su_scores_ep,
396-
score_type = "brier_score"
400+
score_type = "brier_score",
401+
plot_name = "avg_rel_skill_brier_t"
397402
)
398403
),
399404
tar_target(
400405
name = plot_avg_rel_skill_energy_by_loc,
401406
command = get_plot_avg_rel_skill_by_loc(
402407
scores = su_scores_ep,
403408
seq_counts_by_loc = seq_counts_by_loc,
404-
score_type = "energy_score"
409+
score_type = "energy_score",
410+
plot_name = "avg_rel_skill_energy_loc"
405411
)
406412
),
407413
tar_target(
408414
name = plot_avg_rel_skill_energy_by_t,
409415
command = get_plot_avg_rel_skill_by_t(
410416
scores = su_scores_ep,
411-
score_type = "energy_score"
417+
score_type = "energy_score",
418+
plot_name = "avg_rel_skill_energy_t"
412419
)
413420
),
414421
## CA ---------------------------------------------------------
@@ -417,44 +424,51 @@ fig_overall_targets <- list(
417424
command = get_plot_avg_rel_skill_overall(
418425
scores_obj = su_scores_ca,
419426
score_type = "brier_score",
420-
title = "US minus CA"
427+
title = "CA",
428+
plot_name = "avg_rel_skill_brier_ca"
421429
)
422430
),
423431
tar_target(
424432
name = plot_avg_overall_rel_skill_energy_ca,
425433
command = get_plot_avg_rel_skill_overall(
426434
scores_obj = su_scores_ca,
427-
score_type = "energy_score"
435+
score_type = "energy_score",
436+
title = "CA",
437+
plot_name = "avg_rel_skill_energy_ca"
428438
)
429439
),
430440
tar_target(
431441
name = plot_avg_rel_skill_brier_by_loc_ca,
432442
command = get_plot_avg_rel_skill_by_loc(
433443
scores = su_scores_ca,
434444
seq_counts_by_loc = seq_counts_by_loc,
435-
score_type = "brier_score"
445+
score_type = "brier_score",
446+
plot_name = "avg_rel_skill_brier_loc_ca"
436447
)
437448
),
438449
tar_target(
439450
name = plot_avg_rel_skill_brier_by_t_ca,
440451
command = get_plot_avg_rel_skill_by_t(
441452
scores = su_scores_ca,
442-
score_type = "brier_score"
453+
score_type = "brier_score",
454+
plot_name = "avg_rel_skill_brier_t_ca"
443455
)
444456
),
445457
tar_target(
446458
name = plot_avg_rel_skill_energy_by_loc_ca,
447459
command = get_plot_avg_rel_skill_by_loc(
448460
scores = su_scores_ca,
449461
seq_counts_by_loc = seq_counts_by_loc,
450-
score_type = "energy_score"
462+
score_type = "energy_score",
463+
plot_name = "avg_rel_skill_energy_loc_ca"
451464
)
452465
),
453466
tar_target(
454467
name = plot_avg_rel_skill_energy_by_t_ca,
455468
command = get_plot_avg_rel_skill_by_t(
456469
scores = su_scores_ca,
457-
score_type = "energy_score"
470+
score_type = "energy_score",
471+
plot_name = "avg_rel_skill_energy_t_ca"
458472
)
459473
)
460474
)

0 commit comments

Comments
 (0)