Skip to content

Commit 226175f

Browse files
authored
Merge pull request #86 from epiforecasts/80-add-external-loq
Issue 80: Get the LOQ from external data source
2 parents 1786db7 + 5bf02e4 commit 226175f

File tree

10 files changed

+141
-6
lines changed

10 files changed

+141
-6
lines changed

NAMESPACE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ export(sample_metrics)
77
export(save_csv)
88
importFrom(cli,cli_warn)
99
importFrom(dplyr,arrange)
10+
importFrom(dplyr,case_when)
1011
importFrom(dplyr,desc)
1112
importFrom(dplyr,distinct)
1213
importFrom(dplyr,filter)
@@ -16,6 +17,7 @@ importFrom(dplyr,mutate)
1617
importFrom(dplyr,pull)
1718
importFrom(dplyr,rename)
1819
importFrom(dplyr,select)
20+
importFrom(dplyr,summarise)
1921
importFrom(dplyr,ungroup)
2022
importFrom(forecast,auto.arima)
2123
importFrom(forecast,forecast)

R/get_ww_data.R

Lines changed: 71 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,15 @@
33
#' most recent dates
44
#'
55
#' @inheritParams get_hosp_for_eval
6+
#' @inheritParams add_correct_lod
67
#' @param filepath_name Name of directory to save the raw input wastewater data.
78
#' @importFrom fs dir_create
89
#' @importFrom readr read_tsv read_csv write_csv
910
#' @autoglobal
1011
get_ww_for_eval <- function(location_name,
1112
location_abbr,
1213
forecast_date,
14+
path_to_lod_vals,
1315
forecast_horizon = 28,
1416
filepath_name = file.path("input", "data", "ww")) {
1517
# For now, just pull the latest and filter to lag days before the forecast
@@ -28,7 +30,8 @@ get_ww_for_eval <- function(location_name,
2830
ww_clean <- reformat_ww_data(
2931
raw_ww = RKI_ww_sites,
3032
location_name = location_name,
31-
location_abbr = location_abbr
33+
location_abbr = location_abbr,
34+
path_to_lod_vals = path_to_lod_vals
3235
)
3336

3437
return(ww_clean)
@@ -42,6 +45,7 @@ get_ww_for_eval <- function(location_name,
4245
#' @inheritParams get_ww_for_eval
4346
#' @param forecast_date Character string or date indicating the date of
4447
#' forecast in YYYY-MM-DD
48+
#' @inheritParams add_correct_lod
4549
#' @param calibration_period Integer indicating the number of days of
4650
#' wastewater calibration data to extract. Default is `100`.
4751
#' @param ww_data_url Character string of the url of the wastewater data (not
@@ -53,6 +57,7 @@ get_ww_for_eval <- function(location_name,
5357
get_ww_as_of_forecast_date <- function(forecast_date,
5458
location_name,
5559
location_abbr,
60+
path_to_lod_vals,
5661
filepath_name = file.path("input", "data", "ww", "vintages"), # nolint
5762
calibration_period = 100,
5863
ww_data_url = "https://raw.githubusercontent.com/robert-koch-institut/Abwassersurveillance_AMELAG/refs/heads/main/amelag_einzelstandorte.tsv") { # nolint
@@ -75,7 +80,8 @@ get_ww_as_of_forecast_date <- function(forecast_date,
7580
ww_for_fit <- reformat_ww_data(
7681
raw_ww = RKI_ww_sites,
7782
location_name = location_name,
78-
location_abbr = location_abbr
83+
location_abbr = location_abbr,
84+
path_to_lod_vals = path_to_lod_vals
7985
) |>
8086
filter(
8187
date >= ymd(forecast_date) - days(calibration_period)
@@ -168,6 +174,7 @@ get_vintage <- function(raw_url,
168174
#'
169175
#' @param raw_ww Data.frame from the RKI GitHub
170176
#' @inheritParams get_hosp_for_eval
177+
#' @inheritParams add_correct_lod
171178
#' @param log_lod_val Scalar indicating the value to reset the
172179
#' limit of detection (LOD) to, to be
173180
#' removed in future iterations and replace with an LOD value at each
@@ -179,6 +186,7 @@ get_vintage <- function(raw_url,
179186
reformat_ww_data <- function(raw_ww,
180187
location_abbr,
181188
location_name,
189+
path_to_lod_vals,
182190
log_lod_val = 1) {
183191
if ("unter_bg" %in% names(raw_ww)) {
184192
raw_ww <- dplyr::rename(raw_ww, below_LOD = unter_bg)
@@ -238,5 +246,65 @@ reformat_ww_data <- function(raw_ww,
238246
below_LOD, location_abbr, location_name
239247
) |>
240248
filter(!is.na(log_genome_copies_per_ml))
241-
return(ww_clean)
249+
250+
ww_w_lod <- add_correct_lod(
251+
ww_clean,
252+
path_to_lod_vals
253+
)
254+
return(ww_w_lod)
255+
}
256+
257+
#' Add the correct LOD using external data on LOQ for each gene at each site
258+
#' and time point
259+
#'
260+
#' @param ww_data Data.frame of wastewater data from an individual location
261+
#' @param path_to_lod_vals Character string indicating the file path to the LOQ
262+
#' data
263+
#'
264+
#' @returns Data.frame containing updated `ww_data` where the LOD column has
265+
#' now been filled in with the geometric mean of the LOQ across all genes
266+
#' measured on that date and in that site, if the data is flagged as being
267+
#' below the LOD.
268+
#' @autoglobal
269+
#' @importFrom cli cli_warn
270+
#' @importFrom dplyr group_by summarise case_when filter
271+
add_correct_lod <- function(ww_data,
272+
path_to_lod_vals = NULL) {
273+
if (!file.exists(path_to_lod_vals) || !file.exists(path_to_lod_vals)) {
274+
cli_warn(
275+
message = "LOD values are missing!"
276+
)
277+
return(ww_data)
278+
}
279+
280+
lod_vals <- read_csv(path_to_lod_vals)
281+
overall_mean_loq <- lod_vals |>
282+
group_by(Standort, Bundesland, date) |>
283+
summarise(mean_loq = exp(mean(log(loq), na.rm = TRUE))) |>
284+
ungroup() |>
285+
summarise(overall_mean = mean(mean_loq, na.rm = TRUE)) |>
286+
pull(overall_mean)
287+
overall_mean_loq_value <- overall_mean_loq # nolint: object_usage_linter
288+
lod_vals_clean <- filter(lod_vals, Standort %in% c(unique(ww_data$site)))
289+
mean_lod <- lod_vals_clean |>
290+
filter(!is.na(loq)) |>
291+
group_by(Standort, Bundesland, date) |>
292+
summarise(mean_loq = exp(mean(log(loq), na.rm = TRUE)))
293+
294+
ww_data_lod_joined <- ww_data |>
295+
left_join(mean_lod, by = c(
296+
# nolint start
297+
"site" = "Standort",
298+
"location_name" = "Bundesland",
299+
"date"
300+
# nolint end
301+
)) |>
302+
mutate(log_lod = case_when(
303+
!is.na(mean_loq) & below_LOD == "ja" ~ log(mean_loq),
304+
is.na(mean_loq) & below_LOD == "ja" ~ log(overall_mean_loq_value),
305+
TRUE ~ log_lod
306+
)) |>
307+
select(-mean_loq)
308+
309+
return(ww_data_lod_joined)
242310
}

R/globals.R

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,11 @@ utils::globalVariables(c(
6262
"log_genome_copies_per_ml", # <reformat_ww_data>
6363
"log_lod", # <reformat_ww_data>
6464
"site_pop", # <reformat_ww_data>
65+
"Standort", # <add_correct_lod>
66+
"Bundesland", # <add_correct_lod>
67+
"loq", # <add_correct_lod>
68+
"mean_loq", # <add_correct_lod>
69+
"overall_mean", # <add_correct_lod>
6570
"state", # <format_baseline_forecasts>
6671
"quantile_level", # <format_baseline_forecasts>
6772
NULL

inst/WORDLIST

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ COVID
22
DD
33
KITmetricslab
44
LOD
5+
LOQ
56
Nowcast
67
RKI
78
YYYY

man/add_correct_lod.Rd

Lines changed: 25 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/get_ww_as_of_forecast_date.Rd

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/get_ww_for_eval.Rd

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/reformat_ww_data.Rd

Lines changed: 10 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

targets/create_permutations_targets.R

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,17 @@ create_permutations_targets <- list(
33
name = locations,
44
command = tibble(
55
location_name = c("Berlin", "Hamburg"),
6-
location_abbr = c("BE", "HH")
6+
# nolint start
7+
# location_name = c("Nordrhein-Westfalen","Baden-Württemberg","Bayern",
8+
# "Rheinland-Pfalz","Thüringen" ,"Sachsen", "Berlin",
9+
# "Sachsen-Anhalt", "Niedersachsen", "Brandenburg",
10+
# "Bremen", "Hessen",
11+
# "Schleswig-Holstein", "Mecklenburg-Vorpommern",
12+
# "Hamburg","Saarland"),
13+
location_abbr = c("BE", "HH"),
14+
# location_abbr = c("NW","BW","BY","RP","TH","SN", "BE", "ST","NI",
15+
# "BB","HB","HE","SH","MV","HH","SL")
16+
# nolint end
717
)
818
),
919
tar_file(
@@ -31,6 +41,11 @@ create_permutations_targets <- list(
3141
)
3242
),
3343
tar_target(
44+
name = path_to_lod_vals,
45+
command = file.path(
46+
"input", "data",
47+
"loq_data.csv"
48+
)
3449
name = quantiles_to_save,
3550
command = c(0.025, 0.05, 0.25, 0.5, 0.75, 0.95, 0.975)
3651
),

targets/load_data_targets.R

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@ load_data_targets <- list(
2323
get_ww_for_eval(
2424
location_name = scenarios$location_name,
2525
location_abbr = scenarios$location_abbr,
26-
forecast_date = scenarios$forecast_date
26+
forecast_date = scenarios$forecast_date,
27+
path_to_lod_vals = path_to_lod_vals,
2728
),
2829
pattern = map(scenarios)
2930
),
@@ -33,6 +34,7 @@ load_data_targets <- list(
3334
forecast_date = scenarios$forecast_date,
3435
location_name = scenarios$location_name,
3536
location_abbr = scenarios$location_abbr,
37+
path_to_lod_vals = path_to_lod_vals,
3638
calibration_period = 100
3739
),
3840
pattern = map(ww_data_eval, scenarios)

0 commit comments

Comments
 (0)