33# ' most recent dates
44# '
55# ' @inheritParams get_hosp_for_eval
6+ # ' @inheritParams add_correct_lod
67# ' @param filepath_name Name of directory to save the raw input wastewater data.
78# ' @importFrom fs dir_create
89# ' @importFrom readr read_tsv read_csv write_csv
910# ' @autoglobal
1011get_ww_for_eval <- function (location_name ,
1112 location_abbr ,
1213 forecast_date ,
14+ path_to_lod_vals ,
1315 forecast_horizon = 28 ,
1416 filepath_name = file.path(" input" , " data" , " ww" )) {
1517 # For now, just pull the latest and filter to lag days before the forecast
@@ -28,7 +30,8 @@ get_ww_for_eval <- function(location_name,
2830 ww_clean <- reformat_ww_data(
2931 raw_ww = RKI_ww_sites ,
3032 location_name = location_name ,
31- location_abbr = location_abbr
33+ location_abbr = location_abbr ,
34+ path_to_lod_vals = path_to_lod_vals
3235 )
3336
3437 return (ww_clean )
@@ -42,6 +45,7 @@ get_ww_for_eval <- function(location_name,
4245# ' @inheritParams get_ww_for_eval
4346# ' @param forecast_date Character string or date indicating the date of
4447# ' forecast in YYYY-MM-DD
48+ # ' @inheritParams add_correct_lod
4549# ' @param calibration_period Integer indicating the number of days of
4650# ' wastewater calibration data to extract. Default is `100`.
4751# ' @param ww_data_url Character string of the url of the wastewater data (not
@@ -53,6 +57,7 @@ get_ww_for_eval <- function(location_name,
5357get_ww_as_of_forecast_date <- function (forecast_date ,
5458 location_name ,
5559 location_abbr ,
60+ path_to_lod_vals ,
5661 filepath_name = file.path(" input" , " data" , " ww" , " vintages" ), # nolint
5762 calibration_period = 100 ,
5863 ww_data_url = " https://raw.githubusercontent.com/robert-koch-institut/Abwassersurveillance_AMELAG/refs/heads/main/amelag_einzelstandorte.tsv" ) { # nolint
@@ -75,7 +80,8 @@ get_ww_as_of_forecast_date <- function(forecast_date,
7580 ww_for_fit <- reformat_ww_data(
7681 raw_ww = RKI_ww_sites ,
7782 location_name = location_name ,
78- location_abbr = location_abbr
83+ location_abbr = location_abbr ,
84+ path_to_lod_vals = path_to_lod_vals
7985 ) | >
8086 filter(
8187 date > = ymd(forecast_date ) - days(calibration_period )
@@ -168,6 +174,7 @@ get_vintage <- function(raw_url,
168174# '
169175# ' @param raw_ww Data.frame from the RKI GitHub
170176# ' @inheritParams get_hosp_for_eval
177+ # ' @inheritParams add_correct_lod
171178# ' @param log_lod_val Scalar indicating the value to reset the
172179# ' limit of detection (LOD) to, to be
173180# ' removed in future iterations and replace with an LOD value at each
@@ -179,6 +186,7 @@ get_vintage <- function(raw_url,
179186reformat_ww_data <- function (raw_ww ,
180187 location_abbr ,
181188 location_name ,
189+ path_to_lod_vals ,
182190 log_lod_val = 1 ) {
183191 if (" unter_bg" %in% names(raw_ww )) {
184192 raw_ww <- dplyr :: rename(raw_ww , below_LOD = unter_bg )
@@ -238,5 +246,65 @@ reformat_ww_data <- function(raw_ww,
238246 below_LOD , location_abbr , location_name
239247 ) | >
240248 filter(! is.na(log_genome_copies_per_ml ))
241- return (ww_clean )
249+
250+ ww_w_lod <- add_correct_lod(
251+ ww_clean ,
252+ path_to_lod_vals
253+ )
254+ return (ww_w_lod )
255+ }
256+
257+ # ' Add the correct LOD using external data on LOQ for each gene at each site
258+ # ' and time point
259+ # '
260+ # ' @param ww_data Data.frame of wastewater data from an individual location
261+ # ' @param path_to_lod_vals Character string indicating the file path to the LOQ
262+ # ' data
263+ # '
264+ # ' @returns Data.frame containing updated `ww_data` where the LOD column has
265+ # ' now been filled in with the geometric mean of the LOQ across all genes
266+ # ' measured on that date and in that site, if the data is flagged as being
267+ # ' below the LOD.
268+ # ' @autoglobal
269+ # ' @importFrom cli cli_warn
270+ # ' @importFrom dplyr group_by summarise case_when filter
271+ add_correct_lod <- function (ww_data ,
272+ path_to_lod_vals = NULL ) {
273+ if (! file.exists(path_to_lod_vals ) || ! file.exists(path_to_lod_vals )) {
274+ cli_warn(
275+ message = " LOD values are missing!"
276+ )
277+ return (ww_data )
278+ }
279+
280+ lod_vals <- read_csv(path_to_lod_vals )
281+ overall_mean_loq <- lod_vals | >
282+ group_by(Standort , Bundesland , date ) | >
283+ summarise(mean_loq = exp(mean(log(loq ), na.rm = TRUE ))) | >
284+ ungroup() | >
285+ summarise(overall_mean = mean(mean_loq , na.rm = TRUE )) | >
286+ pull(overall_mean )
287+ overall_mean_loq_value <- overall_mean_loq # nolint: object_usage_linter
288+ lod_vals_clean <- filter(lod_vals , Standort %in% c(unique(ww_data $ site )))
289+ mean_lod <- lod_vals_clean | >
290+ filter(! is.na(loq )) | >
291+ group_by(Standort , Bundesland , date ) | >
292+ summarise(mean_loq = exp(mean(log(loq ), na.rm = TRUE )))
293+
294+ ww_data_lod_joined <- ww_data | >
295+ left_join(mean_lod , by = c(
296+ # nolint start
297+ " site" = " Standort" ,
298+ " location_name" = " Bundesland" ,
299+ " date"
300+ # nolint end
301+ )) | >
302+ mutate(log_lod = case_when(
303+ ! is.na(mean_loq ) & below_LOD == " ja" ~ log(mean_loq ),
304+ is.na(mean_loq ) & below_LOD == " ja" ~ log(overall_mean_loq_value ),
305+ TRUE ~ log_lod
306+ )) | >
307+ select(- mean_loq )
308+
309+ return (ww_data_lod_joined )
242310}
0 commit comments