 #'
 #' @inheritParams get_hosp_for_eval
 #' @param filepath_name Name of directory to save the raw input wastewater data.
-#' @importFrom dplyr mutate filter select rename
 #' @importFrom fs dir_create
 #' @importFrom readr read_tsv read_csv write_csv
 #' @autoglobal
@@ -25,15 +24,176 @@ get_ww_for_eval <- function(location_name,
   }


-  ww_clean <- RKI_ww_sites |>
+  # Rename columns and format for wwinference
+  ww_clean <- reformat_ww_data(
+    raw_ww = RKI_ww_sites,
+    location_name = location_name,
+    location_abbr = location_abbr
+  )
+
+  return(ww_clean)
+}
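+
+# A minimal usage sketch of the function above; the location values are
+# hypothetical and the other arguments keep their defaults:
+# ww_eval <- get_ww_for_eval(
+#   location_name = "Berlin",
+#   location_abbr = "BE"
+# )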
+
+#' Use the Git commit history to get the wastewater data available as of
+#' the forecast date. A subsequent function then filters it to the location
+#' we want.
+#'
+#' @inheritParams get_hosp_for_eval
+#' @inheritParams get_ww_for_eval
+#' @param forecast_date Character string or date indicating the date of
+#' forecast in YYYY-MM-DD
+#' @param calibration_period Integer indicating the number of days of
+#' wastewater calibration data to extract. Default is `100`.
+#' @param ww_data_url Character string of the url of the wastewater data (not
+#' the raw version).
+#'
+#' @returns Data.frame of cleaned wastewater data as of the forecast date,
+#' filtered to the calibration period, with a forecast_date column added.
+#' @autoglobal
+#' @importFrom dplyr filter mutate
+#' @importFrom lubridate ymd days
+get_ww_as_of_forecast_date <- function(forecast_date,
+                                       location_name,
+                                       location_abbr,
+                                       filepath_name = file.path("input", "data", "ww", "vintages"), # nolint
+                                       calibration_period = 100,
+                                       ww_data_url = "https://raw.githubusercontent.com/robert-koch-institut/Abwassersurveillance_AMELAG/refs/heads/main/amelag_einzelstandorte.tsv") { # nolint
+  vintage_fp <- file.path(filepath_name, forecast_date, "ww_sites_as_of.csv")
+  if (file.exists(vintage_fp)) {
+    RKI_ww_sites <- read_csv(vintage_fp)
+  } else {
+    # Need to find the url associated with the commit history as of the
+    # forecast date
+    ww_vintage_data_url <- get_vintage(
+      raw_url = ww_data_url,
+      target_date = forecast_date
+    )
+    RKI_ww_sites <- read_tsv(ww_vintage_data_url)
+    dir_create(file.path(filepath_name, forecast_date))
+    write_csv(RKI_ww_sites, vintage_fp)
+  }
+
+  # Clean the data, filter to the calibration period, and add the forecast date
+  ww_for_fit <- reformat_ww_data(
+    raw_ww = RKI_ww_sites,
+    location_name = location_name,
+    location_abbr = location_abbr
+  ) |>
+    filter(
+      date >= ymd(forecast_date) - days(calibration_period)
+    ) |>
+    mutate(forecast_date = forecast_date)
+
+  return(ww_for_fit)
+}
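+
+# A minimal usage sketch, with a hypothetical forecast date and location;
+# the caching directory and data url keep their defaults:
+# ww_fit <- get_ww_as_of_forecast_date(
+#   forecast_date = "2024-10-15",
+#   location_name = "Berlin",
+#   location_abbr = "BE"
+# )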
+
+
+#' Use the GitHub raw url and commit history to get the url of the data as of
+#' a specific target date.
+#'
+#' @param raw_url Character string indicating the url of the latest data.
+#' @param target_date Character string or date in YYYY-MM-DD; the "as of" date.
+#' @param github_token Optional GitHub Personal Access Token.
+#'
+#' @returns Character string of the url to use to get the data as of a certain
+#' date.
+#' @autoglobal
+#' @importFrom httr parse_url GET stop_for_status
+#' @importFrom jsonlite fromJSON
+get_vintage <- function(raw_url,
+                        target_date,
+                        github_token = NULL) {
+  parsed <- parse_url(raw_url)
+  path_parts <- strsplit(gsub("^/|/$", "", parsed$path), "/")[[1]] # nolint
+
+  owner <- path_parts[1]
+  repo <- path_parts[2]
+
+  if (length(path_parts) > 3 && path_parts[3] == "refs" && path_parts[4] == "heads") { # nolint
+    branch <- path_parts[5]
+    file_path <- paste(path_parts[6:length(path_parts)], collapse = "/")
+  } else {
+    branch <- path_parts[3]
+    file_path <- paste(path_parts[4:length(path_parts)], collapse = "/")
+  }
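+  # e.g. the default RKI url used above parses to
+  # owner = "robert-koch-institut", repo = "Abwassersurveillance_AMELAG",
+  # branch = "main", file_path = "amelag_einzelstandorte.tsv"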
+
+  # Include the whole of the target day, in UTC
+  target_datetime <- as.POSIXct(paste0(target_date, " 23:59:59"), tz = "UTC")
+  target_iso <- format(target_datetime, "%Y-%m-%dT%H:%M:%SZ")
+
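+  # Unauthenticated GitHub API requests are rate-limited (60 per hour);
+  # passing a token raises the limit.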
+  headers <- c("Accept" = "application/vnd.github.v3+json") # nolint
+  if (!is.null(github_token)) {
+    headers <- c(headers, "Authorization" = paste("token", github_token)) # nolint
+  }
+
+  api_url <- sprintf("https://api.github.com/repos/%s/%s/commits", owner, repo)
+
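+  # The commits API lists commits newest-first, so per_page = 1 with
+  # until = target_iso returns the single most recent commit touching
+  # file_path on the branch at or before the end of the target date.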
+  response <- GET(
+    api_url,
+    httr::add_headers(.headers = headers),
+    query = list(
+      path = file_path,
+      sha = branch,
+      until = target_iso,
+      per_page = 1
+    )
+  )
+
+  stop_for_status(response)
+
+  commits <- fromJSON(httr::content(response, "text", encoding = "UTF-8"))
+
+  if (length(commits) == 0 || nrow(commits) == 0) {
+    stop(sprintf("No commits found for %s before %s", file_path, target_date),
+      call. = FALSE
+    )
+  }
+  # Get the commit SHA
+  commit_sha <- commits$sha[1]
+  commit_date <- commits$commit$committer$date[1]
+
+  # Construct the historical raw URL
+  historical_url <- sprintf(
+    "https://raw.githubusercontent.com/%s/%s/%s/%s",
+    owner, repo, commit_sha, file_path
+  )
+
+  message(sprintf(
+    "Found commit %s from %s",
+    substr(commit_sha, 1, 7), commit_date
+  ))
+
+  return(historical_url)
+}
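+
+# A minimal sketch of the transformation get_vintage() performs on the default
+# RKI url (the commit SHA in the result depends on the target date):
+# get_vintage(
+#   raw_url = "https://raw.githubusercontent.com/robert-koch-institut/Abwassersurveillance_AMELAG/refs/heads/main/amelag_einzelstandorte.tsv", # nolint
+#   target_date = "2024-10-15"
+# )
+# #> "https://raw.githubusercontent.com/robert-koch-institut/Abwassersurveillance_AMELAG/<commit_sha>/amelag_einzelstandorte.tsv"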
+
+#' Reformat the ww data, selecting only the location we need and the
+#' variables we need.
+#'
+#' @param raw_ww Data.frame of raw wastewater data from the RKI GitHub
+#' repository.
+#' @inheritParams get_hosp_for_eval
+#' @param log_lod_val Scalar indicating the value to set the log
+#' limit of detection (LOD) to; to be removed in future iterations and
+#' replaced with an LOD value at each measurement or site. Default is `1`.
+#'
+#' @returns Data.frame of cleaned wastewater values from location_abbr.
+#' @importFrom dplyr mutate filter select rename
+#' @autoglobal
+reformat_ww_data <- function(raw_ww,
+                             location_abbr,
+                             location_name,
+                             log_lod_val = 1) {
+  # The normalized viral load column is named differently across data vintages
+  if ("normalisierung" %in% names(raw_ww)) {
+    raw_ww <- dplyr::rename(raw_ww, normalized = normalisierung)
+  } else if ("viruslast_normalisiert" %in% names(raw_ww)) {
+    raw_ww <- dplyr::rename(raw_ww, normalized = viruslast_normalisiert)
+  }
+
+  ww_clean <- raw_ww |>
     rename(
       location = "standort",
       date = "datum",
       state = "bundesland",
       conc = "viruslast",
       pop_cov = "einwohner",
       change_in_lab_indicator = "laborwechsel",
-      normalized = "viruslast_normalisiert",
       pathogen = "typ",
       below_LOD = "unter_bg"
     ) |>
@@ -49,44 +209,18 @@ get_ww_for_eval <- function(location_name,
     mutate(
       lab = change_in_lab_indicator,
+      # conc is per liter; convert to per mL, small offset avoids log(0)
       log_genome_copies_per_ml = log((conc / 1e3) + 1e-8),
-      log_lod = 3 # make this up for now (maybe)
+      log_lod = log_lod_val, # placeholder; see log_lod_val documentation
+      location_name = location_name,
+      location_abbr = location_abbr
     ) |>
     rename(
       site = location,
       site_pop = pop_cov
     ) |>
     select(
       date, site, lab, log_genome_copies_per_ml, log_lod, site_pop,
-      below_LOD
+      below_LOD, location_abbr, location_name
     ) |>
     filter(!is.na(log_genome_copies_per_ml))
-
   return(ww_clean)
 }
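+
+# A minimal sketch of reformat_ww_data() on a toy one-row data.frame with the
+# German column names it expects; all values are made up, and the row is
+# assumed to pass any location/pathogen filtering elided from this diff:
+# toy <- data.frame(
+#   standort = "site_a", datum = as.Date("2024-10-01"), bundesland = "Berlin",
+#   viruslast = 2e5, einwohner = 1e5, laborwechsel = NA,
+#   viruslast_normalisiert = 1.2, typ = "SARS-CoV-2", unter_bg = "nein"
+# )
+# reformat_ww_data(toy, location_abbr = "BE", location_name = "Berlin")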
-
-#' Filter wastewater data for fitting
-#'
-#' @param ww_data_eval wastewater data for evaluation step
-#' @param forecast_date Character string or date indicating the date of
-#' forecast in YYYY-MM-DD
-#' @param calibration_period Integer indicating the number of days of
-#' wastewater calibration data to extract. Default is `100`.
-#' @param lag Integer indicating the number of days from the forecast date of
-#' the latest wastewater data. Default is `3`
-#' @autoglobal
-#' @importFrom dplyr filter
-#' @importFrom lubridate ymd days
-get_ww_for_fit <- function(ww_data_eval,
-                           forecast_date,
-                           calibration_period = 100,
-                           lag = 3) {
-  ww_for_fit <- ww_data_eval |>
-    filter(
-      date >= ymd(forecast_date) - days(calibration_period),
-      date <= ymd(forecast_date) - days(lag)
-    ) |>
-    mutate(
-      forecast_date = forecast_date
-    )
-  return(ww_for_fit)
-}