2 changes: 2 additions & 0 deletions NEWS
@@ -9,6 +9,8 @@ is not stable over time. Moved other "id" columns to end of returned data frames
encouraged to migrate to the "read_waterdata_metadata" functions.
* Added no_paging argument. This will make the request more efficient, but is not
recommended because it will silently cut off data after 50,000 rows.
* Removed max_results argument. It was confusing and redundant with the combination
of no_paging and limit.
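A rough sketch of the replacement pattern, using the read_waterdata_daily() signature shown later in this PR (the site and parameter code are illustrative): instead of max_results, pass an explicit limit together with no_paging = TRUE so the request returns at most limit rows in a single call.

# Sketch only: cap the result at 1,000 rows in one request instead of max_results.
capped <- read_waterdata_daily(monitoring_location_id = "USGS-01491000",
                               parameter_code = "00060",
                               time = c("2023-01-01", "2024-01-01"),
                               limit = 1000,
                               no_paging = TRUE)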

dataRetrieval 2.7.21
===================
52 changes: 25 additions & 27 deletions R/construct_api_requests.R
@@ -136,28 +136,9 @@ check_limits <- function(args){
current_api_limit <- 50000

if(is.na(args[["limit"]])){
if(!is.na(args[["max_results"]])){
# we can leave limit empty unless we're doing no paging and the max is > limit
if(args[["max_results"]] > current_api_limit){
args[["limit"]] <- current_api_limit
if(args[["no_paging"]]){
warning("no_paging option is capped at ", current_api_limit, " max_results")
args[["max_results"]] <- current_api_limit
}
} else {
args[["limit"]] <- args[["max_results"]]
}

} else {
args[["limit"]] <- current_api_limit
}
} else {
if(!is.na(args[["max_results"]])){
if(args[["limit"]] > args[["max_results"]]) stop("limit cannot be greater than max_result")
} else if (args[["limit"]] > current_api_limit){
args[["limit"]] <- current_api_limit
}
}
args[["limit"]] <- current_api_limit
}

return(args)
}
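For reference, a minimal sketch of the simplified behavior (check_limits() is internal, so the ::: access mirrors the roxygen examples further down): an unset limit now defaults to the 50,000-row API maximum, and a user-supplied limit passes through unchanged rather than being reconciled against max_results.

dataRetrieval:::check_limits(list(limit = NA))$limit   # 50000
dataRetrieval:::check_limits(list(limit = 250))$limit  # 250, passed through as-is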

@@ -283,6 +264,11 @@ switch_arg_id <- function(ls, id_name, service){
#' start_end2 <- c("2021-01-01T12:15:00-0500", "")
#' dataRetrieval:::format_api_dates(start_end2)
#'
#' time = c("2014-05-01T00:00:00Z", "2014-05-01T12:00:00Z")
#' dataRetrieval:::format_api_dates(time)
#'
#' time = c("2014-05-01T00:00Z", "2014-05-01T12:00Z")
#' dataRetrieval:::format_api_dates(time)
format_api_dates <- function(datetime, date = FALSE){

if(is.character(datetime)){
@@ -296,19 +282,31 @@ format_api_dates <- function(datetime, date = FALSE){
grepl("/", datetime)){
return(datetime)
} else {
datetime1 <- tryCatch({
Collaborator:

What are you catching here? The following works without warning for me:

lubridate::as_datetime("2010-01-01T00:00:15Z")

Are you catching something else?

Collaborator (Author):

So this is funky:

> lubridate::as_datetime("2010-01-01T00:00Z")
[1] "2020-10-01 01:00:00 UTC"
> lubridate::as_datetime("2014-05-01T00:00Z")
[1] NA
Warning message:
All formats failed to parse. No formats found.

lubridate::as_datetime(datetime)
},
warning = function(w) {
strptime(datetime, format = "%Y-%m-%dT%H:%MZ", tz = "UTC")
})
if(date){
datetime <- format(lubridate::as_datetime(datetime), "%Y-%m-%d")
datetime <- format(datetime1, "%Y-%m-%d")
} else {
datetime <- lubridate::format_ISO8601(lubridate::as_datetime(datetime), usetz = "Z")
datetime <- lubridate::format_ISO8601(datetime1, usetz = "Z")
}
}
} else if (length(datetime) == 2) {

datetime1 <- tryCatch({
lubridate::as_datetime(datetime)
},
warning = function(w) {
strptime(datetime, format = "%Y-%m-%dT%H:%MZ", tz = "UTC")
})

if(date){
datetime <- paste0(format(lubridate::as_datetime(datetime), "%Y-%m-%d"), collapse = "/")
datetime <- paste0(format(datetime1, "%Y-%m-%d"), collapse = "/")
} else {
datetime <- paste0(lubridate::format_ISO8601(lubridate::as_datetime(datetime),
usetz = "Z"),
datetime <- paste0(lubridate::format_ISO8601(datetime1, usetz = "Z"),
collapse = "/")
}
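A small illustration of the fallback discussed in the thread above, assuming only the minute-precision form trips the warning: full ISO-8601 timestamps parse directly with lubridate, while strings like "2014-05-01T00:00Z" warn and return NA, so the tryCatch() drops back to strptime() with an explicit format.

# Full ISO-8601 string: parses directly, no warning.
lubridate::as_datetime("2014-05-01T00:00:00Z")

# Minute-precision string: as_datetime() warns and returns NA,
# so the fallback parses it explicitly instead.
strptime("2014-05-01T00:00Z", format = "%Y-%m-%dT%H:%MZ", tz = "UTC")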

40 changes: 21 additions & 19 deletions R/get_ogc_data.R
@@ -9,9 +9,7 @@
get_ogc_data <- function(args,
output_id,
service){

args[["service"]] <- service


args <- switch_arg_id(args,
id_name = output_id,
service = service)
@@ -23,9 +21,7 @@ get_ogc_data <- function(args,
id = output_id)
convertType <- args[["convertType"]]
args[["convertType"]] <- NULL

max_results <- args[["max_results"]]
args[["max_results"]] <- NULL
args[["service"]] <- service

req <- do.call(construct_api_requests, args)

@@ -37,9 +33,9 @@
}

if(no_paging){
return_list <- get_csv(req, max_results)
return_list <- get_csv(req, limit = args[["limit"]])
} else {
return_list <- walk_pages(req, max_results)
return_list <- walk_pages(req)
}

if(is.na(args[["skipGeometry"]])){
@@ -51,10 +47,23 @@
return_list <- deal_with_empty(return_list, properties, service,
skipGeometry, convertType, no_paging)

if(convertType) return_list <- cleanup_cols(return_list, service = service)

return_list <- rejigger_cols(return_list, properties, output_id)

if(convertType){
return_list <- cleanup_cols(return_list, service)
return_list <- order_results(return_list)

# Mostly drop the id column except ts-meta, monitoring location:
if(!service %in% c("monitoring-locations",
"time-series-metadata",
"parameter-codes")){
return_list <- return_list[, names(return_list)[names(return_list)!= output_id]]
}
# Move other id columns:
return_list <- move_id_col(return_list,
output_id)
}
Collaborator:

Nice that you moved this out of the individual functions, plus the column moving piece.

attr(return_list, "request") <- req
attr(return_list, "queryTime") <- Sys.time()
return_list
@@ -73,11 +82,7 @@ order_results <- function(df){
}

move_id_col <- function(df, output_id){
# attributes get dropped
req <- attr(df, "request")
queryTime <- attr(df, "queryTime")

df <- df[, names(df)[names(df)!= output_id]]

if("time_series_id" %in% names(df)){
df <- df[, c(names(df)[names(df)!= "time_series_id"],
"time_series_id")]
@@ -87,10 +92,7 @@ move_id_col <- function(df, output_id){
df <- df[, c(names(df)[names(df)!= "field_visit_id"],
"field_visit_id")]
}

attr(df, "request") <- req
attr(df, "queryTime") <- queryTime


return(df)
}
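A toy sketch of the column shuffle (hypothetical data frame; move_id_col() is internal): any time_series_id or field_visit_id column is pushed to the end of the returned data frame, while the service's own id column is now dropped earlier, inside get_ogc_data().

df <- data.frame(time_series_id = "abc123",
                 value = 1.5,
                 time = as.Date("2024-01-01"))
dataRetrieval:::move_id_col(df, output_id = "daily_id")
# Columns come back ordered: value, time, time_series_id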

29 changes: 14 additions & 15 deletions R/read_waterdata.R
@@ -59,7 +59,6 @@ read_waterdata <- function(service,
match.arg(service, pkg.env$api_endpoints)

args <- list(...)
args[["service"]] <- service

output_id <- switch(service,
"daily" = "daily_id",
@@ -75,19 +74,20 @@
args[["properties"]] <- NA_character_
}

if(!"limit" %in% names(args)){
args[["limit"]] <- NA_character_
}

args[["service"]] <- service
args <- check_limits(args)

data_req <- suppressWarnings(do.call(construct_api_requests, args))

data_req <- data_req |>
httr2::req_headers(`Content-Type` = "application/query-cql-json") |>
httr2::req_body_raw(CQL)

if("max_results" %in% names(args)){
max_results <- args[["max_results"]]
} else {
max_results <- NA
}

return_list <- walk_pages(data_req, max_results)
return_list <- walk_pages(data_req)

if(is.null(args[["skipGeometry"]])){
skipGeometry <- FALSE
@@ -101,16 +101,15 @@
service,
skipGeometry,
convertType)

return_list <- rejigger_cols(return_list, args[["properties"]], output_id)

if(convertType) return_list <- cleanup_cols(return_list)

# Add other time series services when they come online
if(service %in% c("daily")){
return_list <- return_list[order(return_list$time, return_list$monitoring_location_id), ]
if(convertType){
return_list <- cleanup_cols(return_list, service)
return_list <- order_results(return_list)
return_list <- move_id_col(return_list, output_id)
}

return_list <- rejigger_cols(return_list, args[["properties"]], output_id)

return(return_list)
}
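For context, a hedged sketch of calling read_waterdata() with a CQL filter; the CQL2-JSON body below is illustrative, following the API's filter grammar rather than anything in this diff.

cql <- '{
  "op": "in",
  "args": [
    {"property": "parameter_code"},
    ["00060", "00065"]
  ]
}'

dv_cql <- read_waterdata(service = "daily",
                         CQL = cql,
                         time = c("2023-01-01", "2024-01-01"))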

10 changes: 1 addition & 9 deletions R/read_waterdata_continuous.R
@@ -36,8 +36,6 @@
#' limit is 50000. It may be beneficial to set this number lower if your internet
#' connection is spotty. The default (`NA`) will set the limit to the maximum
#' allowable limit for the service.
#' @param max_results The optional maximum number of rows to return. This value
#' must be less than the requested limit.
#' @param convertType logical, defaults to `TRUE`. If `TRUE`, the function
#' will convert the data to dates and the qualifier to a string vector, and specifically
#' order the returned data frame by time and monitoring_location_id.
@@ -80,7 +78,6 @@ read_waterdata_continuous <- function(monitoring_location_id = NA_character_,
last_modified = NA_character_,
time = NA_character_,
limit = NA,
max_results = NA,
convertType = TRUE,
no_paging = FALSE){

@@ -97,12 +94,7 @@ read_waterdata_continuous <- function(monitoring_location_id = NA_character_,
return_list <- get_ogc_data(args,
output_id,
service)

if(convertType){
return_list <- order_results(return_list)
return_list <- move_id_col(return_list, output_id)
}


return(return_list)
}
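A brief usage sketch tying back to the date handling above (parameter_code is assumed from the full signature, which is collapsed in this diff): minute-precision time bounds like these are accepted because format_api_dates() falls back to strptime().

uv_data <- read_waterdata_continuous(monitoring_location_id = "USGS-01491000",
                                     parameter_code = "00060",
                                     time = c("2024-01-01T00:00Z", "2024-01-02T00:00Z"))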

9 changes: 0 additions & 9 deletions R/read_waterdata_daily.R
@@ -32,8 +32,6 @@
#' limit is 50000. It may be beneficial to set this number lower if your internet
#' connection is spotty. The default (`NA`) will set the limit to the maximum
#' allowable limit for the service.
#' @param max_results The optional maximum number of rows to return. This value
#' must be less than the requested limit.
#' @param skipGeometry This option can be used to skip response geometries for
#' each feature. The returning object will be a data frame with no spatial
#' information.
@@ -72,7 +70,6 @@
#' multi_site <- read_waterdata_daily(monitoring_location_id = c("USGS-01491000",
#' "USGS-01645000"),
#' parameter_code = c("00060", "00010"),
#' limit = 500,
#' time = c("2023-01-01", "2024-01-01"))
#'
#' dv_data_quick <- read_waterdata_daily(monitoring_location_id = site,
@@ -94,7 +91,6 @@ read_waterdata_daily <- function(monitoring_location_id = NA_character_,
time = NA_character_,
bbox = NA,
limit = NA,
max_results = NA,
convertType = TRUE,
no_paging = FALSE){

@@ -106,11 +102,6 @@ read_waterdata_daily <- function(monitoring_location_id = NA_character_,
output_id,
service)

if(convertType){
return_list <- order_results(return_list)
return_list <- move_id_col(return_list, output_id)
}

return(return_list)
}

8 changes: 0 additions & 8 deletions R/read_waterdata_field_measurements.R
@@ -35,8 +35,6 @@
#' limit is 50000. It may be beneficial to set this number lower if your internet
#' connection is spotty. The default (`NA`) will set the limit to the maximum
#' allowable limit for the service.
#' @param max_results The optional maximum number of rows to return. This value
#' must be less than the requested limit.
#' @param skipGeometry This option can be used to skip response geometries for
#' each feature. The returning object will be a data frame with no spatial
#' information.
@@ -97,7 +95,6 @@ read_waterdata_field_measurements <- function(monitoring_location_id = NA_charac
time = NA_character_,
bbox = NA,
limit = NA,
max_results = NA,
convertType = TRUE,
no_paging = FALSE){

@@ -109,11 +106,6 @@ read_waterdata_field_measurements <- function(monitoring_location_id = NA_charac
output_id,
service)

if(convertType){
return_list <- order_results(return_list)
return_list <- move_id_col(return_list, output_id)
}

return(return_list)
}

8 changes: 0 additions & 8 deletions R/read_waterdata_latest_continuous.R
@@ -34,8 +34,6 @@
#' limit is 50000. It may be beneficial to set this number lower if your internet
#' connection is spotty. The default (`NA`) will set the limit to the maximum
#' allowable limit for the service.
#' @param max_results The optional maximum number of rows to return. This value
#' must be less than the requested limit.
#' @param skipGeometry This option can be used to skip response geometries for
#' each feature. The returning object will be a data frame with no spatial
#' information.
@@ -93,7 +91,6 @@ read_waterdata_latest_continuous <- function(monitoring_location_id = NA_charact
time = NA_character_,
bbox = NA,
limit = NA,
max_results = NA,
convertType = TRUE,
no_paging = FALSE){

@@ -104,11 +101,6 @@ read_waterdata_latest_continuous <- function(monitoring_location_id = NA_charact
return_list <- get_ogc_data(args,
output_id,
service)

if(convertType){
return_list <- order_results(return_list)
return_list <- move_id_col(return_list, output_id)
}

return(return_list)
}
7 changes: 0 additions & 7 deletions R/read_waterdata_latest_daily.R
@@ -32,8 +32,6 @@
#' limit is 50000. It may be beneficial to set this number lower if your internet
#' connection is spotty. The default (`NA`) will set the limit to the maximum
#' allowable limit for the service.
#' @param max_results The optional maximum number of rows to return. This value
#' must be less than the requested limit.
#' @param skipGeometry This option can be used to skip response geometries for
#' each feature. The returning object will be a data frame with no spatial
#' information.
@@ -84,7 +82,6 @@ read_waterdata_latest_daily <- function(monitoring_location_id = NA_character_,
time = NA_character_,
bbox = NA,
limit = NA,
max_results = NA,
convertType = TRUE,
no_paging = FALSE){

@@ -96,10 +93,6 @@ read_waterdata_latest_daily <- function(monitoring_location_id = NA_character_,
output_id,
service)

if(convertType){
return_list <- order_results(return_list)
return_list <- move_id_col(return_list, output_id)
}
return(return_list)
}
