diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 94a35736..290f2bf2 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -40,7 +40,7 @@ jobs: http-user-agent: ${{ matrix.config.http-user-agent }} use-public-rspm: true - - uses: r-lib/actions/setup-r-dependencies@96b1dc658a45175f93ed5f33fda2b2cebbb12ee8 + - uses: r-lib/actions/setup-r-dependencies@6f6e5bc62fba3a704f74e7ad7ef7676c5c6a2590 with: extra-packages: | any::rcmdcheck diff --git a/DESCRIPTION b/DESCRIPTION index 96d0452a..e61e5727 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: dataRetrieval Type: Package Title: Retrieval Functions for USGS and EPA Hydrology and Water Quality Data -Version: 2.7.21.9000 +Version: 2.7.22 Authors@R: c( person("Laura", "DeCicco", role = c("aut","cre"), email = "ldecicco@usgs.gov", @@ -29,7 +29,7 @@ Authors@R: c( person("Joeseph", "Zemmels", role="ctb", email = "jzemmels@usgs.gov", comment=c(ORCID = "0009-0008-1463-6313")), - person("Elise", "Hinman", role="ctb", + person("Elise", "Hinman", role="aut", email = "ehinman@usgs.gov", comment=c(ORCID = "0000-0001-5396-1583")), person("Michael", "Mahoney", role="ctb", diff --git a/NEWS b/NEWS index dd228539..ddc2a4f3 100644 --- a/NEWS +++ b/NEWS @@ -3,8 +3,12 @@ dataRetrieval 2.7.22 * Added read_waterdata_latest_daily to access latest daily USGS water data. * Added read_waterdata_continuous to access continuous USGS water data. * Added state_name and hydrologic_unit_code to read_waterdata_ts_meta -* Removed daily_id from read_waterdata_daily output. Currently it -is not stable over time. +* Removed daily_id from read_waterdata_daily output since it +is not stable over time. Moved other "id" columns to end of returned data frames. +* Changed examples for stateCd, countyCd, and parameterCdFile. Users are +encouraged to migrate to the "read_waterdata_metadata" functions. +* Added no_paging argument. This will make the request more efficient, but is not +recommended because it will silently cut off data after 50,000 rows. dataRetrieval 2.7.21 =================== diff --git a/R/AAA.R b/R/AAA.R index 489a3601..aed73232 100644 --- a/R/AAA.R +++ b/R/AAA.R @@ -12,7 +12,8 @@ pkg.env <- new.env() "continuous") collections <- c("parameter-codes", "agency-codes", "altitude-datums", "aquifer-codes", "aquifer-types", "coordinate-accuracy-codes", "coordinate-datum-codes", - "coordinate-method-codes", "medium-codes", + "coordinate-method-codes", "medium-codes", "counties", + "hydrologic-unit-codes", "states", "national-aquifer-codes", "reliability-codes", "site-types", "statistic-codes", "topographic-codes", "time-zone-codes") diff --git a/R/construct_api_requests.R b/R/construct_api_requests.R index cd678de5..c0b7b079 100644 --- a/R/construct_api_requests.R +++ b/R/construct_api_requests.R @@ -16,10 +16,6 @@ #' @param skipGeometry This option can be used to skip response geometries for #' each feature. The returning object will be a data frame with no spatial #' information. -#' @param limit The optional limit parameter limits the number of items that are -#' presented in the response document. Only items are counted that are on the -#' first level of the collection in the response document. Nested objects -#' contained within the explicitly requested items shall not be counted. 
#' @keywords internal #' @examples #' site <- "USGS-02238500" @@ -43,16 +39,13 @@ construct_api_requests <- function(service, properties = NA_character_, bbox = NA, - limit = NA, - max_results = NA, skipGeometry = FALSE, + no_paging = FALSE, ...){ - baseURL <- setup_api(service) - POST <- FALSE - single_params <- c("datetime", "last_modified", "begin", "end", "time") + single_params <- c("datetime", "last_modified", "begin", "end", "time", "limit") full_list <- list(...) @@ -60,23 +53,12 @@ construct_api_requests <- function(service, warning("No filtering arguments specified.") } + # GET list refers to arguments that will go in the URL no matter what (not POST) get_list <- full_list[names(full_list) %in% single_params] get_list[["skipGeometry"]] <- skipGeometry - if(is.na(limit)){ - if(!is.na(max_results)){ - get_list[["limit"]] <- max_results - } else { - get_list[["limit"]] <- 50000 - } - } else { - if(!is.na(max_results)){ - if(limit > max_results) stop("limit cannot be greater than max_result") - } - get_list[["limit"]] <- limit - } - + #POST list are the arguments that need to be in the POST body post_list <- full_list[!names(full_list) %in% single_params] post_params <- explode_post(post_list) @@ -100,6 +82,9 @@ construct_api_requests <- function(service, } } + format_type <- ifelse(isTRUE(no_paging), "csv", "json") + + baseURL <- setup_api(service, format = format_type) baseURL <- explode_query(baseURL, POST = FALSE, get_list) if(all(!is.na(bbox))){ @@ -109,6 +94,18 @@ construct_api_requests <- function(service, } if(!all(is.na(properties))){ + available_properties <- property_list[[service]] + + if(!all(properties %in% available_properties)){ + # Check again: + schema <- check_OGC_requests(endpoint = service, type = "schema") + properties_fresh <- names(schema$properties) + if(!all(properties %in% properties_fresh)){ + stop("Invalid properties: ", + paste0(properties[!properties %in% properties_fresh], collapse = ", ")) + } + } + baseURL <- httr2::req_url_query(baseURL, properties = properties, .multi = "comma") @@ -135,6 +132,35 @@ construct_api_requests <- function(service, return(baseURL) } +check_limits <- function(args){ + current_api_limit <- 50000 + + if(is.na(args[["limit"]])){ + if(!is.na(args[["max_results"]])){ + # we can leave limit empty unless we're doing no paging and the max is > limit + if(args[["max_results"]] > current_api_limit){ + args[["limit"]] <- current_api_limit + if(args[["no_paging"]]){ + warning("no_paging option is capped at ", current_api_limit, " max_results") + args[["max_results"]] <- current_api_limit + } + } else { + args[["limit"]] <- args[["max_results"]] + } + + } else { + args[["limit"]] <- current_api_limit + } + } else { + if(!is.na(args[["max_results"]])){ + if(args[["limit"]] > args[["max_results"]]) stop("limit cannot be greater than max_result") + } else if (args[["limit"]] > current_api_limit){ + args[["limit"]] <- current_api_limit + } + } + return(args) +} + #' Setup the request for the OGC API requests #' #' @noRd @@ -161,12 +187,12 @@ base_url <- function(){ #' request <- dataRetrieval:::setup_api("daily") #' request #' } -setup_api <- function(service){ +setup_api <- function(service, format = "json"){ baseURL <- base_url() |> httr2::req_url_path_append("collections") |> httr2::req_url_path_append(service, "items") |> - basic_request() + basic_request(format = format) } @@ -207,76 +233,6 @@ switch_arg_id <- function(ls, id_name, service){ return(ls) } -#' Switch properties id -#' -#' @noRd -#' @return list -#' @examples -#' -#' 
properties <- c("id", "state_name", "country_name") -#' dataRetrieval:::switch_properties_id(properties, -#' id_name = "monitoring_location_id", -#' service = "monitoring-locations") -#' -#' properties2 <- c("monitoring_location_id", "state_name", "country_name") -#' dataRetrieval:::switch_properties_id(properties2, -#' id_name = "monitoring_location_id", -#' service = "monitoring-locations") -#' -#' properties3 <- c("monitoring_locations_id", "state_name", "country_name") -#' dataRetrieval:::switch_properties_id(properties3, -#' id_name = "monitoring_location_id", -#' service = "monitoring-locations") -switch_properties_id <- function(properties, id_name, service){ - - service_id <- paste0(gsub("-", "_", service), "_id") - - last_letter <- substr(service, nchar(service), nchar(service)) - if(last_letter == "s"){ - service_singluar <- substr(service,1, nchar(service)-1) - service_id_singular <- paste0(gsub("-", "_", service_singluar), "_id") - } else { - service_id_singular <- "" - } - - if(!"id" %in% properties){ - if(service_id %in% properties){ - properties[properties == service_id] <- "id" - - } else if(service_id_singular %in% properties) { - properties[properties == service_id_singular] <- "id" - } else { - properties[properties == id_name] <- "id" - } - } - - if(!all(is.na(properties))){ - - schema <- check_OGC_requests(endpoint = service, - type = "schema") - all_properties <- names(schema$properties) - - if(all(all_properties[!all_properties %in% c("id", "geometry")] %in% properties)) { - # Cleans up URL if we're asking for everything - properties <- NA_character_ - } else { - properties <- gsub("-", "_", properties) - properties <- properties[!properties %in% c("id", - "geometry", - paste0(gsub("-", "_", service), "_id"))] - - } - - if(!all(is.na(properties))){ - match.arg(properties, choices = all_properties, - several.ok = TRUE) - } - } - - return(properties) -} - - #' Format the date request #' #' Users will want to give either start/end dates or @@ -425,47 +381,7 @@ cql2_param <- function(parameter){ return(whisker::whisker.render(template, parameter_list)) } -#' Check OGC requests -#' -#' @param endpoint Character, can be any existing collection -#' @param type Character, can be "queryables", "schema" -#' @export -#' @keywords internal -#' @return list -#' @examplesIf is_dataRetrieval_user() -#' -#' \donttest{ -#' -#' dv_queryables <- check_OGC_requests(endpoint = "daily", -#' type = "queryables") -#' dv_schema <- check_OGC_requests(endpoint = "daily", -#' type = "schema") -#' ts_meta_queryables <- check_OGC_requests(endpoint = "time-series-metadata", -#' type = "queryables") -#' ts_meta_schema <- check_OGC_requests(endpoint = "time-series-metadata", -#' type = "schema") -#' } -check_OGC_requests <- function(endpoint = "daily", - type = "queryables"){ - - match.arg(type, c("queryables", "schema")) - - match.arg(endpoint, c(pkg.env$api_endpoints, - pkg.env$metadata)) - - req <- base_url() |> - httr2::req_url_path_append("collections") |> - httr2::req_url_path_append(endpoint) |> - httr2::req_url_path_append(type) |> - basic_request() - - query_ret <- req |> - httr2::req_perform() |> - httr2::resp_body_json() - - return(query_ret) - -} + #' Custom Error Messages #' @@ -515,12 +431,12 @@ error_body <- function(resp) { #' collect_request #' } #' -basic_request <- function(url_base){ +basic_request <- function(url_base, format = "json"){ req <- url_base |> httr2::req_user_agent(default_ua()) |> httr2::req_headers(`Accept-Encoding` = c("compress", "gzip")) |> - 
httr2::req_url_query(f = "json", + httr2::req_url_query(f = format, lang = "en-US") |> httr2::req_error(body = error_body) @@ -535,85 +451,4 @@ basic_request <- function(url_base){ } -#' Create service descriptions dynamically -#' -#' This function populates the parameter descriptions. -#' -#' @param service Character, can be any of the endpoints -#' @return list -#' @noRd -#' @examplesIf is_dataRetrieval_user() -#' -#' \donttest{ -#' ml_desc <- dataRetrieval:::get_description("monitoring-locations") -#' ml_desc -#' } -#' -get_description <- function(service){ - query_ret <- get_collection() - - tags <- query_ret[["tags"]] - - service_index <- which(sapply(tags, function(x){ - x$name == service - })) - - tags[[service_index]][["description"]] - -} - -#' Get collection response -#' -#' -#' @return httr2 response -#' @noRd -#' @examplesIf is_dataRetrieval_user() -#' -#' \donttest{ -#' collection <- dataRetrieval:::get_collection() -#' collection -#' } -#' -get_collection <- function(){ - - check_collections <- base_url() |> - httr2::req_url_path_append("openapi") |> - httr2::req_url_query(f = "html#/server/getCollections") - - check_endpoints_req <- basic_request(check_collections) - - query_ret <- httr2::req_perform(check_endpoints_req) |> - httr2::resp_body_json() - - return(query_ret) -} - -#' Create parameter descriptions dynamically -#' -#' This function populates the parameter descriptions. -#' -#' @param service Character, can be any of the endpoints -#' @return list -#' @noRd -#' @examplesIf is_dataRetrieval_user() -#' -#' \donttest{ -#' ml <- dataRetrieval:::get_params("monitoring-locations") -#' ml$national_aquifer_code -#' } -#' -get_params <- function(service){ - - check_queryables_req <- base_url() |> - httr2::req_url_path_append("collections") |> - httr2::req_url_path_append(service) |> - httr2::req_url_path_append("schema") |> - basic_request() - - query_ret <- httr2::req_perform(check_queryables_req) |> - httr2::resp_body_json() - - params <- sapply(query_ret$properties, function(x) x[["description"]]) - -} diff --git a/R/dataRetrievals-package.R b/R/dataRetrievals-package.R index cdb8bdf1..c7f431df 100644 --- a/R/dataRetrievals-package.R +++ b/R/dataRetrievals-package.R @@ -58,8 +58,13 @@ token_message) #' #' @docType data #' @export parameterCdFile -#' @examples -#' head(parameterCdFile[, 1:2]) +#' @examplesIf is_dataRetrieval_user() +#' +#' \donttest{ +#' # Please migrate to: +#' parameterCds <- read_waterdata_metadata("parameter-codes") +#' +#' } NULL @@ -113,8 +118,13 @@ NULL #' @docType data #' @export stateCd #' @keywords USGS stateCd -#' @examples -#' head(stateCd) +#' @examplesIf is_dataRetrieval_user() +#' +#' \donttest{ +#' # Please migrate to: +#' stateCd <- read_waterdata_metadata("states") +#' +#' } NULL #' US County Code Lookup Table @@ -136,8 +146,13 @@ NULL #' @docType data #' @export countyCd #' @keywords USGS countyCd -#' @examples -#' head(countyCd) +#' @examplesIf is_dataRetrieval_user() +#' +#' \donttest{ +#' # Please migrate to: +#' countyCd <- read_waterdata_metadata("counties") +#' +#' } NULL # nolint start: commented_code_linter @@ -190,4 +205,21 @@ NULL # # save(countyCd, stateCd, parameterCdFile, pCodeToName, # file = "R/sysdata.rda", compress = "xz") -# nolint end +# +# services <- c("daily", "time-series-metadata", +# "monitoring-locations", "latest-continuous", +# "field-measurements", "latest-daily", +# "continuous") +# +# property_list <- list() +# +# for(i in services){ +# schema <- check_OGC_requests(endpoint = i, type = "schema") +# 
properties <- names(schema$properties) +# property_list[[i]] <- properties +# } +# rm(schema, i, services, properties) +# save(countyCd, stateCd, parameterCdFile, pCodeToName, property_list, offsetLibrary, +# file = "R/sysdata.rda", compress = "xz") +# +# # nolint end \ No newline at end of file diff --git a/R/deal_with_empty.R b/R/deal_with_empty.R new file mode 100644 index 00000000..e18901b5 --- /dev/null +++ b/R/deal_with_empty.R @@ -0,0 +1,75 @@ +#' Return a data frame if there's an empty response +#' +#' @param return_list data frame returned from walk_pages +#' @param properties A vector of requested columns +#' @param service character, can be any existing collection such +#' as "daily", "monitoring-locations", "time-series-metadata" +#' @param skipGeometry A logical for whether to return geometry +#' @param convertType A logical for whether to convert value to numeric +#' +#' @return data.frame +#' @noRd +#' @examples +#' +#' df <- dataRetrieval:::deal_with_empty(data.frame(NULL), +#' properties = c("time", "value"), +#' service = "daily") +#' +#' df2 <- dataRetrieval:::deal_with_empty(data.frame(NULL), +#' properties = NA, +#' service = "daily") +#' +deal_with_empty <- function(return_list, + properties, + service, + skipGeometry, + convertType, + no_paging = FALSE){ + + if(nrow(return_list) == 0){ + + if(all(is.na(properties))){ + schema <- check_OGC_requests(endpoint = service, type = "schema") + properties <- names(schema$properties) + } + return_list <- data.frame(matrix(nrow = 0, + ncol = length(properties))) + return_list <- lapply(return_list, as.character) + names(return_list) <- properties + + single_params <- c("datetime", "last_modified", "begin", "end", "time") + + for(i in single_params){ + if(i %in% names(return_list)){ + return_list[[i]] <- as.POSIXct(as.character(), origin = "1970-01-01") + } + } + + if(convertType && service == "daily"){ + return_list$time <- as.Date(as.character()) + } + + if(convertType && "value" %in% names(return_list)){ + return_list$value <- as.numeric() + } + + if(convertType && "contributing_drainage_area" %in% names(return_list)){ + return_list$contributing_drainage_area <- as.numeric() + } + + return_list <- data.frame(return_list) + return_list$geometry <- NULL + + if(!skipGeometry){ + if(!no_paging){ + return_list <- sf::st_as_sf(return_list, geometry = sf::st_sfc()) + } else { + return_list$x <- numeric() + return_list$y <- numeric() + } + } + + } + + return(return_list) +} \ No newline at end of file diff --git a/R/get_ogc_data.R b/R/get_ogc_data.R new file mode 100644 index 00000000..ba984a11 --- /dev/null +++ b/R/get_ogc_data.R @@ -0,0 +1,149 @@ +#' Coordinate the request and retrieval of OGC calls +#' +#' @param args arguments from individual functions +#' @param output_id Name of id column to return +#' @param service Endpoint name. 
+#' +#' @noRd +#' @return data.frame with attributes +get_ogc_data <- function(args, + output_id, + service){ + + args[["service"]] <- service + + args <- switch_arg_id(args, + id_name = output_id, + service = service) + + args <- check_limits(args) + + properties <- args[["properties"]] + args[["properties"]] <- switch_properties_id(properties, + id = output_id) + convertType <- args[["convertType"]] + args[["convertType"]] <- NULL + + max_results <- args[["max_results"]] + args[["max_results"]] <- NULL + + req <- do.call(construct_api_requests, args) + + if("no_paging" %in% names(args)){ + no_paging <- args[["no_paging"]] + args[["no_paging"]] <- NULL + } else { + no_paging <- FALSE + } + + if(no_paging){ + return_list <- get_csv(req, max_results) + } else { + return_list <- walk_pages(req, max_results) + } + + if(is.na(args[["skipGeometry"]])){ + skipGeometry <- FALSE + } else { + skipGeometry <- args[["skipGeometry"]] + } + + return_list <- deal_with_empty(return_list, properties, service, + skipGeometry, convertType, no_paging) + + if(convertType) return_list <- cleanup_cols(return_list, service = service) + + return_list <- rejigger_cols(return_list, properties, output_id) + + attr(return_list, "request") <- req + attr(return_list, "queryTime") <- Sys.time() + return_list +} + +order_results <- function(df){ + + if(all(c("time", "monitoring_location_id") %in% names(df))){ + df <- df[order(df$time, + df$monitoring_location_id), ] + } else if ("time" %in% names(df)) { + df <- df[order(df$time), ] + } + + return(df) +} + +move_id_col <- function(df, output_id){ + # attributes get dropped + req <- attr(df, "request") + queryTime <- attr(df, "queryTime") + + df <- df[, names(df)[names(df)!= output_id]] + if("time_series_id" %in% names(df)){ + df <- df[, c(names(df)[names(df)!= "time_series_id"], + "time_series_id")] + } + + if("field_visit_id" %in% names(df)){ + df <- df[, c(names(df)[names(df)!= "field_visit_id"], + "field_visit_id")] + } + + attr(df, "request") <- req + attr(df, "queryTime") <- queryTime + + return(df) +} + +#' Switch properties id +#' +#' If a user asks for either "id" or "output_id", it is only included in the +#' properties if that's the only column requested. "id" will always come back, +#' so it is not needed in the properties call. 
+#' +#' @noRd +#' @return list +#' @examples +#' +#' properties <- c("id", "state_name", "country_name") +#' dataRetrieval:::switch_properties_id(properties, +#' id = "monitoring_location_id") +#' +#' properties2 <- c("monitoring_location_id", "state_name", "country_name") +#' dataRetrieval:::switch_properties_id(properties2, +#' id = "monitoring_location_id") +#' +#' properties3 <- c("monitoring_locations_id", "state_name", "country_name") +#' dataRetrieval:::switch_properties_id(properties3, +#' id = "monitoring_location_id") +#' +#' properties4 <- c("monitoring_location_id") +#' dataRetrieval:::switch_properties_id(properties4, +#' id = "monitoring_location_id") +#' +#' properties5 <- c("monitoring_location_id", "geometry") +#' dataRetrieval:::switch_properties_id(properties5, +#' id = "monitoring_location_id") +#' +switch_properties_id <- function(properties, id){ + + orig_properties <- properties + if(!all(is.na(properties))){ + if("id" %in% properties){ + properties <- properties[properties != "id"] + } else if (id %in% properties){ + properties <- properties[properties != id] + } + + if("geometry" %in% properties){ + properties <- properties[properties != "geometry"] + } + + if(length(properties) == 0){ + # If a user requested only id and/or geometry, properties would now be empty + # geometry is taken care of with skipGeometry + properties <- "id" + } + } + + return(properties) +} diff --git a/R/get_ogc_documentation.R b/R/get_ogc_documentation.R new file mode 100644 index 00000000..0be10f37 --- /dev/null +++ b/R/get_ogc_documentation.R @@ -0,0 +1,151 @@ +#' Create service descriptions dynamically +#' +#' This function populates the parameter descriptions. +#' +#' @param service Character, can be any of the endpoints +#' @return list +#' @noRd +#' @examplesIf is_dataRetrieval_user() +#' +#' \donttest{ +#' ml_desc <- dataRetrieval:::get_description("monitoring-locations") +#' ml_desc +#' } +#' +get_description <- function(service){ + + query_ret <- get_collection() + + tags <- query_ret[["tags"]] + + service_index <- which(sapply(tags, function(x){ + x$name == service + })) + + tags[[service_index]][["description"]] + +} + +#' Get collection response +#' +#' +#' @return httr2 response +#' @noRd +#' @examplesIf is_dataRetrieval_user() +#' +#' \donttest{ +#' collection <- dataRetrieval:::get_collection() +#' collection +#' } +#' +get_collection <- function(){ + + check_collections <- base_url() |> + httr2::req_url_path_append("openapi") |> + httr2::req_url_query(f = "html#/server/getCollections") + + check_endpoints_req <- basic_request(check_collections) + + query_ret <- httr2::req_perform(check_endpoints_req) |> + httr2::resp_body_json() + + return(query_ret) +} + +#' Create parameter descriptions dynamically +#' +#' This function populates the parameter descriptions. 
+#' +#' @param service Character, can be any of the endpoints +#' @return list +#' @noRd +#' @examplesIf is_dataRetrieval_user() +#' +#' \donttest{ +#' ml <- dataRetrieval:::get_params("monitoring-locations") +#' ml$national_aquifer_code +#' } +#' +get_params <- function(service){ + + check_queryables_req <- base_url() |> + httr2::req_url_path_append("collections") |> + httr2::req_url_path_append(service) |> + httr2::req_url_path_append("schema") |> + basic_request() + + query_ret <- httr2::req_perform(check_queryables_req) |> + httr2::resp_body_json() + + params <- sapply(query_ret$properties, function(x) x[["description"]]) + +} + + +#' Get property list +#' +#' This function gets a list of available properties, and +#' renames the id column to what is used in dataRetrieval. +#' +#' @param service Character, can be any of the endpoints +#' @param output_id Character, dataRetrieval output name +#' @return list +#' @noRd +#' @examplesIf is_dataRetrieval_user() +#' +#' \donttest{ +#' dataRetrieval:::get_properties_for_docs("monitoring-locations", +#' "monitoring_location_id") +#' +#' } +#' +get_properties_for_docs <- function(service, output_id){ + + schema <- check_OGC_requests(endpoint = service, type = "schema") + properties <- names(schema$properties) + properties[properties == "id"] <- output_id + return(paste(properties, collapse = ", ")) + +} + +#' Check OGC requests +#' +#' @param endpoint Character, can be any existing collection +#' @param type Character, can be "queryables", "schema" +#' @export +#' @keywords internal +#' @return list +#' @examplesIf is_dataRetrieval_user() +#' +#' \donttest{ +#' +#' dv_queryables <- check_OGC_requests(endpoint = "daily", +#' type = "queryables") +#' dv_schema <- check_OGC_requests(endpoint = "daily", +#' type = "schema") +#' ts_meta_queryables <- check_OGC_requests(endpoint = "time-series-metadata", +#' type = "queryables") +#' ts_meta_schema <- check_OGC_requests(endpoint = "time-series-metadata", +#' type = "schema") +#' } +check_OGC_requests <- function(endpoint = "daily", + type = "queryables"){ + + match.arg(type, c("queryables", "schema")) + + match.arg(endpoint, c(pkg.env$api_endpoints, + pkg.env$metadata)) + + req <- base_url() |> + httr2::req_url_path_append("collections") |> + httr2::req_url_path_append(endpoint) |> + httr2::req_url_path_append(type) |> + basic_request() + + query_ret <- req |> + httr2::req_perform() |> + httr2::resp_body_json() + + return(query_ret) + +} diff --git a/R/readNWISunit.R b/R/readNWISunit.R index cd300d67..387f0898 100644 --- a/R/readNWISunit.R +++ b/R/readNWISunit.R @@ -91,9 +91,9 @@ readNWISuv <- function(siteNumbers, parameterCd, startDate = "", endDate = "", t service <- "iv_recent" } - .Deprecated(new = "read_waterdata_continuous", - package = "dataRetrieval", - msg = "NWIS servers are slated for decommission. Please begin to migrate to read_waterdata_continuous.") + # .Deprecated(new = "read_waterdata_continuous", + # package = "dataRetrieval", + # msg = "NWIS servers are slated for decommission. Please begin to migrate to read_waterdata_continuous.") url <- constructNWISURL(siteNumbers, diff --git a/R/read_waterdata_continuous.R b/R/read_waterdata_continuous.R index 0859c3a1..be1fb2e7 100644 --- a/R/read_waterdata_continuous.R +++ b/R/read_waterdata_continuous.R @@ -8,10 +8,16 @@ #' for new direct download functions that are expected to be available sometime #' in 2026. 
#' +#' Geometry output is not supported in the continuous data API +#' #' @export #' @param monitoring_location_id `r get_params("continuous")$monitoring_location_id` #' @param parameter_code `r get_params("continuous")$parameter_code` -#' @param time `r get_params("continuous")$time` +#' @param time `r get_params("continuous")$time`. +#' You can also use a vector of length 2: the first value being the starting date, +#' the second value being the ending date. NA's within the vector indicate a +#' half-bound date. For example, c("2024-01-01", NA) will return all data starting +#' at 2024-01-01. #' @param value `r get_params("continuous")$value` #' @param unit_of_measure `r get_params("continuous")$unit_of_measure` #' @param approval_status `r get_params("continuous")$approval_status` @@ -23,7 +29,8 @@ #' Requesting anything else will most-likely cause a timeout. #' @param properties A vector of requested columns to be returned from the query. #' Available options are: -#' `r schema <- check_OGC_requests(endpoint = "continuous", type = "schema"); paste(names(schema$properties)[!names(schema$properties) %in% c("id", "internal_id")], collapse = ", ")` +#' `r dataRetrieval:::get_properties_for_docs("continuous", "continuous_id")`. +#' The default (`NA`) will return all columns of the data. #' @param limit The optional limit parameter is used to control the subset of the #' selected features that should be returned in each page. The maximum allowable #' limit is 50000. It may be beneficial to set this number lower if your internet @@ -34,6 +41,10 @@ #' @param convertType logical, defaults to `TRUE`. If `TRUE`, the function #' will convert the data to dates and qualifier to string vector, and sepcifically #' order the returning data frame by time and monitoring_location_id. +#' @param no_paging logical, defaults to `FALSE`. If `TRUE`, the data will +#' be requested from a native csv format. This can be dangerous because the +#' data will cut off at 50,000 rows without indication that more data +#' is available. Use `TRUE` with caution. 
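# A minimal sketch of the arguments described above: `time` accepts either a
# single value or a length-2 start/end vector, and `no_paging = TRUE` uses the
# faster CSV path at the cost of silently capping results at 50,000 rows.
# The site and parameter code reuse values that appear in the package examples;
# a one-day request like this should stay well under that cap.
site <- "USGS-02238500"
uv_jan <- read_waterdata_continuous(monitoring_location_id = site,
                                    parameter_code = "00060",
                                    time = c("2024-01-01", "2024-01-02"),
                                    no_paging = TRUE)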
#' @examplesIf is_dataRetrieval_user() #' #' \donttest{ @@ -70,7 +81,8 @@ read_waterdata_continuous <- function(monitoring_location_id = NA_character_, time = NA_character_, limit = NA, max_results = NA, - convertType = TRUE){ + convertType = TRUE, + no_paging = FALSE){ service <- "continuous" output_id <- "continuous_id" @@ -87,27 +99,12 @@ read_waterdata_continuous <- function(monitoring_location_id = NA_character_, service) if(convertType){ - return_list <- order_results(return_list, properties) - return_list <- return_list[, names(return_list)[names(return_list)!= output_id]] - if("time_series_id" %in% names(return_list)){ - return_list <- return_list[, c( names(return_list)[names(return_list)!= "time_series_id"], - "time_series_id")] - } + return_list <- order_results(return_list) + return_list <- move_id_col(return_list, output_id) } return(return_list) } -order_results <- function(return_list, properties){ - - if(all(is.na(properties)) | - all(c("time", "monitoring_location_id") %in% properties)){ - return_list <- return_list[order(return_list$time, - return_list$monitoring_location_id), ] - } else if ("time" %in% properties) { - return_list <- return_list[order(return_list$time), ] - } - - return(return_list) -} + diff --git a/R/read_waterdata_daily.R b/R/read_waterdata_daily.R index 8359e7a9..6869dd59 100644 --- a/R/read_waterdata_daily.R +++ b/R/read_waterdata_daily.R @@ -7,6 +7,10 @@ #' @param parameter_code `r get_params("daily")$parameter_code` #' @param statistic_id `r get_params("daily")$statistic_id` #' @param time `r get_params("daily")$time` +#' You can also use a vector of length 2: the first value being the starting date, +#' the second value being the ending date. NA's within the vector indicate a +#' half-bound date. For example, c("2024-01-01", NA) will return all data starting +#' at 2024-01-01. #' @param value `r get_params("daily")$value` #' @param unit_of_measure `r get_params("daily")$unit_of_measure` #' @param approval_status `r get_params("daily")$approval_status` @@ -15,7 +19,8 @@ #' @param qualifier `r get_params("daily")$qualifier` #' @param properties A vector of requested columns to be returned from the query. #' Available options are: -#' `r schema <- check_OGC_requests(endpoint = "daily", type = "schema"); paste(names(schema$properties)[!names(schema$properties) %in% c("id")], collapse = ", ")` +#' `r dataRetrieval:::get_properties_for_docs("daily", "daily_id")`. +#' The default (`NA`) will return all columns of the data. #' @param bbox Only features that have a geometry that intersects the bounding #' box are selected.The bounding box is provided as four or six numbers, depending #' on whether the coordinate reference system includes a vertical axis (height or @@ -34,6 +39,10 @@ #' information. #' @param convertType logical, defaults to `TRUE`. If `TRUE`, the function #' will convert the data to dates and qualifier to string vector. +#' @param no_paging logical, defaults to `FALSE`. If `TRUE`, the data will +#' be requested from a native csv format. This can be dangerous because the +#' data will cut off at 50,000 rows without indication that more data +#' is available. Use `TRUE` with caution. 
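# A sketch of how `limit` and `max_results` interact after this change,
# following the `check_limits()` logic added above: `limit` controls the page
# size of each API request (capped at 50,000), `max_results` caps the total
# number of rows returned across pages, and `no_paging = TRUE` skips paging
# entirely, silently dropping anything past 50,000 rows.
first_rows <- read_waterdata_daily(monitoring_location_id = "USGS-02238500",
                                   parameter_code = "00060",
                                   max_results = 100)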
#' @examplesIf is_dataRetrieval_user() #' #' \donttest{ @@ -65,6 +74,10 @@ #' parameter_code = c("00060", "00010"), #' limit = 500, #' time = c("2023-01-01", "2024-01-01")) +#' +#' dv_data_quick <- read_waterdata_daily(monitoring_location_id = site, +#' parameter_code = "00060", +#' no_paging = TRUE) #' #' } read_waterdata_daily <- function(monitoring_location_id = NA_character_, @@ -82,7 +95,8 @@ read_waterdata_daily <- function(monitoring_location_id = NA_character_, bbox = NA, limit = NA, max_results = NA, - convertType = TRUE){ + convertType = TRUE, + no_paging = FALSE){ service <- "daily" output_id <- "daily_id" @@ -93,12 +107,8 @@ read_waterdata_daily <- function(monitoring_location_id = NA_character_, service) if(convertType){ - return_list <- order_results(return_list, properties) - return_list <- return_list[,names(return_list)[names(return_list)!= output_id]] - if("time_series_id" %in% names(return_list)){ - return_list <- return_list[, c( names(return_list)[names(return_list)!= "time_series_id"], - "time_series_id")] - } + return_list <- order_results(return_list) + return_list <- move_id_col(return_list, output_id) } return(return_list) diff --git a/R/read_waterdata_field_measurements.R b/R/read_waterdata_field_measurements.R index 2f56377b..4c560538 100644 --- a/R/read_waterdata_field_measurements.R +++ b/R/read_waterdata_field_measurements.R @@ -7,6 +7,10 @@ #' @param parameter_code `r get_params("field-measurements")$parameter_code` #' @param observing_procedure_code `r get_params("field-measurements")$observing_procedure_code` #' @param time `r get_params("field-measurements")$time` +#' You can also use a vector of length 2: the first value being the starting date, +#' the second value being the ending date. NA's within the vector indicate a +#' half-bound date. For example, c("2024-01-01", NA) will return all data starting +#' at 2024-01-01. #' @param value `r get_params("field-measurements")$value` #' @param unit_of_measure `r get_params("field-measurements")$unit_of_measure` #' @param approval_status `r get_params("field-measurements")$approval_status` @@ -18,7 +22,8 @@ #' @param measuring_agency `r get_params("field-measurements")$measuring_agency` #' @param properties A vector of requested columns to be returned from the query. #' Available options are: -#' `r schema <- check_OGC_requests(endpoint = "field-measurements", type = "schema"); paste(names(schema$properties)[!names(schema$properties) %in% c("id")], collapse = ", ")` +#' `r dataRetrieval:::get_properties_for_docs("field-measurements", "field_measurement_id")`. +#' The default (`NA`) will return all columns of the data. #' @param bbox Only features that have a geometry that intersects the bounding #' box are selected.The bounding box is provided as four or six numbers, depending #' on whether the coordinate reference system includes a vertical axis (height or @@ -37,6 +42,10 @@ #' information. #' @param convertType logical, defaults to `TRUE`. If `TRUE`, the function #' will convert the data to dates and qualifier to string vector. +#' @param no_paging logical, defaults to `FALSE`. If `TRUE`, the data will +#' be requested from a native csv format. This can be dangerous because the +#' data will cut off at 50,000 rows without indication that more data +#' is available. Use `TRUE` with caution. 
#' @examplesIf is_dataRetrieval_user() #' #' \donttest{ @@ -89,7 +98,8 @@ read_waterdata_field_measurements <- function(monitoring_location_id = NA_charac bbox = NA, limit = NA, max_results = NA, - convertType = TRUE){ + convertType = TRUE, + no_paging = FALSE){ service <- "field-measurements" output_id <- "field_measurement_id" @@ -100,12 +110,8 @@ read_waterdata_field_measurements <- function(monitoring_location_id = NA_charac service) if(convertType){ - return_list <- order_results(return_list, properties) - return_list <- return_list[,names(return_list)[names(return_list)!= output_id]] - if("field_visit_id" %in% names(return_list)){ - return_list <- return_list[, c( names(return_list)[names(return_list)!= "field_visit_id"], - "field_visit_id")] - } + return_list <- order_results(return_list) + return_list <- move_id_col(return_list, output_id) } return(return_list) diff --git a/R/read_waterdata_latest_continuous.R b/R/read_waterdata_latest_continuous.R index 8b95386d..6327e383 100644 --- a/R/read_waterdata_latest_continuous.R +++ b/R/read_waterdata_latest_continuous.R @@ -6,6 +6,10 @@ #' @param monitoring_location_id `r get_params("latest-continuous")$monitoring_location_id` #' @param parameter_code `r get_params("latest-continuous")$parameter_code` #' @param time `r get_params("latest-continuous")$time` +#' You can also use a vector of length 2: the first value being the starting date, +#' the second value being the ending date. NA's within the vector indicate a +#' half-bound date. For example, c("2024-01-01", NA) will return all data starting +#' at 2024-01-01. #' @param value `r get_params("latest-continuous")$value` #' @param unit_of_measure `r get_params("latest-continuous")$unit_of_measure` #' @param approval_status `r get_params("latest-continuous")$approval_status` @@ -17,7 +21,8 @@ #' Requesting anything else will most-likely cause a timeout. #' @param properties A vector of requested columns to be returned from the query. #' Available options are: -#' `r schema <- check_OGC_requests(endpoint = "latest-continuous", type = "schema"); paste(names(schema$properties)[!names(schema$properties) %in% c("id")], collapse = ", ")` +#' `r dataRetrieval:::get_properties_for_docs("latest-continuous", "latest_continuous_id")`. +#' The default (`NA`) will return all columns of the data. #' @param bbox Only features that have a geometry that intersects the bounding #' box are selected.The bounding box is provided as four or six numbers, depending #' on whether the coordinate reference system includes a vertical axis (height or @@ -36,6 +41,10 @@ #' information. #' @param convertType logical, defaults to `TRUE`. If `TRUE`, the function #' will convert the data to dates and qualifier to string vector. +#' @param no_paging logical, defaults to `FALSE`. If `TRUE`, the data will +#' be requested from a native csv format. This can be dangerous because the +#' data will cut off at 50,000 rows without indication that more data +#' is available. Use `TRUE` with caution. 
#' @examplesIf is_dataRetrieval_user() #' #' \donttest{ @@ -85,7 +94,8 @@ read_waterdata_latest_continuous <- function(monitoring_location_id = NA_charact bbox = NA, limit = NA, max_results = NA, - convertType = TRUE){ + convertType = TRUE, + no_paging = FALSE){ service <- "latest-continuous" output_id <- "latest_continuous_id" @@ -96,12 +106,8 @@ read_waterdata_latest_continuous <- function(monitoring_location_id = NA_charact service) if(convertType){ - return_list <- order_results(return_list, properties) - return_list <- return_list[, names(return_list)[names(return_list)!= output_id]] - if("time_series_id" %in% names(return_list)){ - return_list <- return_list[, c( names(return_list)[names(return_list)!= "time_series_id"], - "time_series_id")] - } + return_list <- order_results(return_list) + return_list <- move_id_col(return_list, output_id) } return(return_list) diff --git a/R/read_waterdata_latest_daily.R b/R/read_waterdata_latest_daily.R index 5e98cc78..2f72a3b6 100644 --- a/R/read_waterdata_latest_daily.R +++ b/R/read_waterdata_latest_daily.R @@ -7,6 +7,10 @@ #' @param parameter_code `r get_params("latest-daily")$parameter_code` #' @param statistic_id `r get_params("latest-daily")$statistic_id` #' @param time `r get_params("latest-daily")$time` +#' You can also use a vector of length 2: the first value being the starting date, +#' the second value being the ending date. NA's within the vector indicate a +#' half-bound date. For example, c("2024-01-01", NA) will return all data starting +#' at 2024-01-01. #' @param value `r get_params("latest-daily")$value` #' @param unit_of_measure `r get_params("latest-daily")$unit_of_measure` #' @param approval_status `r get_params("latest-daily")$approval_status` @@ -15,7 +19,8 @@ #' @param qualifier `r get_params("latest-daily")$qualifier` #' @param properties A vector of requested columns to be returned from the query. #' Available options are: -#' `r schema <- check_OGC_requests(endpoint = "latest-daily", type = "schema"); paste(names(schema$properties)[!names(schema$properties) %in% c("id")], collapse = ", ")` +#' `r dataRetrieval:::get_properties_for_docs("latest-daily", "latest_daily_id")`. +#' The default (`NA`) will return all columns of the data. #' @param bbox Only features that have a geometry that intersects the bounding #' box are selected.The bounding box is provided as four or six numbers, depending #' on whether the coordinate reference system includes a vertical axis (height or @@ -34,6 +39,10 @@ #' information. #' @param convertType logical, defaults to `TRUE`. If `TRUE`, the function #' will convert the data to dates and qualifier to string vector. +#' @param no_paging logical, defaults to `FALSE`. If `TRUE`, the data will +#' be requested from a native csv format. This can be dangerous because the +#' data will cut off at 50,000 rows without indication that more data +#' is available. Use `TRUE` with caution. 
#' @examplesIf is_dataRetrieval_user() #' #' \donttest{ @@ -76,7 +85,8 @@ read_waterdata_latest_daily <- function(monitoring_location_id = NA_character_, bbox = NA, limit = NA, max_results = NA, - convertType = TRUE){ + convertType = TRUE, + no_paging = FALSE){ service <- "latest-daily" output_id <- "latest_daily_id" @@ -87,12 +97,8 @@ read_waterdata_latest_daily <- function(monitoring_location_id = NA_character_, service) if(convertType){ - return_list <- order_results(return_list, properties) - return_list <- return_list[,names(return_list)[names(return_list)!= output_id]] - if("time_series_id" %in% names(return_list)){ - return_list <- return_list[, c( names(return_list)[names(return_list)!= "time_series_id"], - "time_series_id")] - } + return_list <- order_results(return_list) + return_list <- move_id_col(return_list, output_id) } return(return_list) } diff --git a/R/read_waterdata_metadata.R b/R/read_waterdata_metadata.R index 4b617043..8a25c2db 100644 --- a/R/read_waterdata_metadata.R +++ b/R/read_waterdata_metadata.R @@ -39,14 +39,8 @@ read_waterdata_metadata <- function(collection, max_results = NA, limit = NA){ - - available <- c("parameter-codes", "agency-codes", "altitude-datums", "aquifer-codes", - "aquifer-types", "coordinate-accuracy-codes", "coordinate-datum-codes", - "coordinate-method-codes", "hydrologic-unit-codes", "medium-codes", - "national-aquifer-codes", "reliability-codes", "site-types", "statistic-codes", - "topographic-codes", "time-zone-codes") - match.arg(collection, available) + match.arg(collection, pkg.env$metadata) output_id <- gsub("-", "_", collection) last_letter <- substr(output_id, @@ -65,7 +59,7 @@ read_waterdata_metadata <- function(collection, max_results = max_results)) return_list <- walk_pages(data_req, max_results) - + return_list <- rejigger_cols(df = return_list, properties = NA, output_id = output_id) diff --git a/R/read_waterdata_monitoring_location.R b/R/read_waterdata_monitoring_location.R index 77d9fb2c..50baa84f 100644 --- a/R/read_waterdata_monitoring_location.R +++ b/R/read_waterdata_monitoring_location.R @@ -45,7 +45,8 @@ #' @param depth_source_code `r get_params("monitoring-locations")$depth_source_code` #' @param properties A vector of requested columns to be returned from the query. #' Available options are: -#' `r schema <- check_OGC_requests(endpoint = "monitoring-locations", type = "schema"); paste(names(schema$properties), collapse = ", ")`. +#' `r dataRetrieval:::get_properties_for_docs("monitoring-locations", "monitoring_location_id")`. +#' The default (`NA`) will return all columns of the data. #' @param bbox Only features that have a geometry that intersects the bounding #' box are selected.The bounding box is provided as four or six numbers, depending #' on whether the coordinate reference system includes a vertical axis (height or diff --git a/R/read_waterdata_parameter_codes.R b/R/read_waterdata_parameter_codes.R index f9f59e18..d1b9cf98 100644 --- a/R/read_waterdata_parameter_codes.R +++ b/R/read_waterdata_parameter_codes.R @@ -16,7 +16,8 @@ #' @param epa_equivalence `r get_params("parameter-codes")$epa_equivalence` #' @param properties A vector of requested columns to be returned from the query. #' Available options are: -#' `r schema <- check_OGC_requests(endpoint = "parameter-codes", type = "schema"); paste(names(schema$properties), collapse = ", ")`. +#' `r dataRetrieval:::get_properties_for_docs("parameter-codes", "parameter_code_id")`. +#' The default (`NA`) will return all columns of the data. 
#' @param limit The optional limit parameter is used to control the subset of the #' selected features that should be returned in each page. The maximum allowable #' limit is 50000. It may be beneficial to set this number lower if your internet @@ -66,6 +67,7 @@ read_waterdata_parameter_codes <- function(parameter_code = NA_character_, args[["convertType"]] <- FALSE args[["skipGeometry"]] <- TRUE args[["bbox"]] <- NA + args[["no_paging"]] <- FALSE # change if we're ever over 50,000 if(all(lengths(args) == 1)){ return_list <- suppressWarnings(get_ogc_data(args = args, @@ -74,15 +76,14 @@ read_waterdata_parameter_codes <- function(parameter_code = NA_character_, } else { message("Current API functionality requires pulling the full parameter-codes list. -It is expected that updates to the API will eliminate this need, but in the meantime -consider running read_waterdata_parameter_code() with no query parameters, and filtering -in a post-processing step.") +It is expected that updates to the API will eliminate this need.") return_list <- read_waterdata_metadata(collection = service, max_results = max_results, limit = limit) args[["convertType"]] <- NULL args[["skipGeometry"]] <- NULL + args[["no_paging"]] <- NULL args_to_filter <- args[!is.na(args)] for(param in names(args_to_filter)){ return_list <- return_list[return_list[[param]] %in% args_to_filter[[param]],] diff --git a/R/read_waterdata_ts_meta.R b/R/read_waterdata_ts_meta.R index 7f5be9c8..9126445c 100644 --- a/R/read_waterdata_ts_meta.R +++ b/R/read_waterdata_ts_meta.R @@ -22,7 +22,8 @@ #' @param web_description `r get_params("time-series-metadata")$web_description` #' @param properties A vector of requested columns to be returned from the query. #' Available options are: -#' `r schema <- check_OGC_requests(endpoint = "time-series-metadata", type = "schema"); paste(names(schema$properties)[!names(schema$properties) %in% c("id")], collapse = ", ")` +#' `r dataRetrieval:::get_properties_for_docs("time-series-metadata", "time_series_id")`. +#' The default (`NA`) will return all columns of the data. #' @param time_series_id `r get_params("time-series-metadata")$id` #' @param bbox Only features that have a geometry that intersects the bounding #' box are selected.The bounding box is provided as four or six numbers, depending @@ -42,6 +43,10 @@ #' @param skipGeometry This option can be used to skip response geometries for #' each feature. The returning object will be a data frame with no spatial #' information. +#' @param no_paging logical, defaults to `FALSE`. If `TRUE`, the data will +#' be requested from a native csv format. This can be dangerous because the +#' data will cut off at 50,000 rows without indication that more data +#' is available. Use `TRUE` with caution. #' @examplesIf is_dataRetrieval_user() #' #' \donttest{ @@ -83,7 +88,8 @@ read_waterdata_ts_meta <- function(monitoring_location_id = NA_character_, limit = NA, max_results = NA, bbox = NA, - convertType = FALSE){ + convertType = FALSE, + no_paging = FALSE){ service = "time-series-metadata" output_id <- "time_series_id" diff --git a/R/rejigger_cols.R b/R/rejigger_cols.R new file mode 100644 index 00000000..cf28fc96 --- /dev/null +++ b/R/rejigger_cols.R @@ -0,0 +1,97 @@ +#' Rejigger and rename +#' +#' Reorder columns based on users property input. +#' Add "service" prefix to returned "id" column. +#' This allows better integration with other endpoints. 
+#' +#' @param df data frame returned from walk_pages +#' @param properties A vector of requested columns +#' @param service character, can be any existing collection such +#' as "daily", "monitoring-locations", "time-series-metadata" +#' +#' @return data.frame +#' @noRd +#' @examples +#' +#' df <- dataRetrieval:::deal_with_empty(data.frame(NULL), +#' properties = c("state_code", "county_code", "id"), +#' service = "monitoring-locations") +#' df2 <- dataRetrieval:::rejigger_cols(df, +#' properties = c("state_code", "id", "county_code"), +#' output_id = "monitoring_location_id") +#' +#' df3 <- dataRetrieval:::rejigger_cols(df, +#' properties = c("state_code", "monitoring_location_id", "county_code"), +#' output_id = "monitoring_location_id") +#' +rejigger_cols <- function(df, properties, output_id){ + + if(!all(is.na(properties))){ + if(!"id" %in% properties){ + if(output_id %in% properties){ + names(df)[(names(df) == "id")] <- output_id + } + } + df <- df[, properties] + } else { + names(df)[(names(df) == "id")] <- output_id + } + df +} + + +#' Convert columns if needed +#' +#' These are columns that have caused problems in testing. +#' Mostly if the columns are empty on 1 page, but not the next. +#' The qualifier column also comes back as a list column. This +#' is fine for many, but others prefer a character column. +#' +#' +#' @param df data frame returned from walk_pages +#' @param service character, can be any existing collection such +#' as "daily" +#' @return data.frame +#' @noRd +#' @examples +#' +#' df <- dataRetrieval:::deal_with_empty(data.frame(NULL), +#' properties = c("time", "value", "id", "qualifier"), +#' service = "daily") +#' df2 <- dataRetrieval:::rejigger_cols(df, +#' properties = c("value", "id", "time", "qualifier"), +#' service = "daily") +#' df3 <- dataRetrieval:::cleanup_cols(df2) +#' +cleanup_cols <- function(df, service = "daily"){ + + if("qualifier" %in% names(df)){ + if(!all(is.na(df$qualifier))){ + df$qualifier <- vapply(X = df$qualifier, + FUN = function(x) paste(x, collapse = ", "), + FUN.VALUE = c(NA_character_)) + } + } + + if("time" %in% names(df)){ + if(service == "daily"){ + df$time <- as.Date(df$time) + } + # by default, the data is put in POSIXct and seems + # to be pretty smart about the offset/tzone + } + + if("value" %in% names(df)){ + df$value <- as.numeric(df$value) + } + + if("contributing_drainage_area" %in% names(df)){ + df$contributing_drainage_area <- as.numeric(df$contributing_drainage_area) + } + + if("drainage_area" %in% names(df)){ + df$drainage_area <- as.numeric(df$drainage_area) + } + + df +} diff --git a/R/sysdata.rda b/R/sysdata.rda index e9befca0..2a50d5aa 100644 Binary files a/R/sysdata.rda and b/R/sysdata.rda differ diff --git a/R/walk_pages.R b/R/walk_pages.R index f5375c41..b035b436 100644 --- a/R/walk_pages.R +++ b/R/walk_pages.R @@ -1,174 +1,75 @@ -#' Return a data frame if there's an empty response +#' Walk through the pages #' -#' @param return_list data frame returned from walk_pages -#' @param properties A vector of requested columns -#' @param service character, can be any existing collection such -#' as "daily", "monitoring-locations", "time-series-metadata" -#' @param skipGeometry A logical for whether to return geometry -#' @param convertType A logical for whether to convert value to numeric +#' @param req httr2 initial request #' -#' @return data.frame #' @noRd -#' @examples -#' -#' df <- dataRetrieval:::deal_with_empty(data.frame(NULL), -#' properties = c("time", "value"), -#' service = "daily") -#' -#' df2 <- 
dataRetrieval:::deal_with_empty(data.frame(NULL), -#' properties = NA, -#' service = "daily") -#' -deal_with_empty <- function(return_list, properties, service, - skipGeometry, - convertType){ +#' @return data.frame with attributes +walk_pages <- function(req, max_results){ - if(nrow(return_list) == 0){ - - if(all(is.na(properties))){ - schema <- check_OGC_requests(endpoint = service, type = "schema") - properties <- names(schema$properties) - } - return_list <- data.frame(matrix(nrow = 0, - ncol = length(properties))) - return_list <- lapply(return_list, as.character) - names(return_list) <- properties - - single_params <- c("datetime", "last_modified", "begin", "end", "time") - - for(i in single_params){ - if(i %in% names(return_list)){ - return_list[[i]] <- as.POSIXct(as.character(), origin = "1970-01-01") - } - } - - if(convertType && service == "daily"){ - return_list$time <- as.Date(as.character()) - } + message("Requesting:\n", req$url) + current_api_limit <- 50000 + + if(is.na(max_results) | max_results > current_api_limit){ + resps <- httr2::req_perform_iterative(req, + next_req = next_req_url, + max_reqs = Inf, on_error = "return") + failures <- resps |> + httr2::resps_failures() |> + httr2::resps_requests() - if(convertType && "value" %in% names(return_list)){ - return_list$value <- as.numeric() + if(length(failures) > 0){ + stop(resps[[1]][["message"]]) } - if(convertType && "contributing_drainage_area" %in% names(return_list)){ - return_list$contributing_drainage_area <- as.numeric() + return_list <- data.frame() + for(resp in resps){ + df1 <- get_resp_data(resp) + return_list <- rbind(return_list, df1) } - - return_list <- data.frame(return_list) - return_list$geometry <- NULL - if(!skipGeometry){ - return_list <- sf::st_as_sf(return_list, geometry = sf::st_sfc()) + if(!is.na(max_results) & max_results > current_api_limit){ + return_list <- return_list[1:max_results, ] } + ###################################### + } else { + resps <- httr2::req_perform(req) + return_list <- get_resp_data(resps) } - + return(return_list) } -#' Rejigger and rename -#' -#' Reorder columns based on users property input. -#' Add "service" prefix to returned "id" column. -#' This allows better integration with other endpoints. +#' Get single response data frame #' -#' @param df data frame returned from walk_pages -#' @param properties A vector of requested columns -#' @param service character, can be any existing collection such -#' as "daily", "monitoring-locations", "time-series-metadata" +#' Depending on skipGeometry to decide to use sf or not. 
#' -#' @return data.frame #' @noRd -#' @examples -#' -#' df <- dataRetrieval:::deal_with_empty(data.frame(NULL), -#' properties = c("state_code", "county_code", "id"), -#' service = "monitoring-locations") -#' df2 <- dataRetrieval:::rejigger_cols(df, -#' properties = c("state_code", "id", "county_code"), -#' output_id = "monitoring_location_id") -#' -#' df3 <- dataRetrieval:::rejigger_cols(df, -#' properties = c("state_code", "monitoring_location_id", "county_code"), -#' output_id = "monitoring_location_id") -#' -rejigger_cols <- function(df, properties, output_id){ - - if(!all(is.na(properties))){ - if(!"id" %in% properties){ - if(output_id %in% properties){ - names(df)[(names(df) == "id")] <- output_id - } else { - # just in case users become aware of the singular/plural issue - # where the endpoint name is plural, but input to other endpoints are singular - plural <- gsub("_id", "s_id", output_id) - if(plural %in% properties){ - names(df)[(names(df) == "id")] <- plural - } - } - } - df <- df[, properties] - } else { - names(df)[(names(df) == "id")] <- output_id - } - df -} - - -#' Convert columns if needed -#' -#' These are columns that have caused problems in testing. -#' Mostly if the columns are empty on 1 page, but not the next. -#' The qualifier column also comes back as a list column. This -#' is fine for many, but others prefer a character column. #' +#' @param resp httr2 response from last request #' -#' @param df data frame returned from walk_pages -#' @param service character, can be any existing collection such -#' as "daily" #' @return data.frame -#' @noRd -#' @examples #' -#' df <- dataRetrieval:::deal_with_empty(data.frame(NULL), -#' properties = c("time", "value", "id", "qualifier"), -#' service = "daily") -#' df2 <- dataRetrieval:::rejigger_cols(df, -#' properties = c("value", "id", "time", "qualifier"), -#' service = "daily") -#' df3 <- dataRetrieval:::cleanup_cols(df2) -#' -cleanup_cols <- function(df, service = "daily"){ - - if("qualifier" %in% names(df)){ - if(!all(is.na(df$qualifier))){ - df$qualifier <- vapply(X = df$qualifier, - FUN = function(x) paste(x, collapse = ", "), - FUN.VALUE = c(NA_character_)) - } - } +get_resp_data <- function(resp) { - if("time" %in% names(df)){ - if(service == "daily"){ - df$time <- as.Date(df$time) - } - # by default, the data is put in POSIXct and seems - # to be pretty smart about the offset/tzone - } + body <- httr2::resp_body_json(resp) - if("value" %in% names(df)){ - df$value <- as.numeric(df$value) + if(isTRUE(body[["numberReturned"]] == 0)){ + return(data.frame()) } - if("contributing_drainage_area" %in% names(df)){ - df$contributing_drainage_area <- as.numeric(df$contributing_drainage_area) - } + use_sf <- !grepl("skipGeometry=true", resp$url, ignore.case = TRUE) + return_df <- sf::read_sf(httr2::resp_body_string(resp)) - if("drainage_area" %in% names(df)){ - df$drainage_area <- as.numeric(df$drainage_area) - } + if(!use_sf){ + return_df <- sf::st_drop_geometry(return_df) + if("AsGeoJSON(geometry)" %in% names(return_df)){ + return_df <- return_df[, !names(return_df) %in% "AsGeoJSON(geometry)"] + } + } + + return(return_df) - df } #' Next request URL @@ -183,9 +84,14 @@ cleanup_cols <- function(df, service = "daily"){ #' @return the url for the next request #' next_req_url <- function(resp, req) { - + body <- httr2::resp_body_json(resp) + if(isTRUE(body[["code"]] == "InvalidQuery")){ + message(body[["description"]]) + return(NULL) + } + if(isTRUE(body[["numberReturned"]] == 0)){ return(NULL) } @@ -200,134 +106,42 @@ 
next_req_url <- function(resp, req) { next_index <- which(sapply(links, function(x) x$rel) == "next") next_url <- links[[next_index]][["href"]] - + return(httr2::req_url(req = req, url = next_url)) } else { return(NULL) } } -#' Get single response data frame -#' -#' Depending on skipGeometry to decide to use sf or not. -#' -#' @noRd -#' -#' @param resp httr2 response from last request -#' -#' @return data.frame -#' -get_resp_data <- function(resp) { - - body <- httr2::resp_body_json(resp) - - if(isTRUE(body[["numberReturned"]] == 0)){ - return(data.frame()) - } - - use_sf <- !grepl("skipGeometry=true", resp$url, ignore.case = TRUE) - return_df <- sf::read_sf(httr2::resp_body_string(resp)) - - if(!use_sf){ - return_df <- sf::st_drop_geometry(return_df) - if("AsGeoJSON(geometry)" %in% names(return_df)){ - return_df <- return_df[, !names(return_df) %in% "AsGeoJSON(geometry)"] - } - } - return(return_df) +get_csv <- function(req, max_results){ -} + if(is.na(max_results)){ + max_results <- 50000 + } -#' Walk through the pages -#' -#' @param req httr2 initial request -#' -#' @noRd -#' @return data.frame with attributes -walk_pages <- function(req, max_results){ - message("Requesting:\n", req$url) - - if(is.na(max_results)){ - resps <- httr2::req_perform_iterative(req, - next_req = next_req_url, - max_reqs = Inf) - ###################################### - # So far I haven't tested this because I haven't had - # individual failures - failures <- resps |> - httr2::resps_failures() |> - httr2::resps_requests() - - if(length(failures) > 0){ - message("There were", length(failures), "failed requests.") - } - - return_list <- data.frame() - for(resp in resps){ - df1 <- get_resp_data(resp) - return_list <- rbind(return_list, df1) + skip_geo <- grepl("skipGeometry=true", req$url, ignore.case = TRUE) + resp <- httr2::req_perform(req) + + if(httr2::resp_has_body(resp)){ + return_list <- httr2::resp_body_string(resp) + df <- suppressMessages(readr::read_csv(file = return_list)) + if(skip_geo){ + df <- df[, names(df)[!names(df) %in% c("x", "y")]] + } else { + df <- sf::st_as_sf(df, coords = c("x","y")) + sf::st_crs(df) <- 4269 } - ###################################### + if(nrow(df) == max_results){ + warning("Missing data is probable. Use no_paging = FALSE to +ensure all requested data is returned.") + } } else { - resps <- httr2::req_perform(req) - return_list <- get_resp_data(resps) + df <- data.frame() } - - return(return_list) -} - - -#' Coordinate the request and retrieval of OGC calls -#' -#' @param args arguments from individual functions -#' @param output_id Name of id column to return -#' @param service Endpoint name. 
-#' @param max_results -#' -#' @noRd -#' @return data.frame with attributes -get_ogc_data <- function(args, - output_id, - service){ - - args[["service"]] <- service - - args <- switch_arg_id(args, - id_name = output_id, - service = service) - - properties <- args[["properties"]] - args[["properties"]] <- switch_properties_id(properties, - id_name = output_id, - service = service) - convertType <- args[["convertType"]] - args[["convertType"]] <- NULL - - req <- do.call(construct_api_requests, args) - max_results <- args[["max_results"]] - args[["max_results"]] <- NULL - - return_list <- walk_pages(req, max_results) - - if(is.na(args[["skipGeometry"]])){ - skipGeometry <- FALSE - } else { - skipGeometry <- args[["skipGeometry"]] - } - - return_list <- deal_with_empty(return_list, properties, service, - skipGeometry, convertType) - - if(convertType) return_list <- cleanup_cols(return_list, service = service) - - return_list <- rejigger_cols(return_list, properties, output_id) - - attr(return_list, "request") <- req - attr(return_list, "queryTime") <- Sys.time() - return(return_list) + return(df) } diff --git a/inst/CITATION b/inst/CITATION index 026d9ef6..a6f2aa60 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -24,12 +24,11 @@ bibentry(bibtype = "Manual", email = "akrall@usgs.gov", comment=c(ORCID = "0000-0003-2521-5043")), person("Lee", "Stanish", role="ctb", - email = "lstanish@usgs.gov", comment=c(ORCID = "0000-0002-9775-6861")), person("Joeseph", "Zemmels", role="ctb", email = "jzemmels@usgs.gov", comment=c(ORCID = "0009-0008-1463-6313")), - person("Elise", "Hinman", role="ctb", + person("Elise", "Hinman", role="aut", email = "ehinman@usgs.gov", comment=c(ORCID = "0000-0001-5396-1583")), person("Michael", "Mahoney", role="ctb", @@ -39,7 +38,7 @@ bibentry(bibtype = "Manual", title = "dataRetrieval: R packages for discovering and retrieving water data available from U.S. federal hydrologic web services", publisher = "U.S. Geological Survey", address="Reston, VA", - version = "2.7.21", + version = "2.7.22", institution = "U.S. Geological Survey", year = 2025, doi = "10.5066/P9X4L3GE", diff --git a/man/check_OGC_requests.Rd b/man/check_OGC_requests.Rd index c6dc1bac..1d729796 100644 --- a/man/check_OGC_requests.Rd +++ b/man/check_OGC_requests.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/construct_api_requests.R +% Please edit documentation in R/get_ogc_documentation.R \name{check_OGC_requests} \alias{check_OGC_requests} \title{Check OGC requests} diff --git a/man/construct_api_requests.Rd b/man/construct_api_requests.Rd index 0c56fc39..3e691d75 100644 --- a/man/construct_api_requests.Rd +++ b/man/construct_api_requests.Rd @@ -8,9 +8,8 @@ construct_api_requests( service, properties = NA_character_, bbox = NA, - limit = NA, - max_results = NA, skipGeometry = FALSE, + no_paging = FALSE, ... ) } @@ -26,11 +25,6 @@ box are selected.The bounding box is provided as four or six numbers, depending on whether the coordinate reference system includes a vertical axis (height or depth).} -\item{limit}{The optional limit parameter limits the number of items that are -presented in the response document. Only items are counted that are on the -first level of the collection in the response document. Nested objects -contained within the explicitly requested items shall not be counted.} - \item{skipGeometry}{This option can be used to skip response geometries for each feature. 
The returning object will be a data frame with no spatial
information.}
diff --git a/man/countyCd.Rd b/man/countyCd.Rd
index af1ed628..ac2f93ee 100644
--- a/man/countyCd.Rd
+++ b/man/countyCd.Rd
@@ -21,7 +21,14 @@ Classic lookup table for counties. Has been replaced in functions with
\code{check_waterdata_sample_params("counties")}.
}
\examples{
-head(countyCd)
+\dontshow{if (is_dataRetrieval_user()) withAutoprint(\{ # examplesIf}
+
+\donttest{
+# Please migrate to:
+countyCd <- read_waterdata_metadata("counties")
+
+}
+\dontshow{\}) # examplesIf}
}
\keyword{USGS}
\keyword{countyCd}
diff --git a/man/parameterCdFile.Rd b/man/parameterCdFile.Rd
index 99d82bcc..40c5bc04 100644
--- a/man/parameterCdFile.Rd
+++ b/man/parameterCdFile.Rd
@@ -21,5 +21,12 @@ parameter_units \tab character \tab Parameter units\cr
Complete list of USGS parameter codes as of Oct. 24, 2024.
}
\examples{
-head(parameterCdFile[, 1:2])
+\dontshow{if (is_dataRetrieval_user()) withAutoprint(\{ # examplesIf}
+
+\donttest{
+# Please migrate to:
+parameterCds <- read_waterdata_metadata("parameter-codes")
+
+}
+\dontshow{\}) # examplesIf}
}
diff --git a/man/read_waterdata_continuous.Rd b/man/read_waterdata_continuous.Rd
index 489127b2..0f818683 100644
--- a/man/read_waterdata_continuous.Rd
+++ b/man/read_waterdata_continuous.Rd
@@ -18,7 +18,8 @@ read_waterdata_continuous(
   time = NA_character_,
   limit = NA,
   max_results = NA,
-  convertType = TRUE
+  convertType = TRUE,
+  no_paging = FALSE
 )
}
\arguments{
@@ -28,7 +29,8 @@ read_waterdata_continuous(
\item{properties}{A vector of requested columns to be returned from the query.
Available options are:
-geometry, time_series_id, monitoring_location_id, parameter_code, statistic_id, time, value, unit_of_measure, approval_status, qualifier, last_modified}
+geometry, continuous_id, internal_id, time_series_id, monitoring_location_id, parameter_code, statistic_id, time, value, unit_of_measure, approval_status, qualifier, last_modified.
+The default (\code{NA}) will return all columns of the data.}

\item{time_series_id}{A unique identifier representing a single time series. This corresponds to the \code{id} field in the \code{time-series-metadata} endpoint.}
@@ -66,7 +68,12 @@ Examples:
\item Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours
}

-Only features that have a \code{time} that intersects the value of datetime are selected. If a feature has multiple temporal properties, it is the decision of the server whether only a single temporal property is used to determine the extent or all relevant temporal properties.}
+Only features that have a \code{time} that intersects the value of datetime are selected. If a feature has multiple temporal properties, it is the decision of the server whether only a single temporal property is used to determine the extent or all relevant temporal properties.
+
+You can also use a vector of length 2: the first value being the starting date,
+the second value being the ending date. NA's within the vector indicate a
+half-bound date. For example, c("2024-01-01", NA) will return all data starting
+at 2024-01-01.}

\item{limit}{The optional limit parameter is used to control the subset of the
selected features that should be returned in each page. The maximum allowable
@@ -80,6 +87,11 @@ must be less than the requested limit.}

\item{convertType}{logical, defaults to \code{TRUE}.
If \code{TRUE}, the function will convert the data to dates and qualifier to string vector, and specifically order the returning data frame by time and monitoring_location_id.}
+
+\item{no_paging}{logical, defaults to \code{FALSE}. If \code{TRUE}, the data will
+be requested from a native csv format. This can be dangerous because the
+data will cut off at 50,000 rows without indication that more data
+is available. Use \code{TRUE} with caution.}
}
\description{
This is an early version of the continuous endpoint. It is feature-complete and provides access to the full continuous data record, and is being made available as we continue to work on performance improvements.
@@ -90,6 +102,8 @@
a single request. If no "time" is specified, the service will return the last
single year of data. If this is a bottleneck, please check back for new
direct download functions that are expected to be available sometime in
2026.
+
+Geometry output is not supported in the continuous data API.
}
\examples{
\dontshow{if (is_dataRetrieval_user()) withAutoprint(\{ # examplesIf}
diff --git a/man/read_waterdata_daily.Rd b/man/read_waterdata_daily.Rd
index a626f248..3c4195c9 100644
--- a/man/read_waterdata_daily.Rd
+++ b/man/read_waterdata_daily.Rd
@@ -20,7 +20,8 @@ read_waterdata_daily(
   bbox = NA,
   limit = NA,
   max_results = NA,
-  convertType = TRUE
+  convertType = TRUE,
+  no_paging = FALSE
 )
}
\arguments{
@@ -32,7 +33,8 @@ read_waterdata_daily(
\item{properties}{A vector of requested columns to be returned from the query.
Available options are:
-geometry, time_series_id, monitoring_location_id, parameter_code, statistic_id, time, value, unit_of_measure, approval_status, qualifier, last_modified}
+geometry, daily_id, time_series_id, monitoring_location_id, parameter_code, statistic_id, time, value, unit_of_measure, approval_status, qualifier, last_modified.
+The default (\code{NA}) will return all columns of the data.}

\item{time_series_id}{A unique identifier representing a single time series. This corresponds to the \code{id} field in the \code{time-series-metadata} endpoint.}
@@ -69,7 +71,12 @@ Examples:
\item Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours
}

-Only features that have a \code{time} that intersects the value of datetime are selected. If a feature has multiple temporal properties, it is the decision of the server whether only a single temporal property is used to determine the extent or all relevant temporal properties.}
+Only features that have a \code{time} that intersects the value of datetime are selected. If a feature has multiple temporal properties, it is the decision of the server whether only a single temporal property is used to determine the extent or all relevant temporal properties.
+
+You can also use a vector of length 2: the first value being the starting date,
+the second value being the ending date. NA's within the vector indicate a
+half-bound date. For example, c("2024-01-01", NA) will return all data starting
+at 2024-01-01.}

\item{bbox}{Only features that have a geometry that intersects the bounding
box are selected.The bounding box is provided as four or six numbers, depending
@@ -89,6 +96,11 @@ must be less than the requested limit.}

\item{convertType}{logical, defaults to \code{TRUE}. If \code{TRUE}, the function
will convert the data to dates and qualifier to string vector.}
+
+\item{no_paging}{logical, defaults to \code{FALSE}. If \code{TRUE}, the data will
+be requested from a native csv format.
This can be dangerous because the +data will cut off at 50,000 rows without indication that more data +is available. Use \code{TRUE} with caution.} } \description{ Daily data provide one data value to represent water conditions for the day. Throughout much of the history of the USGS, the primary water data available was daily data collected manually at the monitoring location once each day. With improved availability of computer storage and automated transmission of data, the daily data published today are generally a statistical summary or metric of the continuous data collected each day, such as the daily mean, minimum, or maximum value. Daily data are automatically calculated from the continuous data of the same parameter code and are described by parameter code and a statistic code. These data have also been referred to as “daily values” or “DV”. @@ -125,6 +137,10 @@ multi_site <- read_waterdata_daily(monitoring_location_id = c("USGS-01491000", parameter_code = c("00060", "00010"), limit = 500, time = c("2023-01-01", "2024-01-01")) + +dv_data_quick <- read_waterdata_daily(monitoring_location_id = site, + parameter_code = "00060", + no_paging = TRUE) } \dontshow{\}) # examplesIf} diff --git a/man/read_waterdata_field_measurements.Rd b/man/read_waterdata_field_measurements.Rd index c4eaf201..96341b9c 100644 --- a/man/read_waterdata_field_measurements.Rd +++ b/man/read_waterdata_field_measurements.Rd @@ -23,7 +23,8 @@ read_waterdata_field_measurements( bbox = NA, limit = NA, max_results = NA, - convertType = TRUE + convertType = TRUE, + no_paging = FALSE ) } \arguments{ @@ -35,7 +36,8 @@ read_waterdata_field_measurements( \item{properties}{A vector of requested columns to be returned from the query. Available options are: -geometry, field_visit_id, parameter_code, monitoring_location_id, observing_procedure_code, observing_procedure, value, unit_of_measure, time, qualifier, vertical_datum, approval_status, measuring_agency, last_modified} +geometry, field_measurement_id, field_visit_id, parameter_code, monitoring_location_id, observing_procedure_code, observing_procedure, value, unit_of_measure, time, qualifier, vertical_datum, approval_status, measuring_agency, last_modified. +The default (\code{NA}) will return all columns of the data.} \item{field_visit_id}{A universally unique identifier (UUID) for the field visit. Multiple measurements may be made during a single field visit.} @@ -78,7 +80,12 @@ Examples: \item Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours } -Only features that have a \code{time} that intersects the value of datetime are selected. If a feature has multiple temporal properties, it is the decision of the server whether only a single temporal property is used to determine the extent or all relevant temporal properties.} +Only features that have a \code{time} that intersects the value of datetime are selected. If a feature has multiple temporal properties, it is the decision of the server whether only a single temporal property is used to determine the extent or all relevant temporal properties. + +You can also use a vector of length 2: the first value being the starting date, +the second value being the ending date. NA's within the vector indicate a +half-bound date. 
For example, c("2024-01-01", NA) will return all data starting +at 2024-01-01.} \item{bbox}{Only features that have a geometry that intersects the bounding box are selected.The bounding box is provided as four or six numbers, depending @@ -98,6 +105,11 @@ must be less than the requested limit.} \item{convertType}{logical, defaults to \code{TRUE}. If \code{TRUE}, the function will convert the data to dates and qualifier to string vector.} + +\item{no_paging}{logical, defaults to \code{FALSE}. If \code{TRUE}, the data will +be requested from a native csv format. This can be dangerous because the +data will cut off at 50,000 rows without indication that more data +is available. Use \code{TRUE} with caution.} } \description{ Field measurements are physically measured values collected during a visit to the monitoring location. Field measurements consist of measurements of gage height and discharge, and readings of groundwater levels, and are primarily used as calibration readings for the automated sensors collecting continuous data. They are collected at a low frequency, and delivery of the data in WDFN may be delayed due to data processing time. diff --git a/man/read_waterdata_latest_continuous.Rd b/man/read_waterdata_latest_continuous.Rd index 6317e6f0..eb2490e6 100644 --- a/man/read_waterdata_latest_continuous.Rd +++ b/man/read_waterdata_latest_continuous.Rd @@ -20,7 +20,8 @@ read_waterdata_latest_continuous( bbox = NA, limit = NA, max_results = NA, - convertType = TRUE + convertType = TRUE, + no_paging = FALSE ) } \arguments{ @@ -35,7 +36,8 @@ Requesting anything else will most-likely cause a timeout.} \item{properties}{A vector of requested columns to be returned from the query. Available options are: -geometry, time_series_id, monitoring_location_id, parameter_code, statistic_id, time, value, unit_of_measure, approval_status, qualifier, last_modified} +geometry, latest_continuous_id, time_series_id, monitoring_location_id, parameter_code, statistic_id, time, value, unit_of_measure, approval_status, qualifier, last_modified. +The default (\code{NA}) will return all columns of the data.} \item{time_series_id}{A unique identifier representing a single time series. This corresponds to the \code{id} field in the \code{time-series-metadata} endpoint.} @@ -72,7 +74,12 @@ Examples: \item Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours } -Only features that have a \code{time} that intersects the value of datetime are selected. If a feature has multiple temporal properties, it is the decision of the server whether only a single temporal property is used to determine the extent or all relevant temporal properties.} +Only features that have a \code{time} that intersects the value of datetime are selected. If a feature has multiple temporal properties, it is the decision of the server whether only a single temporal property is used to determine the extent or all relevant temporal properties. + +You can also use a vector of length 2: the first value being the starting date, +the second value being the ending date. NA's within the vector indicate a +half-bound date. For example, c("2024-01-01", NA) will return all data starting +at 2024-01-01.} \item{bbox}{Only features that have a geometry that intersects the bounding box are selected.The bounding box is provided as four or six numbers, depending @@ -92,6 +99,11 @@ must be less than the requested limit.} \item{convertType}{logical, defaults to \code{TRUE}. 
If \code{TRUE}, the function will convert the data to dates and qualifier to string vector.} + +\item{no_paging}{logical, defaults to \code{FALSE}. If \code{TRUE}, the data will +be requested from a native csv format. This can be dangerous because the +data will cut off at 50,000 rows without indication that more data +is available. Use \code{TRUE} with caution.} } \description{ This endpoint provides the most recent observation for each time series of continuous data. Continuous data are collected via automated sensors installed at a monitoring location. They are collected at a high frequency and often at a fixed 15-minute interval. Depending on the specific monitoring location, the data may be transmitted automatically via telemetry and be available on WDFN within minutes of collection, while other times the delivery of data may be delayed if the monitoring location does not have the capacity to automatically transmit data. Continuous data are described by parameter name and parameter code. These data might also be referred to as "instantaneous values" or "IV" diff --git a/man/read_waterdata_latest_daily.Rd b/man/read_waterdata_latest_daily.Rd index e6f12602..63c35c49 100644 --- a/man/read_waterdata_latest_daily.Rd +++ b/man/read_waterdata_latest_daily.Rd @@ -20,7 +20,8 @@ read_waterdata_latest_daily( bbox = NA, limit = NA, max_results = NA, - convertType = TRUE + convertType = TRUE, + no_paging = FALSE ) } \arguments{ @@ -32,7 +33,8 @@ read_waterdata_latest_daily( \item{properties}{A vector of requested columns to be returned from the query. Available options are: -geometry, time_series_id, monitoring_location_id, parameter_code, statistic_id, time, value, unit_of_measure, approval_status, qualifier, last_modified} +geometry, latest_daily_id, time_series_id, monitoring_location_id, parameter_code, statistic_id, time, value, unit_of_measure, approval_status, qualifier, last_modified. +The default (\code{NA}) will return all columns of the data.} \item{time_series_id}{A unique identifier representing a single time series. This corresponds to the \code{id} field in the \code{time-series-metadata} endpoint.} @@ -69,7 +71,12 @@ Examples: \item Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours } -Only features that have a \code{time} that intersects the value of datetime are selected. If a feature has multiple temporal properties, it is the decision of the server whether only a single temporal property is used to determine the extent or all relevant temporal properties.} +Only features that have a \code{time} that intersects the value of datetime are selected. If a feature has multiple temporal properties, it is the decision of the server whether only a single temporal property is used to determine the extent or all relevant temporal properties. + +You can also use a vector of length 2: the first value being the starting date, +the second value being the ending date. NA's within the vector indicate a +half-bound date. For example, c("2024-01-01", NA) will return all data starting +at 2024-01-01.} \item{bbox}{Only features that have a geometry that intersects the bounding box are selected.The bounding box is provided as four or six numbers, depending @@ -89,6 +96,11 @@ must be less than the requested limit.} \item{convertType}{logical, defaults to \code{TRUE}. If \code{TRUE}, the function will convert the data to dates and qualifier to string vector.} + +\item{no_paging}{logical, defaults to \code{FALSE}. 
If \code{TRUE}, the data will +be requested from a native csv format. This can be dangerous because the +data will cut off at 50,000 rows without indication that more data +is available. Use \code{TRUE} with caution.} } \description{ Daily data provide one data value to represent water conditions for the day. Throughout much of the history of the USGS, the primary water data available was daily data collected manually at the monitoring location once each day. With improved availability of computer storage and automated transmission of data, the daily data published today are generally a statistical summary or metric of the continuous data collected each day, such as the daily mean, minimum, or maximum value. Daily data are automatically calculated from the continuous data of the same parameter code and are described by parameter code and a statistic code. These data have also been referred to as “daily values” or “DV”. diff --git a/man/read_waterdata_monitoring_location.Rd b/man/read_waterdata_monitoring_location.Rd index b9fba111..e45ad160 100644 --- a/man/read_waterdata_monitoring_location.Rd +++ b/man/read_waterdata_monitoring_location.Rd @@ -135,7 +135,8 @@ read_waterdata_monitoring_location( \item{properties}{A vector of requested columns to be returned from the query. Available options are: -geometry, id, agency_code, agency_name, monitoring_location_number, monitoring_location_name, district_code, country_code, country_name, state_code, state_name, county_code, county_name, minor_civil_division_code, site_type_code, site_type, hydrologic_unit_code, basin_code, altitude, altitude_accuracy, altitude_method_code, altitude_method_name, vertical_datum, vertical_datum_name, horizontal_positional_accuracy_code, horizontal_positional_accuracy, horizontal_position_method_code, horizontal_position_method_name, original_horizontal_datum, original_horizontal_datum_name, drainage_area, contributing_drainage_area, time_zone_abbreviation, uses_daylight_savings, construction_date, aquifer_code, national_aquifer_code, aquifer_type_code, well_constructed_depth, hole_constructed_depth, depth_source_code.} +geometry, monitoring_location_id, agency_code, agency_name, monitoring_location_number, monitoring_location_name, district_code, country_code, country_name, state_code, state_name, county_code, county_name, minor_civil_division_code, site_type_code, site_type, hydrologic_unit_code, basin_code, altitude, altitude_accuracy, altitude_method_code, altitude_method_name, vertical_datum, vertical_datum_name, horizontal_positional_accuracy_code, horizontal_positional_accuracy, horizontal_position_method_code, horizontal_position_method_name, original_horizontal_datum, original_horizontal_datum_name, drainage_area, contributing_drainage_area, time_zone_abbreviation, uses_daylight_savings, construction_date, aquifer_code, national_aquifer_code, aquifer_type_code, well_constructed_depth, hole_constructed_depth, depth_source_code. +The default (\code{NA}) will return all columns of the data.} \item{bbox}{Only features that have a geometry that intersects the bounding box are selected.The bounding box is provided as four or six numbers, depending diff --git a/man/read_waterdata_parameter_codes.Rd b/man/read_waterdata_parameter_codes.Rd index 33970972..7fb569c0 100644 --- a/man/read_waterdata_parameter_codes.Rd +++ b/man/read_waterdata_parameter_codes.Rd @@ -46,7 +46,8 @@ read_waterdata_parameter_codes( \item{properties}{A vector of requested columns to be returned from the query. 
Available options are: -geometry, id, parameter_name, unit_of_measure, parameter_group_code, parameter_description, medium, statistical_basis, time_basis, weight_basis, particle_size_basis, sample_fraction, temperature_basis, epa_equivalence.} +geometry, parameter_code_id, parameter_name, unit_of_measure, parameter_group_code, parameter_description, medium, statistical_basis, time_basis, weight_basis, particle_size_basis, sample_fraction, temperature_basis, epa_equivalence. +The default (\code{NA}) will return all columns of the data.} \item{limit}{The optional limit parameter is used to control the subset of the selected features that should be returned in each page. The maximum allowable diff --git a/man/read_waterdata_ts_meta.Rd b/man/read_waterdata_ts_meta.Rd index eaad9fd4..875d1e3c 100644 --- a/man/read_waterdata_ts_meta.Rd +++ b/man/read_waterdata_ts_meta.Rd @@ -28,7 +28,8 @@ read_waterdata_ts_meta( limit = NA, max_results = NA, bbox = NA, - convertType = FALSE + convertType = FALSE, + no_paging = FALSE ) } \arguments{ @@ -40,7 +41,8 @@ read_waterdata_ts_meta( \item{properties}{A vector of requested columns to be returned from the query. Available options are: -geometry, unit_of_measure, parameter_name, parameter_code, statistic_id, hydrologic_unit_code, state_name, last_modified, begin, end, begin_utc, end_utc, computation_period_identifier, computation_identifier, thresholds, sublocation_identifier, primary, monitoring_location_id, web_description, parameter_description, parent_time_series_id} +geometry, time_series_id, unit_of_measure, parameter_name, parameter_code, statistic_id, hydrologic_unit_code, state_name, last_modified, begin, end, begin_utc, end_utc, computation_period_identifier, computation_identifier, thresholds, sublocation_identifier, primary, monitoring_location_id, web_description, parameter_description, parent_time_series_id. +The default (\code{NA}) will return all columns of the data.} \item{statistic_id}{A code corresponding to the statistic an observation represents. Example codes include 00001 (max), 00002 (min), and 00003 (mean). A complete list of codes and their descriptions can be found at \url{https://help.waterdata.usgs.gov/code/stat_cd_nm_query?stat_nm_cd=\%25&fmt=html}.} @@ -104,6 +106,11 @@ Southern-most latitude, Eastern-most longitude, Northern-most longitude).} \item{convertType}{logical, defaults to \code{TRUE}. If \code{TRUE}, the function will convert the data to dates and qualifier to string vector.} + +\item{no_paging}{logical, defaults to \code{FALSE}. If \code{TRUE}, the data will +be requested from a native csv format. This can be dangerous because the +data will cut off at 50,000 rows without indication that more data +is available. Use \code{TRUE} with caution.} } \description{ Daily data and continuous measurements are grouped into time series, which represent a collection of observations of a single parameter, potentially aggregated using a standard statistic, at a single monitoring location. This endpoint provides metadata about those time series, including their operational thresholds, units of measurement, and when the earliest and most recent observations in a time series occurred. diff --git a/man/stateCd.Rd b/man/stateCd.Rd index b8eaea3d..51f2e3d7 100644 --- a/man/stateCd.Rd +++ b/man/stateCd.Rd @@ -20,7 +20,14 @@ Classic lookup table for states. Has been replaced in functions with \code{check_waterdata_sample_params("states")}. 
} \examples{ -head(stateCd) +\dontshow{if (is_dataRetrieval_user()) withAutoprint(\{ # examplesIf} + +\donttest{ +# Please migrate to: +stateCd <- read_waterdata_metadata("states") + +} +\dontshow{\}) # examplesIf} } \keyword{USGS} \keyword{stateCd} diff --git a/tests/testthat/tests_userFriendly_fxns.R b/tests/testthat/tests_userFriendly_fxns.R index e515e773..547411a6 100644 --- a/tests/testthat/tests_userFriendly_fxns.R +++ b/tests/testthat/tests_userFriendly_fxns.R @@ -37,7 +37,7 @@ test_that("Unit value data returns correct types", { # nolint start: line_length_linter expect_equal( attr(rawData, "request")[["url"]], - "https://api.waterdata.usgs.gov/ogcapi/v0/collections/continuous/items?f=json&lang=en-US&skipGeometry=TRUE&limit=50000&monitoring_location_id=USGS-05114000¶meter_code=00060&time=2014-10-10T00%3A00%3A00Z%2F2014-10-10T00%3A00%3A00Z" + "https://api.waterdata.usgs.gov/ogcapi/v0/collections/continuous/items?f=json&lang=en-US&skipGeometry=TRUE&monitoring_location_id=USGS-05114000¶meter_code=00060&time=2014-10-10T00%3A00%3A00Z%2F2014-10-10T00%3A00%3A00Z&limit=50000" ) # nolint end timeZoneChange <- read_waterdata_continuous(monitoring_location_id = c("04024430", "04024000"), @@ -50,7 +50,7 @@ test_that("Unit value data returns correct types", { expect_is(rawData$value, "numeric") # nolint start: line_length_linter expect_equal(attr(rawData, "request")[["url"]], - "https://api.waterdata.usgs.gov/ogcapi/v0/collections/continuous/items?f=json&lang=en-US&skipGeometry=TRUE&limit=50000&monitoring_location_id=USGS-05114000¶meter_code=00060&time=2014-10-10T00%3A00%3A00Z%2F2014-10-10T00%3A00%3A00Z") + "https://api.waterdata.usgs.gov/ogcapi/v0/collections/continuous/items?f=json&lang=en-US&skipGeometry=TRUE&monitoring_location_id=USGS-05114000¶meter_code=00060&time=2014-10-10T00%3A00%3A00Z%2F2014-10-10T00%3A00%3A00Z&limit=50000") # nolint end site <- "USGS-04087170" pCode <- "63680" @@ -390,7 +390,7 @@ test_that("Construct USGS urls", { # nolint start: line_length_linter expect_equal(url_daily$url, - "https://api.waterdata.usgs.gov/ogcapi/v0/collections/daily/items?f=json&lang=en-US&time=2024-01-01%2F..&skipGeometry=FALSE&limit=50000") + "https://api.waterdata.usgs.gov/ogcapi/v0/collections/daily/items?f=json&lang=en-US&time=2024-01-01%2F..&skipGeometry=FALSE") url_works <- dataRetrieval:::walk_pages(url_daily, max_results = 1) expect_true(nrow(url_works) > 0) @@ -401,7 +401,7 @@ test_that("Construct USGS urls", { expect_equal( url_ts_meta$url, - "https://api.waterdata.usgs.gov/ogcapi/v0/collections/time-series-metadata/items?f=json&lang=en-US&skipGeometry=FALSE&limit=50000" + "https://api.waterdata.usgs.gov/ogcapi/v0/collections/time-series-metadata/items?f=json&lang=en-US&skipGeometry=FALSE" ) url_works_ts <- dataRetrieval:::walk_pages(url_ts_meta, max_results = 1) @@ -410,7 +410,7 @@ test_that("Construct USGS urls", { url_ml <- construct_api_requests(id = siteNumber, service = "monitoring-locations") - expect_equal(url_ml$url, "https://api.waterdata.usgs.gov/ogcapi/v0/collections/monitoring-locations/items?f=json&lang=en-US&skipGeometry=FALSE&limit=50000&id=USGS-01594440") + expect_equal(url_ml$url, "https://api.waterdata.usgs.gov/ogcapi/v0/collections/monitoring-locations/items?f=json&lang=en-US&skipGeometry=FALSE&id=USGS-01594440") url_works_ml <- dataRetrieval:::walk_pages(url_ml, max_results = 1) expect_true(nrow(url_works_ml) > 0) @@ -527,3 +527,32 @@ test_that("pCode Stuff", { ) }) +context("Smart errors, warnings") +test_that("bad_properties", { + testthat::skip_on_cran() + 
testthat::skip_on_ci() + + expect_error(read_waterdata_daily(monitoring_location_id = "USGS-02238500", + parameter_code = c("00010"), + time = c("2021-01-01", "2022-01-01"), + properties = c("value", "time", "blah"))) + # Empty result: + expect_message(read_waterdata_daily(monitoring_location_id = "USGS-02238500", + parameter_code = c("00010"), + time = c("2021-01-01", "2022-01-01"), + no_paging = TRUE)) + + empty_return <- read_waterdata_daily(monitoring_location_id = "USGS-02238500", + parameter_code = c("00010"), + time = c("2021-01-01", "2022-01-01")) + expect_true(nrow(empty_return) == 0) + + empty_return2 <- read_waterdata_daily(monitoring_location_id = "USGS-02238500", + parameter_code = c("00010"), + time = c("2021-01-01", "2022-01-01"), + no_paging = TRUE) + + expect_true(nrow(empty_return2) == 0) + +}) + diff --git a/vignettes/read_waterdata_functions.Rmd b/vignettes/read_waterdata_functions.Rmd index f8feda77..27d7b13d 100644 --- a/vignettes/read_waterdata_functions.Rmd +++ b/vignettes/read_waterdata_functions.Rmd @@ -433,6 +433,15 @@ coordinate_datum_codes <- read_waterdata_metadata("coordinate-datum-codes") coordinate_method_codes <- read_waterdata_metadata("coordinate-method-codes") ``` +### County Identifiers + +`r dataRetrieval:::get_description("counties")` + +```{r} +#| eval: false +counties <- read_waterdata_metadata("counties") +``` + ### Hydrologic Unit Codes `r dataRetrieval:::get_description("hydrologic-unit-codes")` @@ -488,6 +497,25 @@ reliability_codes <- read_waterdata_metadata("reliability-codes") site_types <- read_waterdata_metadata("site-types") ``` +### State Identifiers + +`r dataRetrieval:::get_description("states")` + +```{r} +#| eval: false +states <- read_waterdata_metadata("states") +``` + +### Statistic Codes + +`r dataRetrieval:::get_description("statistic-codes")` + +```{r} +#| eval: false +statistic_codes <- read_waterdata_metadata("statistic-codes") +``` + + ### Topographic Codes `r dataRetrieval:::get_description("topographic-codes")` @@ -497,7 +525,7 @@ site_types <- read_waterdata_metadata("site-types") topographic_codes <- read_waterdata_metadata("topographic-codes") ``` -### Topographic Codes +### Time Zone Codes `r dataRetrieval:::get_description("time-zone-codes")`
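Taken together, the changes in this release give a migration path off the legacy lookup tables and a fast path for small daily-value pulls. The chunk below is a minimal sketch, not part of the package documentation: it assumes dataRetrieval 2.7.22 is installed and that api.waterdata.usgs.gov is reachable, and the object names, site, and parameter code are illustrative only.

```{r}
#| eval: false
library(dataRetrieval)

# Metadata collections that replace the legacy stateCd/countyCd tables:
states   <- read_waterdata_metadata("states")
counties <- read_waterdata_metadata("counties")

# Daily values via the new no_paging argument. This requests the native csv
# format and skips paging, but silently stops at 50,000 rows, so reserve it
# for requests known to be smaller than that.
dv_quick <- read_waterdata_daily(monitoring_location_id = "USGS-02238500",
                                 parameter_code = "00060",
                                 time = c("2024-01-01", NA),
                                 no_paging = TRUE)
```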