diff --git a/DESCRIPTION b/DESCRIPTION index fb699970..96d0452a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -25,7 +25,6 @@ Authors@R: c( email = "akrall@usgs.gov", comment=c(ORCID = "0000-0003-2521-5043")), person("Lee", "Stanish", role="ctb", - email = "lstanish@usgs.gov", comment=c(ORCID = "0000-0002-9775-6861")), person("Joeseph", "Zemmels", role="ctb", email = "jzemmels@usgs.gov", diff --git a/NAMESPACE b/NAMESPACE index 4bdbee6b..05e5f221 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -46,6 +46,7 @@ export(readWQPqw) export(readWQPsummary) export(read_USGS_samples) export(read_waterdata) +export(read_waterdata_continuous) export(read_waterdata_daily) export(read_waterdata_field_measurements) export(read_waterdata_latest_continuous) diff --git a/NEWS b/NEWS index 60b70c76..dd228539 100644 --- a/NEWS +++ b/NEWS @@ -1,7 +1,10 @@ dataRetrieval 2.7.22 =================== * Added read_waterdata_latest_daily to access latest daily USGS water data. +* Added read_waterdata_continuous to access continuous USGS water data. * Added state_name and hydrologic_unit_code to read_waterdata_ts_meta +* Removed daily_id from read_waterdata_daily output. Currently it +is not stable over time. 
dataRetrieval 2.7.21 =================== diff --git a/R/AAA.R b/R/AAA.R index 6b12397f..489a3601 100644 --- a/R/AAA.R +++ b/R/AAA.R @@ -8,7 +8,8 @@ pkg.env <- new.env() services <- c("server", "daily", "time-series-metadata", "monitoring-locations", "latest-continuous", - "field-measurements", "latest-daily") + "field-measurements", "latest-daily", + "continuous") collections <- c("parameter-codes", "agency-codes", "altitude-datums", "aquifer-codes", "aquifer-types", "coordinate-accuracy-codes", "coordinate-datum-codes", "coordinate-method-codes", "medium-codes", diff --git a/R/construct_api_requests.R b/R/construct_api_requests.R index 74fc05d6..cd678de5 100644 --- a/R/construct_api_requests.R +++ b/R/construct_api_requests.R @@ -68,7 +68,7 @@ construct_api_requests <- function(service, if(!is.na(max_results)){ get_list[["limit"]] <- max_results } else { - get_list[["limit"]] <- 10000 + get_list[["limit"]] <- 50000 } } else { if(!is.na(max_results)){ diff --git a/R/readNWISunit.R b/R/readNWISunit.R index 67be5309..cd300d67 100644 --- a/R/readNWISunit.R +++ b/R/readNWISunit.R @@ -91,6 +91,11 @@ readNWISuv <- function(siteNumbers, parameterCd, startDate = "", endDate = "", t service <- "iv_recent" } + .Deprecated(new = "read_waterdata_continuous", + package = "dataRetrieval", + msg = "NWIS servers are slated for decommission. Please begin to migrate to read_waterdata_continuous.") + + url <- constructNWISURL(siteNumbers, parameterCd, startDate, @@ -360,7 +365,10 @@ readNWISmeas <- function(siteNumbers, expanded = FALSE, convertType = TRUE) { - message(new_nwis_message()) + .Deprecated(new = "read_waterdata_field_measurements", + package = "dataRetrieval", + msg = "NWIS servers are slated for decommission. 
Please begin to migrate to read_waterdata_field_measurements.") + # Doesn't seem to be a WaterML1 format option url <- constructNWISURL( siteNumbers = siteNumbers, diff --git a/R/read_waterdata_continuous.R b/R/read_waterdata_continuous.R new file mode 100644 index 00000000..0859c3a1 --- /dev/null +++ b/R/read_waterdata_continuous.R @@ -0,0 +1,113 @@ +#' Get Continuous USGS Water Data +#' +#' @description `r get_description("continuous")` +#' +#' Currently, the services only allow up to 3 years of data to be requested with +#' a single request. If no "time" is specified, the service will return the +#' last single year of data. If this is a bottleneck, please check back +#' for new direct download functions that are expected to be available sometime +#' in 2026. +#' +#' @export +#' @param monitoring_location_id `r get_params("continuous")$monitoring_location_id` +#' @param parameter_code `r get_params("continuous")$parameter_code` +#' @param time `r get_params("continuous")$time` +#' @param value `r get_params("continuous")$value` +#' @param unit_of_measure `r get_params("continuous")$unit_of_measure` +#' @param approval_status `r get_params("continuous")$approval_status` +#' @param last_modified `r get_params("continuous")$last_modified` +#' @param time_series_id `r get_params("continuous")$time_series_id` +#' @param qualifier `r get_params("continuous")$qualifier` +#' @param statistic_id `r get_params("continuous")$statistic_id`. Note that +#' for continuous data, the statistic_id is almost universally 00011. +#' Requesting anything else will most-likely cause a timeout. +#' @param properties A vector of requested columns to be returned from the query. 
+#' Available options are: +#' `r schema <- check_OGC_requests(endpoint = "continuous", type = "schema"); paste(names(schema$properties)[!names(schema$properties) %in% c("id", "internal_id")], collapse = ", ")` +#' @param limit The optional limit parameter is used to control the subset of the +#' selected features that should be returned in each page. The maximum allowable +#' limit is 50000. It may be beneficial to set this number lower if your internet +#' connection is spotty. The default (`NA`) will set the limit to the maximum +#' allowable limit for the service. +#' @param max_results The optional maximum number of rows to return. This value +#' must be less than the requested limit. +#' @param convertType logical, defaults to `TRUE`. If `TRUE`, the function +#' will convert the data to dates and qualifier to string vector, and specifically +#' order the returning data frame by time and monitoring_location_id. +#' @examplesIf is_dataRetrieval_user() +#' +#' \donttest{ +#' site <- "USGS-451605097071701" +#' pcode <- "72019" +#' +#' uv_data_trim <- read_waterdata_continuous(monitoring_location_id = site, +#' parameter_code = pcode, +#' properties = c("value", +#' "time")) +#' +#' uv_data <- read_waterdata_continuous(monitoring_location_id = site, +#' parameter_code = pcode, +#' time = "P2D") +#' +#' +#' # Only return data that has been modified in last 7 days +#' multi_site2 <- read_waterdata_continuous(monitoring_location_id = c("USGS-451605097071701", +#' "USGS-14181500"), +#' parameter_code = c("00060", "72019"), +#' last_modified = "P7D") +#' +#' } +read_waterdata_continuous <- function(monitoring_location_id = NA_character_, + parameter_code = NA_character_, + properties = NA_character_, + time_series_id = NA_character_, + approval_status = NA_character_, + unit_of_measure = NA_character_, + qualifier = NA_character_, + statistic_id = NA_character_, + value = NA, + last_modified = NA_character_, + time = NA_character_, + limit = NA, + max_results = NA, + 
convertType = TRUE){ + + service <- "continuous" + output_id <- "continuous_id" + + args <- mget(names(formals())) + args[["skipGeometry"]] <- TRUE + + if(!is.na(statistic_id) & !all(statistic_id == "00011")){ + warning("With few if any exceptions, statistic_id is always 00011 for continuous data, and requesting other statistic ids will likely return no data.") + } + + return_list <- get_ogc_data(args, + output_id, + service) + + if(convertType){ + return_list <- order_results(return_list, properties) + return_list <- return_list[, names(return_list)[names(return_list)!= output_id]] + if("time_series_id" %in% names(return_list)){ + return_list <- return_list[, c( names(return_list)[names(return_list)!= "time_series_id"], + "time_series_id")] + } + } + + return(return_list) +} + +order_results <- function(return_list, properties){ + + if(all(is.na(properties)) | + all(c("time", "monitoring_location_id") %in% properties)){ + return_list <- return_list[order(return_list$time, + return_list$monitoring_location_id), ] + } else if ("time" %in% properties) { + return_list <- return_list[order(return_list$time), ] + } + + return(return_list) +} + diff --git a/R/read_waterdata_daily.R b/R/read_waterdata_daily.R index 3a578090..8359e7a9 100644 --- a/R/read_waterdata_daily.R +++ b/R/read_waterdata_daily.R @@ -13,10 +13,9 @@ #' @param last_modified `r get_params("daily")$last_modified` #' @param time_series_id `r get_params("daily")$time_series_id` #' @param qualifier `r get_params("daily")$qualifier` -#' @param daily_id `r get_params("daily")$id` #' @param properties A vector of requested columns to be returned from the query. 
#' Available options are: -#' `r schema <- check_OGC_requests(endpoint = "daily", type = "schema"); paste(names(schema$properties), collapse = ", ")` +#' `r schema <- check_OGC_requests(endpoint = "daily", type = "schema"); paste(names(schema$properties)[!names(schema$properties) %in% c("id")], collapse = ", ")` #' @param bbox Only features that have a geometry that intersects the bounding #' box are selected.The bounding box is provided as four or six numbers, depending #' on whether the coordinate reference system includes a vertical axis (height or @@ -25,7 +24,7 @@ #' Southern-most latitude, Eastern-most longitude, Northern-most longitude). #' @param limit The optional limit parameter is used to control the subset of the #' selected features that should be returned in each page. The maximum allowable -#' limit is 10000. It may be beneficial to set this number lower if your internet +#' limit is 50000. It may be beneficial to set this number lower if your internet #' connection is spotty. The default (`NA`) will set the limit to the maximum #' allowable limit for the service. #' @param max_results The optional maximum number of rows to return. 
This value @@ -39,7 +38,6 @@ #' #' \donttest{ #' site <- "USGS-02238500" -#' pcode <- "00060" #' dv_data_sf <- read_waterdata_daily(monitoring_location_id = site, #' parameter_code = "00060", #' time = c("2021-01-01", "2022-01-01")) @@ -50,8 +48,7 @@ #' #' dv_data_trim <- read_waterdata_daily(monitoring_location_id = site, #' parameter_code = "00060", -#' properties = c("monitoring_location_id", -#' "value", +#' properties = c("value", #' "time"), #' time = c("2021-01-01", "2022-01-01")) #' @@ -71,22 +68,21 @@ #' #' } read_waterdata_daily <- function(monitoring_location_id = NA_character_, - parameter_code = NA_character_, - statistic_id = NA_character_, - properties = NA_character_, - time_series_id = NA_character_, - daily_id = NA_character_, - approval_status = NA_character_, - unit_of_measure = NA_character_, - qualifier = NA_character_, - value = NA, - last_modified = NA_character_, - skipGeometry = NA, - time = NA_character_, - bbox = NA, - limit = NA, - max_results = NA, - convertType = TRUE){ + parameter_code = NA_character_, + statistic_id = NA_character_, + properties = NA_character_, + time_series_id = NA_character_, + approval_status = NA_character_, + unit_of_measure = NA_character_, + qualifier = NA_character_, + value = NA, + last_modified = NA_character_, + skipGeometry = NA, + time = NA_character_, + bbox = NA, + limit = NA, + max_results = NA, + convertType = TRUE){ service <- "daily" output_id <- "daily_id" @@ -96,7 +92,14 @@ read_waterdata_daily <- function(monitoring_location_id = NA_character_, output_id, service) - return_list <- return_list[order(return_list$time, return_list$monitoring_location_id), ] + if(convertType){ + return_list <- order_results(return_list, properties) + return_list <- return_list[,names(return_list)[names(return_list)!= output_id]] + if("time_series_id" %in% names(return_list)){ + return_list <- return_list[, c( names(return_list)[names(return_list)!= "time_series_id"], + "time_series_id")] + } + } 
return(return_list) } diff --git a/R/read_waterdata_field_measurements.R b/R/read_waterdata_field_measurements.R index 150fb3d3..2f56377b 100644 --- a/R/read_waterdata_field_measurements.R +++ b/R/read_waterdata_field_measurements.R @@ -18,7 +18,7 @@ #' @param measuring_agency `r get_params("field-measurements")$measuring_agency` #' @param properties A vector of requested columns to be returned from the query. #' Available options are: -#' `r schema <- check_OGC_requests(endpoint = "field-measurements", type = "schema"); paste(names(schema$properties), collapse = ", ")` +#' `r schema <- check_OGC_requests(endpoint = "field-measurements", type = "schema"); paste(names(schema$properties)[!names(schema$properties) %in% c("id")], collapse = ", ")` #' @param bbox Only features that have a geometry that intersects the bounding #' box are selected.The bounding box is provided as four or six numbers, depending #' on whether the coordinate reference system includes a vertical axis (height or @@ -27,7 +27,7 @@ #' Southern-most latitude, Eastern-most longitude, Northern-most longitude). #' @param limit The optional limit parameter is used to control the subset of the #' selected features that should be returned in each page. The maximum allowable -#' limit is 10000. It may be beneficial to set this number lower if your internet +#' limit is 50000. It may be beneficial to set this number lower if your internet #' connection is spotty. The default (`NA`) will set the limit to the maximum #' allowable limit for the service. #' @param max_results The optional maximum number of rows to return. 
This value @@ -99,7 +99,14 @@ read_waterdata_field_measurements <- function(monitoring_location_id = NA_charac output_id, service) - return_list <- return_list[order(return_list$time, return_list$monitoring_location_id), ] + if(convertType){ + return_list <- order_results(return_list, properties) + return_list <- return_list[,names(return_list)[names(return_list)!= output_id]] + if("field_visit_id" %in% names(return_list)){ + return_list <- return_list[, c( names(return_list)[names(return_list)!= "field_visit_id"], + "field_visit_id")] + } + } return(return_list) } diff --git a/R/read_waterdata_latest_continuous.R b/R/read_waterdata_latest_continuous.R index e2af5a15..8b95386d 100644 --- a/R/read_waterdata_latest_continuous.R +++ b/R/read_waterdata_latest_continuous.R @@ -5,7 +5,6 @@ #' @export #' @param monitoring_location_id `r get_params("latest-continuous")$monitoring_location_id` #' @param parameter_code `r get_params("latest-continuous")$parameter_code` -#' @param statistic_id `r get_params("latest-continuous")$statistic_id` #' @param time `r get_params("latest-continuous")$time` #' @param value `r get_params("latest-continuous")$value` #' @param unit_of_measure `r get_params("latest-continuous")$unit_of_measure` @@ -13,10 +12,12 @@ #' @param last_modified `r get_params("latest-continuous")$last_modified` #' @param time_series_id `r get_params("latest-continuous")$time_series_id` #' @param qualifier `r get_params("latest-continuous")$qualifier` -#' @param latest_continuous_id `r get_params("latest-continuous")$id` +#' @param statistic_id `r get_params("latest-continuous")$statistic_id`. Note that +#' for continuous data, the statistic_id is almost universally 00011. +#' Requesting anything else will most-likely cause a timeout. #' @param properties A vector of requested columns to be returned from the query. 
#' Available options are: -#' `r schema <- check_OGC_requests(endpoint = "latest-continuous", type = "schema"); paste(names(schema$properties), collapse = ", ")` +#' `r schema <- check_OGC_requests(endpoint = "latest-continuous", type = "schema"); paste(names(schema$properties)[!names(schema$properties) %in% c("id")], collapse = ", ")` #' @param bbox Only features that have a geometry that intersects the bounding #' box are selected.The bounding box is provided as four or six numbers, depending #' on whether the coordinate reference system includes a vertical axis (height or @@ -25,7 +26,7 @@ #' Southern-most latitude, Eastern-most longitude, Northern-most longitude). #' @param limit The optional limit parameter is used to control the subset of the #' selected features that should be returned in each page. The maximum allowable -#' limit is 10000. It may be beneficial to set this number lower if your internet +#' limit is 50000. It may be beneficial to set this number lower if your internet #' connection is spotty. The default (`NA`) will set the limit to the maximum #' allowable limit for the service. #' @param max_results The optional maximum number of rows to return. 
This value @@ -74,7 +75,6 @@ read_waterdata_latest_continuous <- function(monitoring_location_id = NA_charact statistic_id = NA_character_, properties = NA_character_, time_series_id = NA_character_, - latest_continuous_id = NA_character_, approval_status = NA_character_, unit_of_measure = NA_character_, qualifier = NA_character_, @@ -95,7 +95,14 @@ read_waterdata_latest_continuous <- function(monitoring_location_id = NA_charact output_id, service) - return_list <- return_list[order(return_list$time, return_list$monitoring_location_id), ] + if(convertType){ + return_list <- order_results(return_list, properties) + return_list <- return_list[, names(return_list)[names(return_list)!= output_id]] + if("time_series_id" %in% names(return_list)){ + return_list <- return_list[, c( names(return_list)[names(return_list)!= "time_series_id"], + "time_series_id")] + } + } return(return_list) } diff --git a/R/read_waterdata_latest_daily.R b/R/read_waterdata_latest_daily.R index 6b6ff52c..5e98cc78 100644 --- a/R/read_waterdata_latest_daily.R +++ b/R/read_waterdata_latest_daily.R @@ -13,10 +13,9 @@ #' @param last_modified `r get_params("latest-daily")$last_modified` #' @param time_series_id `r get_params("latest-daily")$time_series_id` #' @param qualifier `r get_params("latest-daily")$qualifier` -#' @param latest_daily_id `r get_params("latest-daily")$id` #' @param properties A vector of requested columns to be returned from the query. 
#' Available options are: -#' `r schema <- check_OGC_requests(endpoint = "latest-daily", type = "schema"); paste(names(schema$properties), collapse = ", ")` +#' `r schema <- check_OGC_requests(endpoint = "latest-daily", type = "schema"); paste(names(schema$properties)[!names(schema$properties) %in% c("id")], collapse = ", ")` #' @param bbox Only features that have a geometry that intersects the bounding #' box are selected.The bounding box is provided as four or six numbers, depending #' on whether the coordinate reference system includes a vertical axis (height or @@ -25,7 +24,7 @@ #' Southern-most latitude, Eastern-most longitude, Northern-most longitude). #' @param limit The optional limit parameter is used to control the subset of the #' selected features that should be returned in each page. The maximum allowable -#' limit is 10000. It may be beneficial to set this number lower if your internet +#' limit is 50000. It may be beneficial to set this number lower if your internet #' connection is spotty. The default (`NA`) will set the limit to the maximum #' allowable limit for the service. #' @param max_results The optional maximum number of rows to return. 
This value @@ -67,7 +66,6 @@ read_waterdata_latest_daily <- function(monitoring_location_id = NA_character_, statistic_id = NA_character_, properties = NA_character_, time_series_id = NA_character_, - latest_daily_id = NA_character_, approval_status = NA_character_, unit_of_measure = NA_character_, qualifier = NA_character_, @@ -88,8 +86,14 @@ read_waterdata_latest_daily <- function(monitoring_location_id = NA_character_, output_id, service) - return_list <- return_list[order(return_list$time, return_list$monitoring_location_id), ] - + if(convertType){ + return_list <- order_results(return_list, properties) + return_list <- return_list[,names(return_list)[names(return_list)!= output_id]] + if("time_series_id" %in% names(return_list)){ + return_list <- return_list[, c( names(return_list)[names(return_list)!= "time_series_id"], + "time_series_id")] + } + } return(return_list) } diff --git a/R/read_waterdata_metadata.R b/R/read_waterdata_metadata.R index 9acb5102..4b617043 100644 --- a/R/read_waterdata_metadata.R +++ b/R/read_waterdata_metadata.R @@ -12,7 +12,7 @@ #' "topographic-codes", "time-zone-codes". #' @param limit The optional limit parameter is used to control the subset of the #' selected features that should be returned in each page. The maximum allowable -#' limit is 10000. It may be beneficial to set this number lower if your internet +#' limit is 50000. It may be beneficial to set this number lower if your internet #' connection is spotty. The default (`NA`) will set the limit to the maximum #' allowable limit for the service. #' @param max_results The optional maximum number of rows to return. This value diff --git a/R/read_waterdata_monitoring_location.R b/R/read_waterdata_monitoring_location.R index 8a762cb1..77d9fb2c 100644 --- a/R/read_waterdata_monitoring_location.R +++ b/R/read_waterdata_monitoring_location.R @@ -54,7 +54,7 @@ #' Southern-most latitude, Eastern-most longitude, Northern-most longitude). 
#' @param limit The optional limit parameter is used to control the subset of the #' selected features that should be returned in each page. The maximum allowable -#' limit is 10000. It may be beneficial to set this number lower if your internet +#' limit is 50000. It may be beneficial to set this number lower if your internet #' connection is spotty. The default (`NA`) will set the limit to the maximum #' allowable limit for the service. #' @param max_results The optional maximum number of rows to return. This value diff --git a/R/read_waterdata_parameter_codes.R b/R/read_waterdata_parameter_codes.R index d1cd68cb..f9f59e18 100644 --- a/R/read_waterdata_parameter_codes.R +++ b/R/read_waterdata_parameter_codes.R @@ -19,7 +19,7 @@ #' `r schema <- check_OGC_requests(endpoint = "parameter-codes", type = "schema"); paste(names(schema$properties), collapse = ", ")`. #' @param limit The optional limit parameter is used to control the subset of the #' selected features that should be returned in each page. The maximum allowable -#' limit is 10000. It may be beneficial to set this number lower if your internet +#' limit is 50000. It may be beneficial to set this number lower if your internet #' connection is spotty. The default (`NA`) will set the limit to the maximum #' allowable limit for the service. #' @param max_results The optional maximum number of rows to return. This value diff --git a/R/read_waterdata_ts_meta.R b/R/read_waterdata_ts_meta.R index 59a9aba4..7f5be9c8 100644 --- a/R/read_waterdata_ts_meta.R +++ b/R/read_waterdata_ts_meta.R @@ -22,7 +22,7 @@ #' @param web_description `r get_params("time-series-metadata")$web_description` #' @param properties A vector of requested columns to be returned from the query. 
#' Available options are: -#' `r schema <- check_OGC_requests(endpoint = "time-series-metadata", type = "schema"); paste(names(schema$properties), collapse = ", ")` +#' `r schema <- check_OGC_requests(endpoint = "time-series-metadata", type = "schema"); paste(names(schema$properties)[!names(schema$properties) %in% c("id")], collapse = ", ")` #' @param time_series_id `r get_params("time-series-metadata")$id` #' @param bbox Only features that have a geometry that intersects the bounding #' box are selected.The bounding box is provided as four or six numbers, depending @@ -32,7 +32,7 @@ #' Southern-most latitude, Eastern-most longitude, Northern-most longitude). #' @param limit The optional limit parameter is used to control the subset of the #' selected features that should be returned in each page. The maximum allowable -#' limit is 10000. It may be beneficial to set this number lower if your internet +#' limit is 50000. It may be beneficial to set this number lower if your internet #' connection is spotty. The default (`NA`) will set the limit to the maximum #' allowable limit for the service. #' @param max_results The optional maximum number of rows to return. This value diff --git a/R/walk_pages.R b/R/walk_pages.R index 7cdd2c18..f5375c41 100644 --- a/R/walk_pages.R +++ b/R/walk_pages.R @@ -151,8 +151,9 @@ cleanup_cols <- function(df, service = "daily"){ if("time" %in% names(df)){ if(service == "daily"){ df$time <- as.Date(df$time) - } - # leave some room here for POSIXct in the other services. + } + # by default, the data is put in POSIXct and seems + # to be pretty smart about the offset/tzone } if("value" %in% names(df)){ diff --git a/README.Rmd b/README.Rmd index 8e5b2736..b57d8814 100644 --- a/README.Rmd +++ b/README.Rmd @@ -40,7 +40,7 @@ If you have additional questions about these changes, email CompTools@usgs.gov. # What would you like to do? -1. Get instantaneous USGS data (for example, discharge sensor data). Start here: `?readNWISuv`. 
If you only need the latest value, you can use the function: `?read_waterdata_latest_continuous`. +1. Get instantaneous USGS data (for example, discharge sensor data). Start here: `?read_waterdata_continuous`. If you only need the latest value, you can use the function: `?read_waterdata_latest_continuous`. 2. Get daily USGS data (for example, mean daily discharge). Start here: `?read_waterdata_daily`. If you only need the latest value, you can use the function: `?read_waterdata_latest_daily`. @@ -56,6 +56,8 @@ If you have additional questions about these changes, email CompTools@usgs.gov. 8. Add a USGS WaterData API token to your R environment. See: +9. Get the latest status on NWIS and other data access updates. See: + For additional tutorials, see: [Basic Tutorial](https://doi-usgs.github.io/dataRetrieval/articles/tutorial.html) diff --git a/README.md b/README.md index c714935f..47b5fad4 100644 --- a/README.md +++ b/README.md @@ -31,8 +31,9 @@ If you have additional questions about these changes, email # What would you like to do? 1. Get instantaneous USGS data (for example, discharge sensor data). - Start here: `?readNWISuv`. If you only need the latest value, you - can use the function: `?read_waterdata_latest_continuous`. + Start here: `?read_waterdata_continuous`. If you only need the + latest value, you can use the function: + `?read_waterdata_latest_continuous`. 2. Get daily USGS data (for example, mean daily discharge). Start here: `?read_waterdata_daily`. If you only need the latest value, you can @@ -58,6 +59,9 @@ If you have additional questions about these changes, email 8. Add a USGS WaterData API token to your R environment. See: +9. Get the latest status on NWIS and other data access updates. See: + + For additional tutorials, see: [Basic @@ -198,15 +202,15 @@ NWIScitation <- create_NWIS_bib(dv) NWIScitation #> U.S. Geological Survey (2025). _National Water Information System data #> available on the World Wide Web (USGS Water Data for the Nation)_. 
-#> doi:10.5066/F7P55KJN , Accessed Nov -#> 24, 2025, +#> doi:10.5066/F7P55KJN , Accessed Dec +#> 03, 2025, #> . print(NWIScitation, style = "Bibtex") #> @Manual{, #> title = {National Water Information System data available on the World Wide Web (USGS Water Data for the Nation)}, #> author = {{U.S. Geological Survey}}, #> doi = {10.5066/F7P55KJN}, -#> note = {Accessed Nov 24, 2025}, +#> note = {Accessed Dec 03, 2025}, #> year = {2025}, #> url = {https://waterservices.usgs.gov/nwis/dv/?site=09010500&format=waterml%2C1.1&ParameterCd=00060&StatCd=00003&startDT=1851-01-01}, #> } @@ -230,14 +234,14 @@ WQPcitation <- create_WQP_bib(SC) WQPcitation #> National Water Quality Monitoring Council (2025). _Water Quality #> Portal_. doi:10.5066/P9QRKUVJ , -#> Accessed Nov 24, 2025, +#> Accessed Dec 03, 2025, #> . print(WQPcitation, style = "Bibtex") #> @Manual{, #> title = {Water Quality Portal}, #> author = {{National Water Quality Monitoring Council}}, #> doi = {10.5066/P9QRKUVJ}, -#> note = {Accessed Nov 24, 2025}, +#> note = {Accessed Dec 03, 2025}, #> year = {2025}, #> url = {https://www.waterqualitydata.us/data/Result/search?siteid=USGS-05288705&count=no&pCode=00300&mimeType=csv}, #> } diff --git a/_pkgdown.yml b/_pkgdown.yml index 7f11db90..02a303c3 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -69,9 +69,11 @@ reference: contents: - read_waterdata_samples - read_waterdata_daily + - read_waterdata_continuous - read_waterdata_ts_meta - read_waterdata_monitoring_location - read_waterdata_latest_continuous + - read_waterdata_latest_daily - read_waterdata_field_measurements - read_waterdata_parameter_codes - read_waterdata_metadata diff --git a/inst/extdata/temperatureAndFlow.RData b/inst/extdata/temperatureAndFlow.RData index 7e77e642..163c8e43 100644 Binary files a/inst/extdata/temperatureAndFlow.RData and b/inst/extdata/temperatureAndFlow.RData differ diff --git a/man/read_waterdata_continuous.Rd b/man/read_waterdata_continuous.Rd new file mode 100644 index 
00000000..489127b2 --- /dev/null +++ b/man/read_waterdata_continuous.Rd @@ -0,0 +1,119 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/read_waterdata_continuous.R +\name{read_waterdata_continuous} +\alias{read_waterdata_continuous} +\title{Get Continuous USGS Water Data} +\usage{ +read_waterdata_continuous( + monitoring_location_id = NA_character_, + parameter_code = NA_character_, + properties = NA_character_, + time_series_id = NA_character_, + approval_status = NA_character_, + unit_of_measure = NA_character_, + qualifier = NA_character_, + statistic_id = NA_character_, + value = NA, + last_modified = NA_character_, + time = NA_character_, + limit = NA, + max_results = NA, + convertType = TRUE +) +} +\arguments{ +\item{monitoring_location_id}{A unique identifier representing a single monitoring location. This corresponds to the \code{id} field in the \code{monitoring-locations} endpoint. Monitoring location IDs are created by combining the agency code of the agency responsible for the monitoring location (e.g. USGS) with the ID number of the monitoring location (e.g. 02238500), separated by a hyphen (e.g. USGS-02238500).} + +\item{parameter_code}{Parameter codes are 5-digit codes used to identify the constituent measured and the units of measure. A complete list of parameter codes and associated groupings can be found at \url{https://help.waterdata.usgs.gov/codes-and-parameters/parameters}.} + +\item{properties}{A vector of requested columns to be returned from the query. +Available options are: +geometry, time_series_id, monitoring_location_id, parameter_code, statistic_id, time, value, unit_of_measure, approval_status, qualifier, last_modified} + +\item{time_series_id}{A unique identifier representing a single time series. This corresponds to the \code{id} field in the \code{time-series-metadata} endpoint.} + +\item{approval_status}{Some of the data that you have obtained from this U.S. 
Geological Survey database may not have received Director's approval. Any such data values are qualified as provisional and are subject to revision. Provisional data are released on the condition that neither the USGS nor the United States Government may be held liable for any damages resulting from its use. This field reflects the approval status of each record, and is either "Approved", meaning processing review has been completed and the data is approved for publication, or "Provisional" and subject to revision. For more information about provisional data, go to \url{https://waterdata.usgs.gov/provisional-data-statement/}.} + +\item{unit_of_measure}{A human-readable description of the units of measurement associated with an observation.} + +\item{qualifier}{This field indicates any qualifiers associated with an observation, for instance if a sensor may have been impacted by ice or if values were estimated.} + +\item{statistic_id}{A code corresponding to the statistic an observation represents. Example codes include 00001 (max), 00002 (min), and 00003 (mean). A complete list of codes and their descriptions can be found at \url{https://help.waterdata.usgs.gov/code/stat_cd_nm_query?stat_nm_cd=\%25&fmt=html}. +. Note that +for continuous data, the statistic_id is almost universally 00011. +Requesting anything else will most-likely cause a timeout.} + +\item{value}{The value of the observation. Values are transmitted as strings in the JSON response format in order to preserve precision.} + +\item{last_modified}{The last time a record was refreshed in our database. This may happen due to regular operational processes and does not necessarily indicate anything about the measurement has changed. +You can query this field using date-times or intervals, adhering to RFC 3339, or using ISO 8601 duration objects. Intervals may be bounded or half-bounded (double-dots at start or end). 
+Examples: +\itemize{ +\item A date-time: "2018-02-12T23:20:50Z" +\item A bounded interval: "2018-02-12T00:00:00Z/2018-03-18T12:31:12Z" +\item Half-bounded intervals: "2018-02-12T00:00:00Z/.." or "../2018-03-18T12:31:12Z" +\item Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours +} + +Only features that have a \code{last_modified} that intersects the value of datetime are selected.} + +\item{time}{The date an observation represents. You can query this field using date-times or intervals, adhering to RFC 3339, or using ISO 8601 duration objects. Intervals may be bounded or half-bounded (double-dots at start or end). +Examples: +\itemize{ +\item A date-time: "2018-02-12T23:20:50Z" +\item A bounded interval: "2018-02-12T00:00:00Z/2018-03-18T12:31:12Z" +\item Half-bounded intervals: "2018-02-12T00:00:00Z/.." or "../2018-03-18T12:31:12Z" +\item Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours +} + +Only features that have a \code{time} that intersects the value of datetime are selected. If a feature has multiple temporal properties, it is the decision of the server whether only a single temporal property is used to determine the extent or all relevant temporal properties.} + +\item{limit}{The optional limit parameter is used to control the subset of the +selected features that should be returned in each page. The maximum allowable +limit is 50000. It may be beneficial to set this number lower if your internet +connection is spotty. The default (\code{NA}) will set the limit to the maximum +allowable limit for the service.} + +\item{max_results}{The optional maximum number of rows to return. This value +must be less than the requested limit.} + +\item{convertType}{logical, defaults to \code{TRUE}. 
If \code{TRUE}, the function +will convert the data to dates and qualifier to string vector, and specifically +order the returning data frame by time and monitoring_location_id.} +} +\description{ +This is an early version of the continuous endpoint. It is feature-complete and provides access to the full continuous data record, and is being made available as we continue to work on performance improvements. +Continuous data are collected via automated sensors installed at a monitoring location. They are collected at a high frequency and often at a fixed 15-minute interval. Depending on the specific monitoring location, the data may be transmitted automatically via telemetry and be available on WDFN within minutes of collection, while other times the delivery of data may be delayed if the monitoring location does not have the capacity to automatically transmit data. Continuous data are described by parameter name and parameter code (pcode). These data might also be referred to as "instantaneous values" or "IV". + +Currently, the services only allow up to 3 years of data to be requested with +a single request. If no "time" is specified, the service will return the +last single year of data. If this is a bottleneck, please check back +for new direct download functions that are expected to be available sometime +in 2026. 
+} +\examples{ +\dontshow{if (is_dataRetrieval_user()) withAutoprint(\{ # examplesIf} + +\donttest{ +site <- "USGS-451605097071701" +pcode <- "72019" + +uv_data_trim <- read_waterdata_continuous(monitoring_location_id = site, + parameter_code = pcode, + properties = c("value", + "time")) + +uv_data <- read_waterdata_continuous(monitoring_location_id = site, + parameter_code = pcode, + time = "P2D") + + +# Only return data that has been modified in last 7 days +multi_site2 <- read_waterdata_continuous(monitoring_location_id = c("USGS-451605097071701", + "USGS-14181500"), + parameter_code = c("00060", "72019"), + last_modified = "P7D") + +} +\dontshow{\}) # examplesIf} +} diff --git a/man/read_waterdata_daily.Rd b/man/read_waterdata_daily.Rd index 7fea81a7..a626f248 100644 --- a/man/read_waterdata_daily.Rd +++ b/man/read_waterdata_daily.Rd @@ -10,7 +10,6 @@ read_waterdata_daily( statistic_id = NA_character_, properties = NA_character_, time_series_id = NA_character_, - daily_id = NA_character_, approval_status = NA_character_, unit_of_measure = NA_character_, qualifier = NA_character_, @@ -33,12 +32,10 @@ read_waterdata_daily( \item{properties}{A vector of requested columns to be returned from the query. Available options are: -geometry, id, time_series_id, monitoring_location_id, parameter_code, statistic_id, time, value, unit_of_measure, approval_status, qualifier, last_modified} +geometry, time_series_id, monitoring_location_id, parameter_code, statistic_id, time, value, unit_of_measure, approval_status, qualifier, last_modified} \item{time_series_id}{A unique identifier representing a single time series. This corresponds to the \code{id} field in the \code{time-series-metadata} endpoint.} -\item{daily_id}{A universally unique identifier (UUID) representing a single version of a record. It is not stable over time. 
Every time the record is refreshed in our database (which may happen as part of normal operations and does not imply any change to the data itself) a new ID will be generated. To uniquely identify a single observation over time, compare the \code{time} and \code{time_series_id} fields; each time series will only have a single observation at a given \code{time}.} - \item{approval_status}{Some of the data that you have obtained from this U.S. Geological Survey database may not have received Director's approval. Any such data values are qualified as provisional and are subject to revision. Provisional data are released on the condition that neither the USGS nor the United States Government may be held liable for any damages resulting from its use. This field reflects the approval status of each record, and is either "Approved", meaining processing review has been completed and the data is approved for publication, or "Provisional" and subject to revision. For more information about provisional data, go to \url{https://waterdata.usgs.gov/provisional-data-statement/}.} \item{unit_of_measure}{A human-readable description of the units of measurement associated with an observation.} @@ -83,7 +80,7 @@ Southern-most latitude, Eastern-most longitude, Northern-most longitude).} \item{limit}{The optional limit parameter is used to control the subset of the selected features that should be returned in each page. The maximum allowable -limit is 10000. It may be beneficial to set this number lower if your internet +limit is 50000. It may be beneficial to set this number lower if your internet connection is spotty. The default (\code{NA}) will set the limit to the maximum allowable limit for the service.} @@ -101,7 +98,6 @@ Daily data provide one data value to represent water conditions for the day. 
Thr \donttest{ site <- "USGS-02238500" -pcode <- "00060" dv_data_sf <- read_waterdata_daily(monitoring_location_id = site, parameter_code = "00060", time = c("2021-01-01", "2022-01-01")) @@ -112,8 +108,7 @@ dv_data_last_modified <- read_waterdata_daily(monitoring_location_id = site, dv_data_trim <- read_waterdata_daily(monitoring_location_id = site, parameter_code = "00060", - properties = c("monitoring_location_id", - "value", + properties = c("value", "time"), time = c("2021-01-01", "2022-01-01")) diff --git a/man/read_waterdata_field_measurements.Rd b/man/read_waterdata_field_measurements.Rd index 7173177d..c4eaf201 100644 --- a/man/read_waterdata_field_measurements.Rd +++ b/man/read_waterdata_field_measurements.Rd @@ -35,7 +35,7 @@ read_waterdata_field_measurements( \item{properties}{A vector of requested columns to be returned from the query. Available options are: -geometry, id, field_visit_id, parameter_code, monitoring_location_id, observing_procedure_code, observing_procedure, value, unit_of_measure, time, qualifier, vertical_datum, approval_status, measuring_agency, last_modified} +geometry, field_visit_id, parameter_code, monitoring_location_id, observing_procedure_code, observing_procedure, value, unit_of_measure, time, qualifier, vertical_datum, approval_status, measuring_agency, last_modified} \item{field_visit_id}{A universally unique identifier (UUID) for the field visit. Multiple measurements may be made during a single field visit.} @@ -89,7 +89,7 @@ Southern-most latitude, Eastern-most longitude, Northern-most longitude).} \item{limit}{The optional limit parameter is used to control the subset of the selected features that should be returned in each page. The maximum allowable -limit is 10000. It may be beneficial to set this number lower if your internet +limit is 50000. It may be beneficial to set this number lower if your internet connection is spotty. 
The default (\code{NA}) will set the limit to the maximum allowable limit for the service.} diff --git a/man/read_waterdata_latest_continuous.Rd b/man/read_waterdata_latest_continuous.Rd index e18deaf1..6317e6f0 100644 --- a/man/read_waterdata_latest_continuous.Rd +++ b/man/read_waterdata_latest_continuous.Rd @@ -10,7 +10,6 @@ read_waterdata_latest_continuous( statistic_id = NA_character_, properties = NA_character_, time_series_id = NA_character_, - latest_continuous_id = NA_character_, approval_status = NA_character_, unit_of_measure = NA_character_, qualifier = NA_character_, @@ -29,16 +28,17 @@ read_waterdata_latest_continuous( \item{parameter_code}{Parameter codes are 5-digit codes used to identify the constituent measured and the units of measure. A complete list of parameter codes and associated groupings can be found at \url{https://help.waterdata.usgs.gov/codes-and-parameters/parameters}.} -\item{statistic_id}{A code corresponding to the statistic an observation represents. Example codes include 00001 (max), 00002 (min), and 00003 (mean). A complete list of codes and their descriptions can be found at \url{https://help.waterdata.usgs.gov/code/stat_cd_nm_query?stat_nm_cd=\%25&fmt=html}.} +\item{statistic_id}{A code corresponding to the statistic an observation represents. Example codes include 00001 (max), 00002 (min), and 00003 (mean). A complete list of codes and their descriptions can be found at \url{https://help.waterdata.usgs.gov/code/stat_cd_nm_query?stat_nm_cd=\%25&fmt=html}. +. Note that +for continuous data, the statistic_id is almost universally 00011. +Requesting anything else will most-likely cause a timeout.} \item{properties}{A vector of requested columns to be returned from the query. 
Available options are: -geometry, id, time_series_id, monitoring_location_id, parameter_code, statistic_id, time, value, unit_of_measure, approval_status, qualifier, last_modified} +geometry, time_series_id, monitoring_location_id, parameter_code, statistic_id, time, value, unit_of_measure, approval_status, qualifier, last_modified} \item{time_series_id}{A unique identifier representing a single time series. This corresponds to the \code{id} field in the \code{time-series-metadata} endpoint.} -\item{latest_continuous_id}{A universally unique identifier (UUID) representing a single version of a record. It is not stable over time. Every time the record is refreshed in our database (which may happen as part of normal operations and does not imply any change to the data itself) a new ID will be generated. To uniquely identify a single observation over time, compare the \code{time} and \code{time_series_id} fields; each time series will only have a single observation at a given \code{time}.} - \item{approval_status}{Some of the data that you have obtained from this U.S. Geological Survey database may not have received Director's approval. Any such data values are qualified as provisional and are subject to revision. Provisional data are released on the condition that neither the USGS nor the United States Government may be held liable for any damages resulting from its use. This field reflects the approval status of each record, and is either "Approved", meaining processing review has been completed and the data is approved for publication, or "Provisional" and subject to revision. 
For more information about provisional data, go to \url{https://waterdata.usgs.gov/provisional-data-statement/}.} \item{unit_of_measure}{A human-readable description of the units of measurement associated with an observation.} @@ -83,7 +83,7 @@ Southern-most latitude, Eastern-most longitude, Northern-most longitude).} \item{limit}{The optional limit parameter is used to control the subset of the selected features that should be returned in each page. The maximum allowable -limit is 10000. It may be beneficial to set this number lower if your internet +limit is 50000. It may be beneficial to set this number lower if your internet connection is spotty. The default (\code{NA}) will set the limit to the maximum allowable limit for the service.} diff --git a/man/read_waterdata_latest_daily.Rd b/man/read_waterdata_latest_daily.Rd index 4e81325c..e6f12602 100644 --- a/man/read_waterdata_latest_daily.Rd +++ b/man/read_waterdata_latest_daily.Rd @@ -10,7 +10,6 @@ read_waterdata_latest_daily( statistic_id = NA_character_, properties = NA_character_, time_series_id = NA_character_, - latest_daily_id = NA_character_, approval_status = NA_character_, unit_of_measure = NA_character_, qualifier = NA_character_, @@ -33,12 +32,10 @@ read_waterdata_latest_daily( \item{properties}{A vector of requested columns to be returned from the query. Available options are: -geometry, id, time_series_id, monitoring_location_id, parameter_code, statistic_id, time, value, unit_of_measure, approval_status, qualifier, last_modified} +geometry, time_series_id, monitoring_location_id, parameter_code, statistic_id, time, value, unit_of_measure, approval_status, qualifier, last_modified} \item{time_series_id}{A unique identifier representing a single time series. This corresponds to the \code{id} field in the \code{time-series-metadata} endpoint.} -\item{latest_daily_id}{A universally unique identifier (UUID) representing a single version of a record. It is not stable over time. 
Every time the record is refreshed in our database (which may happen as part of normal operations and does not imply any change to the data itself) a new ID will be generated. To uniquely identify a single observation over time, compare the \code{time} and \code{time_series_id} fields; each time series will only have a single observation at a given \code{time}.} - \item{approval_status}{Some of the data that you have obtained from this U.S. Geological Survey database may not have received Director's approval. Any such data values are qualified as provisional and are subject to revision. Provisional data are released on the condition that neither the USGS nor the United States Government may be held liable for any damages resulting from its use. This field reflects the approval status of each record, and is either "Approved", meaining processing review has been completed and the data is approved for publication, or "Provisional" and subject to revision. For more information about provisional data, go to \url{https://waterdata.usgs.gov/provisional-data-statement/}.} \item{unit_of_measure}{A human-readable description of the units of measurement associated with an observation.} @@ -83,7 +80,7 @@ Southern-most latitude, Eastern-most longitude, Northern-most longitude).} \item{limit}{The optional limit parameter is used to control the subset of the selected features that should be returned in each page. The maximum allowable -limit is 10000. It may be beneficial to set this number lower if your internet +limit is 50000. It may be beneficial to set this number lower if your internet connection is spotty. 
The default (\code{NA}) will set the limit to the maximum allowable limit for the service.} diff --git a/man/read_waterdata_metadata.Rd b/man/read_waterdata_metadata.Rd index efb28848..83071a7c 100644 --- a/man/read_waterdata_metadata.Rd +++ b/man/read_waterdata_metadata.Rd @@ -19,7 +19,7 @@ must be less than the requested limit.} \item{limit}{The optional limit parameter is used to control the subset of the selected features that should be returned in each page. The maximum allowable -limit is 10000. It may be beneficial to set this number lower if your internet +limit is 50000. It may be beneficial to set this number lower if your internet connection is spotty. The default (\code{NA}) will set the limit to the maximum allowable limit for the service.} } diff --git a/man/read_waterdata_monitoring_location.Rd b/man/read_waterdata_monitoring_location.Rd index db3ef00a..b9fba111 100644 --- a/man/read_waterdata_monitoring_location.Rd +++ b/man/read_waterdata_monitoring_location.Rd @@ -146,7 +146,7 @@ Southern-most latitude, Eastern-most longitude, Northern-most longitude).} \item{limit}{The optional limit parameter is used to control the subset of the selected features that should be returned in each page. The maximum allowable -limit is 10000. It may be beneficial to set this number lower if your internet +limit is 50000. It may be beneficial to set this number lower if your internet connection is spotty. The default (\code{NA}) will set the limit to the maximum allowable limit for the service.} diff --git a/man/read_waterdata_parameter_codes.Rd b/man/read_waterdata_parameter_codes.Rd index ea5c4e99..33970972 100644 --- a/man/read_waterdata_parameter_codes.Rd +++ b/man/read_waterdata_parameter_codes.Rd @@ -50,7 +50,7 @@ geometry, id, parameter_name, unit_of_measure, parameter_group_code, parameter_d \item{limit}{The optional limit parameter is used to control the subset of the selected features that should be returned in each page. 
The maximum allowable -limit is 10000. It may be beneficial to set this number lower if your internet +limit is 50000. It may be beneficial to set this number lower if your internet connection is spotty. The default (\code{NA}) will set the limit to the maximum allowable limit for the service.} diff --git a/man/read_waterdata_ts_meta.Rd b/man/read_waterdata_ts_meta.Rd index 32242acb..eaad9fd4 100644 --- a/man/read_waterdata_ts_meta.Rd +++ b/man/read_waterdata_ts_meta.Rd @@ -40,7 +40,7 @@ read_waterdata_ts_meta( \item{properties}{A vector of requested columns to be returned from the query. Available options are: -geometry, id, unit_of_measure, parameter_name, parameter_code, statistic_id, hydrologic_unit_code, state_name, last_modified, begin, end, begin_utc, end_utc, computation_period_identifier, computation_identifier, thresholds, sublocation_identifier, primary, monitoring_location_id, web_description, parameter_description, parent_time_series_id} +geometry, unit_of_measure, parameter_name, parameter_code, statistic_id, hydrologic_unit_code, state_name, last_modified, begin, end, begin_utc, end_utc, computation_period_identifier, computation_identifier, thresholds, sublocation_identifier, primary, monitoring_location_id, web_description, parameter_description, parent_time_series_id} \item{statistic_id}{A code corresponding to the statistic an observation represents. Example codes include 00001 (max), 00002 (min), and 00003 (mean). A complete list of codes and their descriptions can be found at \url{https://help.waterdata.usgs.gov/code/stat_cd_nm_query?stat_nm_cd=\%25&fmt=html}.} @@ -88,7 +88,7 @@ information.} \item{limit}{The optional limit parameter is used to control the subset of the selected features that should be returned in each page. The maximum allowable -limit is 10000. It may be beneficial to set this number lower if your internet +limit is 50000. It may be beneficial to set this number lower if your internet connection is spotty. 
The default (\code{NA}) will set the limit to the maximum allowable limit for the service.} diff --git a/tests/testthat/tests_userFriendly_fxns.R b/tests/testthat/tests_userFriendly_fxns.R index 4a96fd0b..e515e773 100644 --- a/tests/testthat/tests_userFriendly_fxns.R +++ b/tests/testthat/tests_userFriendly_fxns.R @@ -3,66 +3,67 @@ context("Unit values") test_that("Unit value data returns correct types", { testthat::skip_on_cran() skip_on_ci() - siteNumber <- "05114000" + siteNumber <- "USGS-05114000" parameterCd <- "00060" startDate <- "2014-10-10" endDate <- "2014-10-10" - rawData <- readNWISuv(siteNumber, parameterCd, startDate, endDate) - rawData <- renameNWISColumns(rawData) + rawData <- read_waterdata_continuous(monitoring_location_id = siteNumber, + parameter_code = parameterCd, + time = c(startDate, endDate)) - spreadOver120 <- readNWISuv( - siteNumber, parameterCd, - as.Date(Sys.Date() - 200), - Sys.Date() - ) - expect_true(min(spreadOver120$dateTime) < as.POSIXct(Sys.Date(), tz = "UTC")) + spreadOver120 <- read_waterdata_continuous(monitoring_location_id = siteNumber, + parameter_code = parameterCd, + time = c(as.Date(Sys.Date() - 200), + Sys.Date())) - recent_uv <- readNWISuv( - siteNumber, parameterCd, - as.Date(Sys.Date() - 10), - Sys.Date() - ) + expect_true(min(spreadOver120$time) < as.POSIXct(Sys.Date(), tz = "UTC")) + + recent_uv <- read_waterdata_continuous(monitoring_location_id = siteNumber, + parameter_code = parameterCd, + time = c(as.Date(Sys.Date() - 10), + Sys.Date())) expect_equal(grep( - x = attr(recent_uv, "url"), - pattern = "https://waterservices.usgs.gov/nwis/iv/" + x = attr(recent_uv, "request")[["url"]], + pattern = "https://api.waterdata.usgs.gov/ogcapi/v0/collections/continuous" ), 1) + expect_equal(grep( - x = attr(spreadOver120, "url"), - pattern = "https://nwis.waterservices.usgs.gov/nwis/iv/" + x = attr(spreadOver120, "request")[["url"]], + pattern = "https://api.waterdata.usgs.gov/ogcapi/v0/collections/continuous" ), 1) + # 
nolint start: line_length_linter expect_equal( - attr(rawData, "url"), - "https://nwis.waterservices.usgs.gov/nwis/iv/?site=05114000&format=waterml%2C1.1&ParameterCd=00060&startDT=2014-10-10&endDT=2014-10-10" + attr(rawData, "request")[["url"]], + "https://api.waterdata.usgs.gov/ogcapi/v0/collections/continuous/items?f=json&lang=en-US&skipGeometry=TRUE&limit=50000&monitoring_location_id=USGS-05114000&parameter_code=00060&time=2014-10-10T00%3A00%3A00Z%2F2014-10-10T00%3A00%3A00Z" ) # nolint end - timeZoneChange <- readNWISuv(c("04024430", "04024000"), parameterCd, - "2013-11-03", "2013-11-03", - tz = "America/Chicago" + timeZoneChange <- read_waterdata_continuous(monitoring_location_id = c("USGS-04024430", "USGS-04024000"), + parameter_code = parameterCd, + time = c("2013-11-03", "2013-11-03") ) - timeZoneChange <- renameNWISColumns(timeZoneChange) - expect_is(rawData$dateTime, "POSIXct") - expect_is(rawData$Flow_Inst, "numeric") + expect_is(rawData$time, "POSIXct") + expect_is(rawData$value, "numeric") # nolint start: line_length_linter - expect_equal(attr(rawData, "url"), "https://nwis.waterservices.usgs.gov/nwis/iv/?site=05114000&format=waterml%2C1.1&ParameterCd=00060&startDT=2014-10-10&endDT=2014-10-10") + expect_equal(attr(rawData, "request")[["url"]], + "https://api.waterdata.usgs.gov/ogcapi/v0/collections/continuous/items?f=json&lang=en-US&skipGeometry=TRUE&limit=50000&monitoring_location_id=USGS-05114000&parameter_code=00060&time=2014-10-10T00%3A00%3A00Z%2F2014-10-10T00%3A00%3A00Z") # nolint end - site <- "04087170" + site <- "USGS-04087170" pCode <- "63680" startDate <- "2012-07-10" endDate <- "2012-07-17" - dd_2 <- readNWISuv(site, pCode, startDate, endDate) - expect_true(any(names(dd_2) %in% c( - "agency_cd", "site_no", - "dateTime", - "X_63680_00000", "X_63680_00000_cd", - "X_.YSI.6136.UP._63680_00000", - "X_.YSI.6136.UP._63680_00000_cd", - "tz_cd" - ))) + + # Example that used to have YSI.6136.UP and regular + # so now has 2 timeseries ids + dd_2 <- 
read_waterdata_continuous(monitoring_location_id = site, + parameter_code = pCode, + time = c(startDate, endDate)) + + expect_true(length(unique(dd_2$time_series_id)) == 2 ) }) context("Peak, rating, meas, site") @@ -79,9 +80,9 @@ test_that("peak, rating curves, surface-water measurements", { expect_that(length(attr(data, "RATING")), equals(7)) # Surface meas: - siteNumbers <- c("01594440", "040851325") - data <- readNWISmeas(siteNumbers) - expect_is(data$agency_cd, "character") + siteNumbers <- c("USGS-01594440", "USGS-040851325") + data <- read_waterdata_field_measurements(siteNumbers) + expect_is(data$monitoring_location_id, "character") siteINFO_USGS <- read_waterdata_monitoring_location(monitoring_location_id = "USGS-05114000") expect_is(siteINFO_USGS$agency_code, "character") @@ -91,9 +92,8 @@ test_that("peak, rating curves, surface-water measurements", { "USGS-09423350")) expect_true(nrow(siteINFOMulti_USGS) == 2) - Meas07227500.ex <- readNWISmeas("07227500", expanded = TRUE) - expect_is(Meas07227500.ex$measurement_dt, "Date") - expect_is(Meas07227500.ex$measurement_dateTime, "POSIXct") + Meas07227500.ex <- read_waterdata_field_measurements(monitoring_location_id = "USGS-07227500") + expect_is(Meas07227500.ex$time, "POSIXct") expect_equal(nrow(read_waterdata_ts_meta(monitoring_location_id = "USGS-10312000", parameter_code = "50286")), 0) @@ -390,7 +390,7 @@ test_that("Construct USGS urls", { # nolint start: line_length_linter expect_equal(url_daily$url, - "https://api.waterdata.usgs.gov/ogcapi/v0/collections/daily/items?f=json&lang=en-US&time=2024-01-01%2F..&skipGeometry=FALSE&limit=10000") + "https://api.waterdata.usgs.gov/ogcapi/v0/collections/daily/items?f=json&lang=en-US&time=2024-01-01%2F..&skipGeometry=FALSE&limit=50000") url_works <- dataRetrieval:::walk_pages(url_daily, max_results = 1) expect_true(nrow(url_works) > 0) @@ -401,7 +401,7 @@ test_that("Construct USGS urls", { expect_equal( url_ts_meta$url, - 
"https://api.waterdata.usgs.gov/ogcapi/v0/collections/time-series-metadata/items?f=json&lang=en-US&skipGeometry=FALSE&limit=10000" + "https://api.waterdata.usgs.gov/ogcapi/v0/collections/time-series-metadata/items?f=json&lang=en-US&skipGeometry=FALSE&limit=50000" ) url_works_ts <- dataRetrieval:::walk_pages(url_ts_meta, max_results = 1) @@ -410,7 +410,7 @@ test_that("Construct USGS urls", { url_ml <- construct_api_requests(id = siteNumber, service = "monitoring-locations") - expect_equal(url_ml$url, "https://api.waterdata.usgs.gov/ogcapi/v0/collections/monitoring-locations/items?f=json&lang=en-US&skipGeometry=FALSE&limit=10000&id=USGS-01594440") + expect_equal(url_ml$url, "https://api.waterdata.usgs.gov/ogcapi/v0/collections/monitoring-locations/items?f=json&lang=en-US&skipGeometry=FALSE&limit=50000&id=USGS-01594440") url_works_ml <- dataRetrieval:::walk_pages(url_ml, max_results = 1) expect_true(nrow(url_works_ml) > 0) diff --git a/tutorials/basic_slides_deck.qmd b/tutorials/basic_slides_deck.qmd index 5536afcb..5d88407e 100644 --- a/tutorials/basic_slides_deck.qmd +++ b/tutorials/basic_slides_deck.qmd @@ -437,9 +437,9 @@ We're going walk through 3 retrievals: * **Workflow 4**: Continuous Data - - Uses the NWIS web services + - Uses the new USGS Water Data API - - Will be deprecated, this fall we'll have `read_waterdata_continuous` + - Modern data access point going forward * **Workflow 5**: Join Continuous and Discrete @@ -814,10 +814,6 @@ band_instruments |> * Continuous data is the high-frequency sensor data. -* The function to get that data today is `readNWISuv` - -* As NWIS gets deprecated, we expect to have `read_waterdata_continuous` soon - * We'll look at Suisun Bay a Van Sickle Island NR Pittsburg CA ("USGS-11455508"), with parameter code "99133" which is Nitrate plus Nitrite. 
## Workflow 4: Continuous data for known site @@ -828,15 +824,14 @@ band_instruments |> ```{r} #| results: markup -site_id <- "11455508" +site_id <- "USGS-11455508" p_code_rt <- "99133" start_date <- "2024-01-01" end_date <- "2024-06-01" -continuous_data <- readNWISuv(site_id, - p_code_rt, - start_date, - end_date) +continuous_data <- read_waterdata_continuous(monitoring_location_id = site_id, + parameter_code = p_code_rt, + time = c(start_date, end_date)) names(continuous_data) ``` @@ -846,19 +841,17 @@ names(continuous_data) ::: {.column width="30%"} ``` -[1] "agency_cd" -[2] "site_no" -[3] "dateTime" -[4] "X_99133_00000" -[5] "X_99133_00000_cd" -[6] "tz_cd" + [4] "time" "unit_of_measure" "parameter_code" + [7] "statistic_id" "value" "approval_status" +[10] "last_modified" "qualifier" ``` ::: :::: ``` -GET: https://nwis.waterservices.usgs.gov/nwis/iv/?site=11455508&format=waterml%2C1.1&ParameterCd=99133&startDT=2024-01-01&endDT=2024-06-01 +Requesting: +https://api.waterdata.usgs.gov/ogcapi/v0/collections/continuous/items?f=json&lang=en-US&skipGeometry=TRUE&limit=50000&monitoring_location_id=USGS-11455508¶meter_code=99133&time=2024-01-01T00%3A00%3A00Z%2F2024-06-01T00%3A00%3A00Z ``` ## Workflow 4: Inspect @@ -866,8 +859,8 @@ GET: https://nwis.waterservices.usgs.gov/nwis/iv/?site=11455508&format=waterml%2 ```{r} #| output-location: column ggplot(data = continuous_data) + - geom_point(aes(x = dateTime, - y = X_99133_00000)) + geom_point(aes(x = time, + y = value)) ``` ## Workflow 5: Join Discrete and Continuous @@ -901,7 +894,7 @@ discrete_data <- read_waterdata_samples(monitoringLocationIdentifier = "USGS-114 #| code-line-numbers: "1|2-3|5|6|1-6" library(data.table) setDT(discrete_data)[, join_date := Activity_StartDateTime] -setDT(continuous_data)[, join_date := dateTime] +setDT(continuous_data)[, join_date := time] closest_dt <- continuous_data[discrete_data, on = .(join_date), roll = "nearest"] closest_dt <- data.frame(closest_dt) @@ -917,7 +910,7 @@ closest_dt 
<- data.frame(closest_dt) #| output-location: column ggplot(data = closest_dt) + geom_point(aes(x = Result_Measure, - y = X_99133_00000)) + + y = value)) + geom_abline() + expand_limits(x = 0, y = 0) + xlab("Discrete") + diff --git a/tutorials/changes_slides_deck.qmd b/tutorials/changes_slides_deck.qmd index 7c2b94c2..57b932fa 100644 --- a/tutorials/changes_slides_deck.qmd +++ b/tutorials/changes_slides_deck.qmd @@ -137,7 +137,9 @@ Open Geospatial Consortium (OGC), a non-profit international organization that d * [read_waterdata_field_measurements](https://doi-usgs.github.io/dataRetrieval/reference/read_waterdata_field_measurements.html) - Discrete hydrologic data (gage height, discharge, and readings of groundwater levels) -* [read_waterdata](https://doi-usgs.github.io/dataRetrieval/reference/read_waterdata.html) - Generalized function +* [read_waterdata_continuous](https://doi-usgs.github.io/dataRetrieval/reference/read_waterdata_continuous.html) - Continuous data are collected via automated sensors installed at a monitoring location. + +* [read_waterdata](https://doi-usgs.github.io/dataRetrieval/reference/read_waterdata.html) - Generalized function. 
* [read_waterdata_metadata](https://doi-usgs.github.io/dataRetrieval/reference/read_waterdata_metadata.html) - Metadata @@ -454,8 +456,7 @@ daily <- read_waterdata_daily(monitoring_location_id = c("USGS-05406457", ```{r} #| echo: false -dt_me(daily |> - select(-daily_id), 6, "0.7em") +dt_me(daily, 6, "0.7em") ``` @@ -558,8 +559,7 @@ Latest discharge observation (00060) in Dane County, WI: dt_me(latest_dane_county |> sf::st_drop_geometry() |> select(-time_series_id, - -statistic_id, - -latest_continuous_id), 6, "0.7em") + -statistic_id), 6, "0.7em") ``` @@ -622,6 +622,25 @@ leaflet(data = latest_dane_county_daily |> values = ~value) ``` +## read_waterdata_continuous + +Replaces `readNWISuv`: + +```{r} +this_week <- read_waterdata_continuous(monitoring_location_id = c("USGS-05406457", + "USGS-05427930"), + parameter_code = c("00060", "00010"), + time = "P7D") +``` + +Currently only allows 3 years of data to be queried at once. + +::: footer + +::: + + + ## read_waterdata * This function is totally different! diff --git a/vignettes/Status.Rmd b/vignettes/Status.Rmd index 24610c7d..9cf91c27 100644 --- a/vignettes/Status.Rmd +++ b/vignettes/Status.Rmd @@ -48,7 +48,7 @@ df <- data.frame( "readNWISstat", "readNWISpeak", "readNWISuse", "whatNWISdata" ), New = c( "read_waterdata_field_measurements", "read_waterdata_field_measurements", "read_waterdata", - rep("", 6) + "read_waterdata_continuous", + rep("", 5) ), - "Available on (branch)" = c(rep("main (CRAN)", 5), - "develop", "develop", "main (CRAN)", rep("", 6)) + "Available on (branch)" = c(rep("main (CRAN)", 8), + "develop", rep("", 5)) ) knitr::kable(df, col.names = c("WaterServices (legacy) function", "Water Data (new) function", "Available on (branch name)")) @@ -74,7 +74,7 @@ If you want to learn more about the new water data APIs, check out the ["What's ## API Keys -Do you make *a lot* of `dataRetrieval` WaterServices calls (e.g.
using functions like `readNWISdv`, `readNWISuv`) per day? ...On the order of more than 50 function calls per hour? As you switch your workflows over to the new Water Data API functions, consider grabbing yourself an API key, which will bump your limit up to 1,000 requests per hour. Head to the [sign up page](https://api.waterdata.usgs.gov/signup) to get a token. +Do you regularly use `dataRetrieval`? As you switch your workflows over to the new Water Data API functions, consider grabbing yourself an API key, which will bump your limit up to 1,000 requests per hour. Head to the [sign up page](https://api.waterdata.usgs.gov/signup) to get a token. One you have your API key, add it to your `.Renviron` file like this: @@ -84,6 +84,14 @@ API_USGS_PAT = "[your api key]" Replace [your api key] with the alphanumeric code provided by the sign-up page. That's it! `dataRetrieval` will look for an `.Renviron` file in your directories and use it for making web service calls. +## Large Data Pulls + +If you manage large data pipelines, you might notice some timeouts or errors indicating your request was too large. This will be especially true for the continuous data. Continuous data requests are only allowed up to 3 years of data in a single request. + +HOWEVER, if you regularly make full period of record queries you may want to wait a few months before rewriting your workflows. We expect that a direct access to full period of record downloads will be available before the NWIS web services are shut down. We plan to make `dataRetrieval` function(s) to access that direct download access. + +Maybe you are running into limitations but a full period-of-record won't necessarily help. Consider using the R package [`targets`](https://books.ropensci.org/targets/). Stay tuned as we plan to put together a collection of examples and suggestions for large data pulls as the API service offerings evolve. 
+ # Discrete Data In March 2024, NWIS **discrete water quality** services were "frozen": any public data retrieval using `readNWISqw()` no longer included any new data. Concurrently, the main [Water Quality Portal (WQP) API](https://www.waterqualitydata.us/) stopped serving new and updated USGS data (we will refer to this set of web services as "legacy"). Now, new and updated data are available from the [USGS Samples API](https://waterdata.usgs.gov/download-samples/#dataProfile=site) (for USGS data only) or in the [beta version](https://www.waterqualitydata.us/beta/) of the WQP (both USGS and non-USGS data). diff --git a/vignettes/dataRetrieval.Rmd b/vignettes/dataRetrieval.Rmd index 2e5a4449..92623e0a 100644 --- a/vignettes/dataRetrieval.Rmd +++ b/vignettes/dataRetrieval.Rmd @@ -30,11 +30,11 @@ knitr::opts_chunk$set( ) ``` -The `dataRetrieval` package was created to simplify the process of loading hydrologic data into the R environment. It is designed to retrieve the major data types of U.S. Geological Survey (USGS) hydrologic data that are available on the Web, as well as data from the Water Quality Portal (WQP), which currently houses water quality data from the Environmental Protection Agency (EPA), U.S. Department of Agriculture (USDA), and USGS. Direct USGS data is obtained from a service called the National Water Information System (NWIS). +The `dataRetrieval` package was created to simplify the process of loading hydrologic data into the R environment. It is designed to retrieve the major data types of U.S. Geological Survey (USGS) hydrologic data that are available on the Web, as well as data from the Water Quality Portal (WQP), which currently houses water quality data from the Environmental Protection Agency (EPA), U.S. Department of Agriculture (USDA), and USGS. Direct USGS data is obtained from a service called the USGS Water Data API. For information on getting started in R and installing the package, see [Getting Started](#getting-started-in-r). 
Any use of trade, firm, or product names is for descriptive purposes only and does not imply endorsement by the U.S. Government. -A quick workflow for USGS `dataRetrieval` functions: +A quick workflow using `dataRetrieval` functions: ```{r workflow, echo=TRUE,eval=FALSE} library(dataRetrieval) @@ -52,7 +52,17 @@ rawDailyData <- read_waterdata_daily(monitoring_location_id = siteNumber, pCode <- read_waterdata_parameter_codes(parameter_code = parameterCd) ``` -USGS data are made available through the National Water Information System (NWIS). +## USGS Data API Access Tokens +
+USGS data are made available through the USGS water data APIs. You can register an API key for use with USGS water data APIs. There are now limits on how many queries can be requested per IP address per hour. If you find yourself running into limits, you can request an API token here: + +Then save your token in your .Renviron file like this: + +``` +API_USGS_PAT = "my_super_secret_token" +``` + +You can use `usethis::edit_r_environ()` to find and open your .Renviron file. You will need to restart R for that variable to be recognized. Table 1 describes the functions available in the `dataRetrieval` package. @@ -60,7 +70,7 @@ Table 1 describes the functions available in the `dataRetrieval` package. 
Functions <- c( "read_waterdata", "read_waterdata_daily", - "readNWISuv", + "read_waterdata_continuous", "readNWISrating", "read_waterdata_field_measurements", "readNWISpeak", @@ -83,13 +93,13 @@ Functions <- c( ) Description <- c( - "Time series data using user-specified queries", # readNWISdata - "Daily values", # readNWISdv - "Instantaneous values", # readNWISuv + "Time series data using user-specified queries", # read_waterdata + "Daily values", # read_waterdata_daily + "Instantaneous values", # read_waterdata_continuous "Rating table for active streamgage", # readNWISrating - "Surface-water measurements", # readNWISmeas + "Surface-water measurements", # read_waterdata_field_measurements "Peak flow", # readNWISpeak - "Groundwater levels", # readNWISgwl + "Groundwater levels", # read_waterdata_field_measurements "Water use", # readNWISuse "Statistical service", # readNWISstat "Parameter code information", # read_waterdata_parameter_codes @@ -108,7 +118,8 @@ Description <- c( ) Source <- c("USGS Water Data API", "USGS Water Data API", - rep("NWIS",2), + "USGS Water Data API", + "NWIS", "USGS Water Data API", "NWIS", "USGS Water Data API", @@ -183,7 +194,6 @@ kable(data.df, Examples for using these site numbers, parameter codes, and statistic codes will be presented in subsequent sections. -There are occasions where NWIS values are not reported as numbers, instead there might be text describing a certain event such as "Ice". Any value that cannot be converted to a number will be reported as NA in this package (not including remark code columns), unless the user sets an argument `convertType` to `FALSE`. In that case, the data is returned as a data frame that is entirely character columns. 
## Site Information @@ -232,7 +242,7 @@ tableData <- sf::st_drop_geometry(tableData) knitr::kable(tableData, - caption = "Table 4: Reformatted version of output from the whatNWISdata function for the Choptank River near Greensboro, MD, and from Seneca Creek at Dawsonville, MD from the daily values service [Some columns deleted for space considerations]") + caption = "Table 4: Reformatted version of output from the read_waterdata_ts_meta function for the Choptank River near Greensboro, MD, and from Seneca Creek at Dawsonville, MD from the daily values service [Some columns deleted for space considerations]") # nolint end @@ -243,16 +253,16 @@ Table 4: Reformatted version of output from the whatNWISdata function for the Ch |monitoring_location_id |parameter_description |unit_of_measure |begin |end | |:----------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------|:---------------|:----------|:----------| |USGS-01491000 |Specific conductance, water, unfiltered, microsiemens per centimeter at 25 degrees Celsius |uS/cm |2010-10-01 |2012-05-09 | -|USGS-01491000 |Dissolved oxygen, water, unfiltered, milligrams per liter |mg/l |2023-04-21 |2025-06-15 | -|USGS-01491000 |Discharge, cubic feet per second |ft^3/s |1948-01-01 |2025-06-15 | -|USGS-01645000 |Discharge, cubic feet per second |ft^3/s |1930-09-26 |2025-06-15 | +|USGS-01491000 |Dissolved oxygen, water, unfiltered, milligrams per liter |mg/l |2023-04-21 |2025-12-02 | +|USGS-01491000 |Discharge, cubic feet per second |ft^3/s |1948-01-01 |2025-12-02 | +|USGS-01645000 |Discharge, cubic feet per second |ft^3/s |1930-09-26 |2025-12-02 | |USGS-01491000 |Suspended sediment concentration, milligrams per liter |mg/l |1980-10-01 |1991-09-29 | |USGS-01491000 |Suspended sediment discharge, short tons per day |tons/day |1980-10-01 |1991-09-29 | -|USGS-01491000 |Nitrate plus nitrite, water, in situ, milligrams per liter as 
nitrogen |mg/l |2023-08-02 |2025-06-14 | -|USGS-01491000 |Temperature, water, degrees Celsius |degC |2023-04-21 |2025-06-15 | -|USGS-01491000 |Turbidity, water, unfiltered, monochrome near infra-red LED light, 780-900 nm, detection angle 90 +-2.5 degrees, formazin nephelometric units (FNU) |_FNU |2023-04-21 |2025-06-14 | +|USGS-01491000 |Nitrate plus nitrite, water, in situ, milligrams per liter as nitrogen |mg/l |2023-08-02 |2025-12-02 | +|USGS-01491000 |Temperature, water, degrees Celsius |degC |2023-04-21 |2025-12-02 | +|USGS-01491000 |Turbidity, water, unfiltered, monochrome near infra-red LED light, 780-900 nm, detection angle 90 +-2.5 degrees, formazin nephelometric units (FNU) |_FNU |2023-04-21 |2025-12-02 | |USGS-01491000 |Temperature, water, degrees Celsius |degC |2010-10-01 |2012-05-09 | -|USGS-01491000 |Specific conductance, water, unfiltered, microsiemens per centimeter at 25 degrees Celsius |uS/cm |2023-04-21 |2025-06-14 | +|USGS-01491000 |Specific conductance, water, unfiltered, microsiemens per centimeter at 25 degrees Celsius |uS/cm |2023-04-21 |2025-12-02 | ## Parameter Information @@ -268,9 +278,7 @@ parameterINFO <- read_waterdata_parameter_codes(parameter_code = parameterCd) ## Daily Data -To obtain daily records of USGS data, use the `readNWISdv` function. The arguments for this function are `siteNumbers`, `parameterCd`, `startDate`, `endDate`, and `statCd` (defaults to "00003"). If you want to use the default values, you do not need to list them in the function call. Daily data is pulled from [https://waterservices.usgs.gov/docs/dv-service/](https://waterservices.usgs.gov/docs/dv-service/). - -The dates (start and end) must be in the format "YYYY-MM-DD" (note: the user must include the quotes). Setting the start date to "" (no space) will prompt the program to ask for the earliest date, and setting the end date to "" (no space) will prompt for the latest available date. 
+To obtain daily records of USGS data, use the `read_waterdata_daily` function. ```{r label=getNWISDaily, echo=TRUE, eval=FALSE} @@ -345,39 +353,25 @@ legend("topleft", unique(temperatureAndFlow$unit_of_measure), ## Unit Data -Any data collected at regular time intervals (such as 15-minute or hourly) are known as "unit values". Many of these are delivered on a real time basis and very recent data (even less than an hour old in many cases) are available through the function `readNWISuv`. Some of these unit values are available for many years, and some are only available for a recent time period such as 120 days. Here is an example of a retrieval of such data. +Any data collected at regular time intervals (such as 15-minute or hourly) are known as "unit values". Many of these are delivered on a real time basis and very recent data (even less than an hour old in many cases) are available through the function `read_waterdata_continuous`. Here is an example of a retrieval of such data. ```{r label=readNWISuv, eval=FALSE} parameterCd <- "00060" # Discharge startDate <- "2012-05-12" endDate <- "2012-05-13" -dischargeUnit <- readNWISuv(siteNumber, parameterCd, startDate, endDate) -dischargeUnit <- renameNWISColumns(dischargeUnit) +dischargeUnit <- read_waterdata_continuous(monitoring_location_id = siteNumber, + parameter_code = parameterCd, + time = c(startDate, endDate)) + ``` The retrieval produces a data frame that contains 96 rows (one for every 15 minute period in the day). They include all data collected from the `startDate` through the `endDate` (starting and ending with midnight locally-collected time). The dateTime column is converted to UTC (Coordinated Universal Time), so midnight EST will be 5 hours earlier in the dateTime column (the previous day, at 7pm). -To override the UTC timezone, specify a valid timezone in the tz argument. Default is "", which will keep the dateTime column in UTC. 
Other valid timezones are: - -``` -America/New_York -America/Chicago -America/Denver -America/Los_Angeles -America/Anchorage -America/Honolulu -America/Jamaica -America/Managua -America/Phoenix -America/Metlakatla -``` - -Data are retrieved from [https://waterservices.usgs.gov/docs/instantaneous-values/](https://waterservices.usgs.gov/docs/instantaneous-values/). There are occasions where NWIS values are not reported as numbers, instead a common example is "Ice". Any value that cannot be converted to a number will be reported as NA in this package. Site information and measured parameter information is attached to the data frame as attributes. This is discussed further in [metadata](#embedded-metadata) section. ## Groundwater Level Data -Groundwater level measurements can be obtained with the `readNWISgwl` function. Information on the returned data can be found with the `comment` function, and attached attributes as described in the [metadata](#embedded-metadata) section. +Groundwater level measurements can be obtained with the `read_waterdata_field_measurements` function. ```{r gwlexample, echo=TRUE, eval=FALSE} siteNumber <- "USGS-434400121275801" @@ -409,9 +403,7 @@ attr(ratingData, "RATING") ## Surface-Water Measurement Data -These data are the discrete measurements of discharge that are made for the purpose of developing or revising the rating curve. Information on the returned data can be found with the `comment` function and attached attributes as described in the [metadata](#embedded-metadata) section. - -Surface-water measurement data can be obtained with the `readNWISmeas` function. +These data are the discrete measurements of discharge that are made for the purpose of developing or revising the rating curve. Surface-water measurement data can be obtained with the `read_waterdata_field_measurements` function. 
```{r surfexample, echo=TRUE, eval=FALSE} surfaceData <- read_waterdata_field_measurements(monitoring_location_id = "USGS-01594440") @@ -419,6 +411,7 @@ surfaceData <- read_waterdata_field_measurements(monitoring_location_id = "USGS- ## Water Use Data + Retrieves water use data from USGS Water Use Data for the Nation. See [https://waterdata.usgs.gov/nwis/wu](https://waterdata.usgs.gov/nwis/wu) for more information. All available use categories for the supplied arguments are retrieved. ```{r eval=FALSE} @@ -461,74 +454,6 @@ specificCond <- readWQPqw( ``` -# Generalized Retrievals - -The previous examples all took specific input arguments: `siteNumber`, `parameterCd` (or characteristic name), `startDate`, `endDate`, etc. However, the Web services that supply the data can accept a wide variety of additional arguments. - -## NWIS - -### Sites: whatNWISsites - -The function `whatNWISsites` can be used to discover NWIS sites based on any query that the NWIS Site Service offers. This is done by using the `...` argument, which allows the user to use any arbitrary input argument. We can then use the service [here](https://waterservices.usgs.gov/docs/site-service) to discover many options for searching for NWIS sites. For example, you may want to search for sites in a lat/lon bounding box, or only sites tidal streams, or sites with water quality samples, sites above a certain altitude, etc. The results of this site query generate a URL. For example, the tool provided a search within a specified bounding box, for sites that have daily discharge (parameter code = 00060) and temperature (parameter code = 00010). 
The generated URL is: - -[https://waterservices.usgs.gov/nwis/site/?format=rdb&bBox=-83.0,36.5,-81.0,38.5¶meterCd=00010,00060&hasDataTypeCd=dv](https://waterservices.usgs.gov/nwis/site/?format=rdb&bBox=-83.0,36.5,-81.0,38.5¶meterCd=00010,00060&hasDataTypeCd=dv) - -The following `dataRetrieval` code can be used to get those sites: - -```{r siteSearch, eval=FALSE} -sites <- whatNWISsites( - bBox = c(-83.0, 36.5, -81.0, 38.5), - parameterCd = c("00010", "00060"), - hasDataTypeCd = "dv" -) -``` - -### Data: readNWISdata - -For NWIS data, the function `readNWISdata` can be used. The argument listed in the R help file is `...` and `service` (only for data requests). Table 5 describes the services are available. - -```{r echo=FALSE} -# nolint start -Service <- c("dv", "iv", "gwlevels", "measurements", "peak", "stat") -Description <- c("Daily", "Instantaneous", "Groundwater Levels", "Surface Water Measurements", "Peak Flow", "Statistics Service") -URL <- c( - "https://waterservices.usgs.gov/docs/dv-service/", - "https://waterservices.usgs.gov/docs/instantaneous-values/", - "https://waterservices.usgs.gov/docs/groundwater-levels/", - "https://waterdata.usgs.gov/nwis/measurements/", - "https://nwis.waterdata.usgs.gov/usa/nwis/peak/", - "https://waterservices.usgs.gov/docs/statistics/" -) - -tableData <- data.frame(Service, - Description, - URL, - stringsAsFactors = FALSE -) - - -kable(tableData, - caption = "Table 5: NWIS general data calls" -) -# nolint end -``` - - -The `...` argument allows the user to create their own queries based on the instructions found in the web links above. The links provide instructions on how to create a URL to request data. Perhaps you want sites only in Wisconsin, with a drainage area less than 50 mi2, and the most recent daily discharge data. 
That request would be done as follows: - -```{r dataExample, eval=FALSE} -dischargeWI <- readNWISdata( - service = "dv", - stateCd = "WI", - parameterCd = "00060", - drainAreaMin = "50", - statCd = "00003" -) - -siteInfo <- attr(dischargeWI, "siteInfo") -``` - - ## WQP Just as with NWIS, the Water Quality Portal (WQP) offers a variety of ways to search for sites and request data. The possible Web service arguments for WQP site searches is found [here](https://www.waterqualitydata.us/webservices_documentation). diff --git a/vignettes/join_by_closest.Rmd b/vignettes/join_by_closest.Rmd index 7b041cfa..8341c4df 100644 --- a/vignettes/join_by_closest.Rmd +++ b/vignettes/join_by_closest.Rmd @@ -9,8 +9,8 @@ output: vignette: > %\VignetteIndexEntry{Join by closest date} \usepackage[utf8]{inputenc} - %\VignetteEngine{knitr::rmarkdown} %\VignetteDepends{dplyr} + %\VignetteEngine{knitr::rmarkdown} editor_options: chunk_output_type: console --- @@ -40,55 +40,35 @@ Let's look at site "01646500", and a nearby site with a real-time nitrate-plus-n ```{r getData} library(dataRetrieval) -site_uv <- "01646500" -site_qw <- "USGS-01646580" +site_uv <- "USGS-01646500" +site_samples <- "USGS-01646580" pcode_uv <- "99133" -pcode_qw <- "00631" +pcode_samples <- "00631" start_date <- as.Date("2018-01-01") end_date <- as.Date("2020-01-01") -qw_data <- readWQPqw(site_qw, pcode_qw, +samples_data <- readWQPqw(site_samples, pcode_samples, startDate = start_date, endDate = end_date) -uv_data <- readNWISuv(siteNumbers = site_uv, - parameterCd = c(pcode_uv, "00060"), - startDate = start_date, - endDate = end_date) +uv_data <- read_waterdata_continuous(monitoring_location_id = site_uv, + parameter_code = c(pcode_uv), + time = c(start_date, end_date)) ``` -The sensor data ("uv" data) at this particular site has 2 columns of data that are important. The first task is to combine those columns. This is rather unique to this particular site and probably won't need to be done generally. 
- -```{r trimUVdata} -library(dplyr) - -uv_trim <- uv_data |> - select(uv_date = dateTime, - val1 = X_SUNA...Discontinued._99133_00000, - val2 = X_SUNA_99133_00000, - flow = X_00060_00000) |> - mutate(val_uv = if_else(is.na(val1), val2, val1)) |> - select(-val1, -val2) - -``` +Next we'll clean up the discrete water quality data to make it easy to follow in this tutorial. -```{r showuvTrim, echo=FALSE} -knitr::kable(head(uv_trim)) -``` - -Next we'll clean up the discrete water quality "qw" data to make it easy to follow in this tutorial. - -```{r trimQWdata} -qw_trim <- qw_data |> +```{r trimsamplesdata} +samples_trim <- samples_data |> filter(ActivityTypeCode == "Sample-Routine", !is.na(ActivityStartDateTime)) |> - select(qw_date = ActivityStartDateTime, - val_qw = ResultMeasureValue, + select(samples_date = ActivityStartDateTime, + val_samples = ResultMeasureValue, det_txt = ResultDetectionConditionText) ``` -```{r showqwtrim, echo=FALSE} -knitr::kable(head(qw_trim)) +```{r showsamplestrim, echo=FALSE} +knitr::kable(head(samples_trim)) ``` Finally, we'll use the `data.table` package to do a join to the nearest date. The code to do that is here: @@ -96,29 +76,29 @@ Finally, we'll use the `data.table` package to do a join to the nearest date. Th ```{r} library(data.table) -setDT(qw_trim)[, join_date := qw_date] +setDT(samples_trim)[, join_date := samples_date] -setDT(uv_trim)[, join_date := uv_date] +setDT(uv_data)[, join_date := time] -closest_dt <- uv_trim[qw_trim, on = .(join_date), roll = "nearest"] +closest_dt <- uv_data[samples_trim, on = .(join_date), roll = "nearest"] ``` `closest_dt` is a `data.table` object. It similar to a data.frame, but not identical. We can convert it to a data.frame and then use `dplyr` commands. Note: the whole analysis can be done via `data.table`, but most examples in `dataRetrieval` have used `dplyr`, which is why we bring it back to data.frame. 
`dplyr` also has a `join_by(closest())` option, but it is more complicated because you can only specify the closeness in either the forward or backwards direction (and we want either direction). -We can calculate "delta_time" - the difference in time between the uv and qw data. We'll probably want to add a threshold that we don't join values if they are too far apart in time. In this example, if the difference is greater than 24 hours, we'll substitute `NA`. +We can calculate "delta_time" - the difference in time between the uv and samples data. We'll probably want to add a threshold that we don't join values if they are too far apart in time. In this example, if the difference is greater than 24 hours, we'll substitute `NA`. ```{r} -qw_closest <- data.frame(closest_dt) |> - mutate(delta_time = difftime(qw_date, uv_date, +samples_closest <- data.frame(closest_dt) |> + mutate(delta_time = difftime(samples_date, time, units = "hours"), - val_uv = if_else(abs(as.numeric(delta_time)) >= 24, NA, val_uv)) |> + val_uv = if_else(abs(as.numeric(delta_time)) >= 24, NA, value)) |> select(-join_date) ``` -```{r showqwClosest, echo=FALSE} -knitr::kable(head(qw_closest)) +```{r showsamplesClosest, echo=FALSE} +knitr::kable(head(samples_closest)) ``` Here are a few plots to show the applications of these joins: @@ -126,32 +106,22 @@ Here are a few plots to show the applications of these joins: ```{r} library(ggplot2) -ggplot(data = qw_closest) + - geom_point(aes(x = val_uv, y = val_qw)) + +ggplot(data = samples_closest) + + geom_point(aes(x = val_uv, y = val_samples)) + theme_bw() + xlab("Sensor") + ylab("Discrete") ``` -```{r} -ggplot(data = qw_closest) + - geom_point(aes(x = flow, y = val_qw)) + - theme_bw() + - xlab("Discharge") + - ylab("Concentration") + - scale_x_log10() + - scale_y_log10() - -``` ```{r} ggplot() + - geom_line(data = uv_trim, - aes(x = uv_date, val_uv), + geom_line(data = uv_data, + aes(x = time, value), color = "lightgrey") + - geom_point(data = 
qw_closest, - aes(x = qw_date, y = val_qw), + geom_point(data = samples_closest, + aes(x = samples_date, y = val_samples), color= "red") + theme_bw() + ggtitle("Red dots = discrete samples, grey lines = continuous sensor") + diff --git a/vignettes/movingAverages.Rmd b/vignettes/movingAverages.Rmd index 1574f971..96b024b4 100644 --- a/vignettes/movingAverages.Rmd +++ b/vignettes/movingAverages.Rmd @@ -16,9 +16,11 @@ editor_options: chunk_output_type: console --- -:warning: +**WARNING** + This post is very old! A better way to do all these plots and calculations can be found here: +**WARNING** This post will show simple way to calculate moving averages, calculate historical-flow quantiles, and plot that information. The goal is to reproduce the graph at this link: [PA Graph](http://pa.water.usgs.gov/drought/indicators/sw/images/f30_01538000.html). The motivation for this post was inspired by a USGS colleague that that is considering creating these type of plots in R. We thought this plot provided an especially fun challenge - maybe you will, too! @@ -261,10 +263,9 @@ styled.plot <- simple.plot + panel.grid.major = element_blank(), panel.grid.minor = element_blank() ) + - labs(list( - title = title.text, + labs(title = title.text, y = "30-day moving ave", x = "" - )) + + ) + scale_fill_manual( name = "", breaks = label.text, values = c("red", "orange", "yellow", "darkgreen") diff --git a/vignettes/read_waterdata_functions.Rmd b/vignettes/read_waterdata_functions.Rmd index 746c9257..f8feda77 100644 --- a/vignettes/read_waterdata_functions.Rmd +++ b/vignettes/read_waterdata_functions.Rmd @@ -181,6 +181,27 @@ ggplot(data = daily_modern) + ``` +## Continuous + +The `read_waterdata_continuous` function replaces the `readNWISuv` function. + +`r dataRetrieval:::get_description("continuous")` + +To access these services on a web browser, go to . 
+ +```{r} +sensor_data <- read_waterdata_continuous(monitoring_location_id = "USGS-01491000", + parameter_code = c("00060", "00010"), + statistic_id = "00003", + time = c("2023-10-01", "2024-09-30")) +``` + +Currently this service only allows up to 3 years of data to be requested at once. If no time is specified in the query, the latest year of data is returned. If you manage large data pipelines, this might sound very restrictive. + +**HOWEVER**, if you regularly make full period of record queries you may want to wait a few months before rewriting your workflows. We expect that a direct access to full period of record downloads will be available before the NWIS web services are shut down. We plan to make `dataRetrieval` function(s) to access that direct download access. + +Continuous data does not return a geometry column, and bounding box queries are not supported. + ## Field Measurements The `read_waterdata_field_measurements` replaces both the `readNWISgwl` and `readNWISmeas` functions. @@ -538,13 +559,13 @@ This section will initially be a random stream of consciousness on lessons learn A semi-common way to find a lot of data in the past would have been to use a monitoring location query to get a huge list of sites, and then use that huge list of sites (maybe winnowing it down a little) to get the data. These new services return a 403 error if your request is too big ("web server understands your request but refuses to authorize it"). This is true whether or not the request is a GET or POST request (something that is taken care of under the hood), and seems to be a character limit of the overall request. Roughly, it seems like if you were requesting more than 250 monitoring locations, the response will immediately return with a 403 error. -There are at least 2 ways to deal with this. One is to manually split the data requests and bind the results together later. The other is to use the bounding box of the initial request as an input to the data request. 
Potentially some sites would need to be filtered out later using this method. +There are several ways to deal with this. One is to manually split the data requests and bind the results together later. The other is to use the bounding box of the initial request as an input to the data request. Potentially some sites would need to be filtered out later using this method. -Example: +Example, let's get all the stream sites in Ohio: ```{r} ohio <- read_waterdata_monitoring_location(state_name = "Ohio", - site_type_code = "ST") + site_type_code = "ST") ``` @@ -563,14 +584,14 @@ We could use the fact that the `ohio` data frame contains geospatial information ```{r} ohio_discharge <- read_waterdata_daily(bbox = sf::st_bbox(ohio), - parameter_code = "00060", - time = "P7D") + parameter_code = "00060", + time = "P7D") ``` -A reasonable `r nrow(ohio_discharge)` are returned with the bounding box query. +A reasonable `r nrow(ohio_discharge)` are returned with the bounding box query. However, sites that are not classified as stream (site_type_code = "ST") are returned in this request, so it is not exactly the data you are looking for, and would need to be filtered to just the sites in the `ohio` data frame. -Maybe you have a list of sites that are scattered around the country. The bounding box method might not be ideal. There are several ways to loop through a set of sites, here is one simple example: +Maybe you have a list of sites that are scattered around the country. The bounding box method will not be ideal. 
There are several ways to loop through a set of sites, here is one simple example that chunks up the request into groups of 200 sites: ```{r} big_vector_of_sites <- ohio$monitoring_location_id @@ -584,14 +605,88 @@ for(sites in site_list){ time = "P7D") if(nrow(df_sites) == 0){ next + } else if(nrow(data_returned) == 0){ + data_returned <- df_sites + } else { + data_returned <- rbind(data_returned, df_sites) } - data_returned <- rbind(data_returned, df_sites) } ``` Note there is fewer data returned in `data_returned` because those sites are already filtered down to just "Stream" sites. The bounding box results `ohio_discharge` contained other types of monitoring location types. +If you run into any problems with that loop (maybe you hit your limit for an hour, maybe the internet went down, maybe there was a timeout from the services), you'll need to re-run the whole loop again and hope that it works the next time. A more robust way to do it would be to create a [`targets`](https://books.ropensci.org/targets/) pipeline. `targets` will take care of remembering what job completed, and what job needs to be re-run. + +Here's the Ohio example using a dynamic branch with `targets`. 
This code would be saved in a file `_targets.R`: + +```{r eval=FALSE} +# Load packages required to define the pipeline: +library(targets) + +# Set target options: +tar_option_set(packages = c("dataRetrieval")) + +# list of targets to build +list( + tar_target(name = ohio_sites, + command = read_waterdata_monitoring_location(state_name = "Ohio", + site_type_code = "ST")), + tar_target(name = site_chunks, + command = split(ohio_sites$monitoring_location_id, ceiling(seq_along(ohio_sites$monitoring_location_id)/200))), + tar_target(name = dv_measurements, + command = read_waterdata_daily(monitoring_location_id = site_chunks[[1]], + time = "P7D", + parameter_code = "00060"), + pattern = site_chunks, + iteration = "list" + ), + tar_target(name = remove_empties, + command = dv_measurements[sapply(dv_measurements, function(x) dim(x)[1]) > 0]), + tar_target(name = ohio_discharge, + command = do.call(rbind, remove_empties)) + +) +``` + +Once the _targets.R file is created, you can run: + +```{r eval=FALSE} +library(targets) +tar_make() +``` + +If everything went according to plan, the last several printed messages would look like this: + +``` +✔ dv_measurements completed [5.8s, 179.58 kB] ++ remove_empties dispatched +✔ remove_empties completed [0ms, 123.06 kB] ++ ohio_discharge dispatched +✔ ohio_discharge completed [20ms, 65.11 kB] +✔ ended pipeline [16.9s, 19 completed, 0 skipped] +``` + +If instead it ended with any errors, it would look like this: + +``` +✖ errored pipeline [9.3s, 0 completed, 21 skipped] +Error: +! 
Error in tar_make(): +``` + +If the errors were related to the internet, service outages, or exceeding API requests, you could re-run the pipeline and only the failed jobs would be re-run: + +```{r eval=FALSE} +# Rerun: +tar_make() +``` + +Once the pipeline has completed, you can load the `ohio_discharge` data frame into your environment with `tar_load`: + +```{r eval=FALSE} +tar_load(ohio_discharge) +``` ## limit vs max_results diff --git a/vignettes/tutorial.Rmd b/vignettes/tutorial.Rmd index c77bdb7f..825ffbde 100644 --- a/vignettes/tutorial.Rmd +++ b/vignettes/tutorial.Rmd @@ -58,40 +58,35 @@ Finally, if there are still questions that the vignette and help files don't ans -For a longer introduction to the `dataRetrieval` package, see: - - - # Orientation `dataRetrieval` provides US water data mainly via 3 sources: -* National Water Information System (NWIS) +* USGS APIs (Water Data) * Water Quality Portal (WQP) -* USGS APIs (Water Data), which are new or in-development . +* Water Services from National Water Information System (NWIS) - Legacy system that will eventually be retired. -Functions in `dataRetrieval` look like `readNWISdv`, `readNWISuv`, `readWQPqw`, `whatNWISdata`, etc. What does that mean? The functions are generally structured with a prefix, middle, and suffix: + +Functions in `dataRetrieval` look like `read_waterdata_daily`, `read_waterdata_continuous`, `readWQPqw`, `summarize_waterdata_samples`, etc. What does that mean? The functions are generally structured with a prefix, middle, and suffix: * _Prefix_: "read" or "what" + "read" will access full data sets - + "what" will access data availability -* _Middle_: "NWIS", "waterdata", "WQP": - + NWIS functions get data from legacy NWIS web services. - + Water Data (waterdata) functions are the functions that will eventually replace the legacy NWIS functions. These pull from modern USGS API services. 
+ + "summarize" will access data availability +* _Middle_: "waterdata", "WQP", "NWIS": + + Water Data (waterdata) functions are the functions that get USGS water data. These pull from modern USGS API services. + WQP functions are for discrete water-quality data from the Water Quality Portal. + + NWIS functions get data from legacy NWIS water services. * _Suffix_: "data" or other: + Functions that end in "data": These are flexible, powerful functions that allow complex user queries. + Functions that don't end with "data" are user-friendly functions that assume site, code, and start/end dates are known. - - # Data Overview -There are many types of data served from NWIS. To understand how the services are separated, it's helpful to understand that each data type is retrieved from a completely different web service or API. +There are many types of data available from `dataRetrieval`. To understand how the services are separated, it's helpful to understand that each data type is retrieved from a completely different web service or API. * NWIS has traditionally been the source for all USGS water data -* Legacy NWIS services will be retired (scheduled 2026, but uncertain): +* NWIS Water Services will be retired (scheduled late 2026, but uncertain): * * USGS water data functions will slowly replace NWIS functions
To understand how the services ar * `read_waterdata_monitoring_location` can replace `readNWISsite` * `read_waterdata_ts_meta` can replace `whatNWISdata` * `read_waterdata_parameter_codes` can replace `readNWISpCode` + * `read_waterdata_continuous` can replace `readNWISuv` * Discrete water quality data: * WQP functions should be used when accessing non-USGS discrete water quality data * `read_waterdata_samples` should be used for USGS data -# NWIS Data: Current NWIS offerings +# USGS Water Data APIs -| data_type_cd |Function| Data description | Replacement Function | +| data_type_cd |Legacy Function| Data description | New Function | |--------|:-------|------:|-------:| -|uv|[readNWISuv](https://doi-usgs.github.io/dataRetrieval/reference/readNWISuv.html)|Continuous data| None yet | -|dv|[readNWISdv](https://doi-usgs.github.io/dataRetrieval/reference/readNWISdv.html)|Daily aggregated | [read_waterdata_daily](https://doi-usgs.github.io/dataRetrieval/reference/read_waterdata_daily.html) | -|gwlevels|[readNWISgwl](https://doi-usgs.github.io/dataRetrieval/reference/readNWISgwl.html)|Groundwater levels | [read_waterdata_field_measurements](https://doi-usgs.github.io/dataRetrieval/reference/read_waterdata_field_measurements.html) | -|site|[readNWISsite](https://doi-usgs.github.io/dataRetrieval/reference/readNWISsite.html)|Site metadata| [read_waterdata_monitoring_location](https://doi-usgs.github.io/dataRetrieval/reference/read_waterdata_monitoring_location.html) | -|pcode|[readNWISpCode](https://doi-usgs.github.io/dataRetrieval/reference/readNWISpCode.html)|Parameter code metadata | [read_waterdata_parameter_codes](https://doi-usgs.github.io/dataRetrieval/reference/read_waterdata_parameter_codes.html) | -|stat|[readNWISstat](https://doi-usgs.github.io/dataRetrieval/reference/readNWISstat.html)| Site statistics | None yet | -|rating|[readNWISrating](https://doi-usgs.github.io/dataRetrieval/reference/readNWISrating.html)| Rating curves| None yet | 
-|peak|[readNWISpeak](https://doi-usgs.github.io/dataRetrieval/reference/readNWISpeak.html)|Peak flow| None yet | -|use|[readNWISuse](https://doi-usgs.github.io/dataRetrieval/reference/readNWISuse.html)|Water Use| None yet | -|meas|[readNWISmeas](https://doi-usgs.github.io/dataRetrieval/reference/readNWISmeas.html)|Discrete surface water| [read_waterdata_field_measurements](https://doi-usgs.github.io/dataRetrieval/reference/read_waterdata_field_measurements.html)| -| | [readNWISdata](https://doi-usgs.github.io/dataRetrieval/reference/readNWISdata.html) | General data import for NWIS| [read_waterdata](https://doi-usgs.github.io/dataRetrieval/reference/read_waterdata.html) | +|uv|`readNWISuv`|Continuous data| `read_waterdata_continuous` | +|dv|`readNWISdv`|Daily aggregated | `read_waterdata_daily` | +|gwlevels|`readNWISgwl`|Groundwater levels | `read_waterdata_field_measurements` | +|site|`readNWISsite`|Site metadata| `read_waterdata_monitoring_location` | +|pcode|`readNWISpCode`|Parameter code metadata | `read_waterdata_parameter_codes` | +|stat|`readNWISstat`| Site statistics | None yet | +|rating|`readNWISrating`| Rating curves| None yet | +|peak|`readNWISpeak`|Peak flow| None yet | +|use|`readNWISuse`|Water Use| None yet | +|meas|`readNWISmeas`|Discrete surface water| `read_waterdata_field_measurements`| +| | `readNWISdata` | General data import | `read_waterdata` | + +## Water Data API key + +Do you regularly use `dataRetrieval`? As you switch your workflows over to the new Water Data API functions, consider grabbing yourself an API key, which will bump your limit up to 1,000 requests per hour. Head to the [sign up page](https://api.waterdata.usgs.gov/signup) to get a token. + +Once you have your API key, add it to your `.Renviron` file like this: + +```{r eval=FALSE} +API_USGS_PAT = "your api key" +``` +Replace "your api key" with the alphanumeric code provided by the sign-up page. That's it!
`dataRetrieval` will look for an `.Renviron` file in your directories and use it for making web service calls. ## USGS Basic Retrievals @@ -272,20 +279,22 @@ datatable(data_available, ``` -The time series that have "Instantaneous" in the computation_identifier column will be available in the instantaneous data service (currently `readNWISuv`), and the rest of the data will be available in the daily service (`read_waterdata_daily`). +The time series that have "Instantaneous" in the computation_identifier column are available in the continuous data service (`read_waterdata_continuous`), and the rest of the data are available in the daily service (`read_waterdata_daily`). ```{r eval=FALSE, echo=TRUE} dv_pcodes <- data_available$parameter_code[data_available$computation_identifier != "Instantaneous"] stat_cds <- data_available$statistic_id[data_available$computation_identifier != "Instantaneous"] +stat_cds <- unique(stat_cds[!is.na(stat_cds)]) + dv_data <- read_waterdata_daily(monitoring_location_id = site, parameter_code = unique(dv_pcodes), - statistic_id = unique(stat_cds)) + statistic_id = stat_cds) uv_pcodes <- data_available$parameter_code[data_available$computation_identifier == "Instantaneous"] -uv_data <- readNWISuv(siteNumbers = gsub("USGS-", "", site), - parameterCd = unique(uv_pcodes)) +uv_data <- read_waterdata_continuous(monitoring_location_id = site, + parameter_code = unique(uv_pcodes)) peak_data <- readNWISpeak(gsub("USGS-", "", site))