diff --git a/.Rbuildignore b/.Rbuildignore index d03620d9..42fe85da 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -96,6 +96,7 @@ vignettes/Wide_data_example.xlsx vignettes/messyData.png ^ci$ ^public$ +^tutorials$ ^docker$ vignettes/WQX_3.Rmd vignettes/samples_data.Rmd diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index e2b5705f..94a35736 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -27,6 +27,7 @@ jobs: API_USGS_PAT: ${{ secrets.API_USGS_PAT }} R_KEEP_PKG_SOURCE: yes CUSTOM_DR_UA: 'GitHub_CI' + _R_CHECK_DONTTEST_EXAMPLES_: false steps: - uses: actions/checkout@9a9194f87191a7e9055e3e9b95b8cfb13023bb08 diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index 15eb0849..4491fc34 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -59,12 +59,18 @@ jobs: any::gridExtra local::. needs: website - + - name: Create public directory + run: | + mkdir public - name: Build site run: | - pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE, dest_dir = "public") | + pkgdown::build_site(override = list(destination = "public")) | file.copy(from = "./public/articles/logo.png",to = "./public/reference/logo.png") shell: Rscript {0} + - name: Set up Quarto + uses: quarto-dev/quarto-actions/setup@v2 + - name: Render Quarto Project + uses: quarto-dev/quarto-actions/render@v2 - name: Upload artifact uses: actions/upload-pages-artifact@56afc609e74202658d3ffba0e8f6dda462b719fa with: diff --git a/.gitignore b/.gitignore index f5369ab4..210360f9 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,8 @@ docs /doc/ /Meta/ /Temp/ +vignettes/*.html +vignettes/*.R + + +/.quarto/ diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 7a65cbed..4322313c 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -23,9 +23,9 @@ variables: _R_CHECK_FORCE_SUGGESTS_: "true" _R_CHECK_DONTTEST_EXAMPLES_: "false" R_PROFILE: "$R_HOME/etc/Rprofile.site" + 
R_LIBS_USER: "$CI_PROJECT_DIR/ci/lib" APT_CACHE: "$CI_PROJECT_DIR/ci/lib/apt-cache" CRAN: "https://rpkg.chs.usgs.gov/prod-cran/latest" - R_LIBS_USER: "$CI_PROJECT_DIR/ci/lib" R_LIBS: "$CI_PROJECT_DIR/ci/lib" BUILD_LOGS_DIR: "$CI_PROJECT_DIR/ci/logs" NOT_CRAN: "true" @@ -43,7 +43,6 @@ build-image: rules: - changes: - docker/Dockerfile - - .gitlab-ci.yml script: - echo ${CI_REGISTRY_PASSWORD} | docker login -u ${CI_REGISTRY_USER} --password-stdin $CI_REGISTRY - docker pull ${CI_REGISTRY_IMAGE}:latest || true @@ -59,7 +58,7 @@ buildcheck: dependencies: - build-image script: - - R CMD build . --no-manual + - Rscript -e 'devtools::install(quick = TRUE, upgrade = "never")' - Rscript -e 'devtools::check(document = FALSE, args = "--no-tests", check_dir = Sys.getenv("BUILD_LOGS_DIR"), vignettes = FALSE)' unittests: @@ -97,10 +96,15 @@ longtest: pages: stage: end - cache: [] + dependencies: + - build-image + - buildcheck script: + - Rscript -e 'devtools::install(quick = TRUE, upgrade = "never")' - Rscript -e 'pkgdown::build_site(override = list(destination = "public"))' - Rscript -e 'file.copy(from = "./public/articles/logo.png", to = "./public/reference/logo.png")' + - quarto render + artifacts: paths: - $PAGES_OUTDIR @@ -119,5 +123,3 @@ Validate Inventory: --token "${GIT_TOKEN_CUSTOM}" tags: - chs-shared - - diff --git a/.gitlab/issue_templates/waterdata_additions.md b/.gitlab/issue_templates/waterdata_additions.md new file mode 100644 index 00000000..33238db8 --- /dev/null +++ b/.gitlab/issue_templates/waterdata_additions.md @@ -0,0 +1,29 @@ +--- +name: New waterdata function +about: Checklist for adding new USGS waterdata endpoint +--- + +New features should include all of the following work: + +* [ ] Create the `read_waterdata_` file. File should: + - [ ] Update endpoint name + - [ ] Update parameter list in Roxygen section + - [ ] Update arguments + - [ ] Update examples + - [ ] Think about if sorting looks right, if convertType should be used, etc. 
+* [ ] Add endpoint name to AAA.R `.onLoad` +* [ ] Add deprecate notice to corresponding NWIS function +* [ ] Comment out or delete deprecated NWIS examples +* [ ] Replace any NWIS tests with new waterdata function +* [ ] Create new tests in tests/testthat folder for unique situations +* [ ] Add example to read_waterdata_functions.Rmd vignette +* [ ] Update Status.Rmd vignette +* [ ] Update dataRetrieval.Rmd vignette +* [ ] Update tutorial.Rmd vignette +* [ ] Update NEWS +* [ ] Update _pkgdown.yml +* [ ] Check if README needs to be updated +* [ ] Update version (for development, only bump up last 4 digits) +* [ ] Create PR for "develop" branch on GitHub +* [ ] Create and merge MR for "develop" branch on code.usgs.gov. This will generate development version of documentation. + diff --git a/DESCRIPTION b/DESCRIPTION index b7b56c3c..1d4a7a5c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: dataRetrieval Type: Package Title: Retrieval Functions for USGS and EPA Hydrology and Water Quality Data -Version: 2.7.20 +Version: 2.7.21 Authors@R: c( person("Laura", "DeCicco", role = c("aut","cre"), email = "ldecicco@usgs.gov", diff --git a/NAMESPACE b/NAMESPACE index 18fda1da..8f0f178d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -47,6 +47,7 @@ export(readWQPsummary) export(read_USGS_samples) export(read_waterdata) export(read_waterdata_daily) +export(read_waterdata_field_measurements) export(read_waterdata_latest_continuous) export(read_waterdata_metadata) export(read_waterdata_monitoring_location) diff --git a/NEWS b/NEWS index 07fda978..fda0680e 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,10 @@ +dataRetrieval 2.7.21 +=================== +* Added read_waterdata_field_measurements to access new USGS water data API. +* Added deprecation warning to readNWISgwl and readNWISmeas. +* Added parent_time_series_id to read_waterdata_ts_meta. +* Updated some documentation. 
+ dataRetrieval 2.7.20 =================== * Added id transformation to read_waterdata to match other functions diff --git a/R/AAA.R b/R/AAA.R index f397a72d..603ffe4d 100644 --- a/R/AAA.R +++ b/R/AAA.R @@ -7,7 +7,8 @@ pkg.env <- new.env() options("dataRetrieval" = list("api_version" = "v0")) services <- c("server", "daily", "time-series-metadata", - "monitoring-locations", "latest-continuous") + "monitoring-locations", "latest-continuous", + "field-measurements") collections <- c("parameter-codes", "agency-codes", "altitude-datums", "aquifer-codes", "aquifer-types", "coordinate-accuracy-codes", "coordinate-datum-codes", "coordinate-method-codes", "medium-codes", diff --git a/R/constructNWISURL.R b/R/constructNWISURL.R index fedb8edb..c6a2a494 100644 --- a/R/constructNWISURL.R +++ b/R/constructNWISURL.R @@ -293,7 +293,7 @@ constructNWISURL <- function(siteNumbers, ) url <- httr2::req_headers(url, - `Accept-Encoding` = c("compress", "gzip", "deflate")) + `Accept-Encoding` = c("compress", "gzip", "deflate")) return(url) } @@ -426,7 +426,7 @@ constructWQPURL <- function(siteNumbers, } baseURL <- httr2::req_headers(baseURL, - `Accept-Encoding` = c("compress", "gzip", "deflate")) + `Accept-Encoding` = c("compress", "gzip", "deflate")) return(baseURL) } diff --git a/R/construct_api_requests.R b/R/construct_api_requests.R index 5ab72217..74fc05d6 100644 --- a/R/construct_api_requests.R +++ b/R/construct_api_requests.R @@ -85,10 +85,12 @@ construct_api_requests <- function(service, POST = TRUE } + get_list <- get_list[!is.na(get_list)] + time_periods <- c("last_modified", "datetime", "time", "begin", "end") if(any(time_periods %in% names(get_list))){ - for(i in time_periods){ + for(i in time_periods[time_periods %in% names(get_list)]){ dates <- FALSE if (all(service == "daily" & i != "last_modified")){ dates <- TRUE @@ -299,6 +301,7 @@ switch_properties_id <- function(properties, id_name, service){ #' #' start <- c("2021-01-01", NA) #' 
dataRetrieval:::format_api_dates(start) +#' dataRetrieval:::format_api_dates(start, TRUE) #' #' end <- c(NA, "2021-01-01") #' dataRetrieval:::format_api_dates(end) @@ -316,10 +319,12 @@ switch_properties_id <- function(properties, id_name, service){ #' #' dataRetrieval:::format_api_dates(start_end) #' +#' # If you don't specify a timezone, it will assume UTC #' start_end2 <- c("2021-01-01 12:15:00", "") #' dataRetrieval:::format_api_dates(start_end2) #' -#' start_end2 <- c("2021-01-01T12:15:00Z", "") +#' # If you do specify a timezone, it should maintain it, but convert to UTC: +#' start_end2 <- c("2021-01-01T12:15:00-0500", "") #' dataRetrieval:::format_api_dates(start_end2) #' format_api_dates <- function(datetime, date = FALSE){ @@ -336,18 +341,18 @@ format_api_dates <- function(datetime, date = FALSE){ return(datetime) } else { if(date){ - datetime <- format(datetime, "%Y-%m-%d") + datetime <- format(lubridate::as_datetime(datetime), "%Y-%m-%d") } else { - datetime <- lubridate::format_ISO8601(datetime, usetz = TRUE) + datetime <- lubridate::format_ISO8601(lubridate::as_datetime(datetime), usetz = "Z") } } } else if (length(datetime) == 2) { if(date){ - datetime <- paste0(format(as.Date(datetime), "%Y-%m-%d"), collapse = "/") + datetime <- paste0(format(lubridate::as_datetime(datetime), "%Y-%m-%d"), collapse = "/") } else { - datetime <- paste0(lubridate::format_ISO8601(as.POSIXct(datetime), - usetz = TRUE), + datetime <- paste0(lubridate::format_ISO8601(lubridate::as_datetime(datetime), + usetz = "Z"), collapse = "/") } diff --git a/R/getWebServiceData.R b/R/getWebServiceData.R index f0a63134..44d4e2e1 100644 --- a/R/getWebServiceData.R +++ b/R/getWebServiceData.R @@ -34,7 +34,7 @@ getWebServiceData <- function(obs_url, ...) 
{ obs_url <- httr2::req_retry(obs_url, backoff = ~ 5, max_tries = 3) obs_url <- httr2::req_headers(obs_url, - `Accept-Encoding` = c("compress", "gzip")) + `Accept-Encoding` = c("compress", "gzip")) url_method <- "GET" if(!is.null(obs_url$body)){ diff --git a/R/importWaterML1.R b/R/importWaterML1.R index 2ad98d46..8721c68a 100644 --- a/R/importWaterML1.R +++ b/R/importWaterML1.R @@ -450,7 +450,7 @@ check_if_xml <- function(obs_url) { } else if (inherits(obs_url, c("xml_node", "xml_nodeset"))) { returnedDoc <- obs_url } else { - doc <- getWebServiceData(obs_url, encoding = "gzip") + doc <- getWebServiceData(obs_url) if (is.null(doc)) { return(invisible(NULL)) } diff --git a/R/readNWISunit.R b/R/readNWISunit.R index 13728aa3..67be5309 100644 --- a/R/readNWISunit.R +++ b/R/readNWISunit.R @@ -493,16 +493,16 @@ readNWISmeas <- function(siteNumbers, #' @seealso [constructNWISURL()], [importRDB1()] #' @export #' @examplesIf is_dataRetrieval_user() -#' site_id <- "434400121275801" +#' #site_id <- "434400121275801" #' \donttest{ -#' data <- readNWISgwl(site_id) -#' sites <- c("434400121275801", "375907091432201") -#' data2 <- readNWISgwl(sites, "", "") -#' data3 <- readNWISgwl("420125073193001", "", "") +#' #data <- readNWISgwl(site_id) +#' #sites <- c("434400121275801", "375907091432201") +#' #data2 <- readNWISgwl(sites, "", "") +#' #data3 <- readNWISgwl("420125073193001", "", "") #' # handling of data where date has no day -#' data4 <- readNWISgwl("425957088141001", startDate = "1980-01-01") +#' #data4 <- readNWISgwl("425957088141001", startDate = "1980-01-01") #' -#' data5 <- readNWISgwl("263819081585801", parameterCd = "72019") +#' #data5 <- readNWISgwl("263819081585801", parameterCd = "72019") #' } readNWISgwl <- function(siteNumbers, startDate = "", @@ -510,6 +510,10 @@ readNWISgwl <- function(siteNumbers, parameterCd = NA, convertType = TRUE, tz = "UTC") { + .Deprecated(new = "read_waterdata_field_measurements.", + package = "dataRetrieval", + msg = "NWIS servers are 
slated for decommission. Please begin to migrate to read_waterdata_field_measurements.") + message(new_nwis_message()) url <- constructNWISURL( diff --git a/R/read_waterdata_field_measurements.R b/R/read_waterdata_field_measurements.R new file mode 100644 index 00000000..0c5d3fe3 --- /dev/null +++ b/R/read_waterdata_field_measurements.R @@ -0,0 +1,101 @@ +#' Get USGS Field Measurement Water Data +#' +#' @description `r get_description("field-measurements")` +#' +#' @export +#' @param monitoring_location_id `r get_params("field-measurements")$monitoring_location_id` +#' @param parameter_code `r get_params("field-measurements")$parameter_code` +#' @param observing_procedure_code `r get_params("field-measurements")$observing_procedure_code` +#' @param time `r get_params("field-measurements")$time` +#' @param value `r get_params("field-measurements")$value` +#' @param unit_of_measure `r get_params("field-measurements")$unit_of_measure` +#' @param approval_status `r get_params("field-measurements")$approval_status` +#' @param last_modified `r get_params("field-measurements")$last_modified` +#' @param qualifier `r get_params("field-measurements")$qualifier` +#' @param field_visit_id `r get_params("field-measurements")$field_visit_id` +#' @param observing_procedure `r get_params("field-measurements")$observing_procedure` +#' @param vertical_datum `r get_params("field-measurements")$vertical_datum` +#' @param measuring_agency `r get_params("field-measurements")$measuring_agency` +#' @param properties A vector of requested columns to be returned from the query. 
+#' Available options are: +#' `r schema <- check_OGC_requests(endpoint = "field-measurements", type = "schema"); paste(names(schema$properties), collapse = ", ")` +#' @param bbox Only features that have a geometry that intersects the bounding +#' box are selected. The bounding box is provided as four or six numbers, depending +#' on whether the coordinate reference system includes a vertical axis (height or +#' depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric +#' vector structured: c(xmin,ymin,xmax,ymax). Another way to think of it is c(Western-most longitude, +#' Southern-most latitude, Eastern-most longitude, Northern-most latitude). +#' @param limit The optional limit parameter is used to control the subset of the +#' selected features that should be returned in each page. The maximum allowable +#' limit is 10000. It may be beneficial to set this number lower if your internet +#' connection is spotty. The default (`NA`) will set the limit to the maximum +#' allowable limit for the service. +#' @param max_results The optional maximum number of rows to return. This value +#' must be less than the requested limit. +#' @param skipGeometry This option can be used to skip response geometries for +#' each feature. The returning object will be a data frame with no spatial +#' information. +#' @param convertType logical, defaults to `TRUE`. If `TRUE`, the function +#' will convert the data to dates and qualifier to string vector. 
+#' @examplesIf is_dataRetrieval_user() +#' +#' \donttest{ +#' site <- "USGS-02238500" +#' field_data_sf <- read_waterdata_field_measurements(monitoring_location_id = site) +#' +#' groundwater <- read_waterdata_field_measurements(monitoring_location_id = "USGS-375907091432201") +#' +#' gwl_data <- read_waterdata_field_measurements(monitoring_location_id = "USGS-375907091432201", +#' parameter_code = "72019", +#' skipGeometry = TRUE) +#' +#' gwl_data_period <- read_waterdata_field_measurements( +#' monitoring_location_id = "USGS-375907091432201", +#' parameter_code = "72019", +#' time = "P20Y") +#' +#' multi_site <- read_waterdata_field_measurements( +#' monitoring_location_id = c("USGS-451605097071701", +#' "USGS-263819081585801"), +#' parameter_code = c("62611", "72019")) +#' +#' old_df <- read_waterdata_field_measurements(monitoring_location_id = "USGS-425957088141001", +#' time = c("1980-01-01", NA)) +#' +#' +#' } +read_waterdata_field_measurements <- function(monitoring_location_id = NA_character_, + parameter_code = NA_character_, + observing_procedure_code = NA_character_, + properties = NA_character_, + field_visit_id = NA_character_, + approval_status = NA_character_, + unit_of_measure = NA_character_, + qualifier = NA_character_, + value = NA, + last_modified = NA_character_, + observing_procedure = NA_character_, + vertical_datum = NA_character_, + measuring_agency = NA_character_, + skipGeometry = NA, + time = NA_character_, + bbox = NA, + limit = NA, + max_results = NA, + convertType = TRUE){ + + service <- "field-measurements" + output_id <- "field_measurement_id" + + args <- mget(names(formals())) + return_list <- get_ogc_data(args, + output_id, + service) + + return_list <- return_list[order(return_list$time, return_list$monitoring_location_id), ] + + return(return_list) +} + + + diff --git a/R/read_waterdata_metadata.R b/R/read_waterdata_metadata.R index 94ddf7f5..9acb5102 100644 --- a/R/read_waterdata_metadata.R +++ b/R/read_waterdata_metadata.R @@ 
-27,6 +27,7 @@ #' coordinate_accuracy_codes <- read_waterdata_metadata("coordinate-accuracy-codes") #' coordinate_datum_codes <- read_waterdata_metadata("coordinate-datum-codes") #' coordinate_method_codes <- read_waterdata_metadata("coordinate-method-codes") +#' huc_codes <- read_waterdata_metadata("hydrologic-unit-codes") #' national_aquifer_codes <- read_waterdata_metadata("national-aquifer-codes") #' parameter_codes <- read_waterdata_metadata("parameter-codes") #' reliability_codes <- read_waterdata_metadata("reliability-codes") diff --git a/R/read_waterdata_ts_meta.R b/R/read_waterdata_ts_meta.R index 351383e1..cacc2200 100644 --- a/R/read_waterdata_ts_meta.R +++ b/R/read_waterdata_ts_meta.R @@ -16,6 +16,7 @@ #' @param thresholds `r get_params("time-series-metadata")$thresholds` #' @param unit_of_measure `r get_params("time-series-metadata")$unit_of_measure` #' @param primary `r get_params("time-series-metadata")$primary` +#' @param parent_time_series_id `r get_params("time-series-metadata")$parent_time_series_id` #' @param web_description `r get_params("time-series-metadata")$web_description` #' @param properties A vector of requested columns to be returned from the query. #' Available options are: @@ -69,6 +70,7 @@ read_waterdata_ts_meta <- function(monitoring_location_id = NA_character_, thresholds = NA, sublocation_identifier = NA_character_, primary = NA_character_, + parent_time_series_id = NA_character_, time_series_id = NA_character_, web_description = NA_character_, skipGeometry = NA, diff --git a/R/whatNWISsites.R b/R/whatNWISsites.R index 8ece54c8..8aae1b9a 100644 --- a/R/whatNWISsites.R +++ b/R/whatNWISsites.R @@ -87,7 +87,7 @@ whatNWISsites <- function(...) 
{ POST = POST, format = "mapper") - rawData <- getWebServiceData(urlCall, encoding = "gzip") + rawData <- getWebServiceData(urlCall) if (is.null(rawData)) { return(invisible(NULL)) } diff --git a/R/whatWQPdata.R b/R/whatWQPdata.R index 495bf08b..3455c5e2 100644 --- a/R/whatWQPdata.R +++ b/R/whatWQPdata.R @@ -197,19 +197,37 @@ whatWQPmetrics <- function(..., #' lakeSites_chars <- whatWQPdata( #' siteType = "Lake, Reservoir, Impoundment", #' countycode = "US:55:025", convertType = FALSE) -#' } +#' #' #' bbox <- c(-86.9736, 34.4883, -86.6135, 34.6562) #' what_bb <- whatWQPdata(bBox = bbox) -#' +#' } whatWQPdata <- function(..., convertType = TRUE) { - values <- readWQPdots(..., legacy = TRUE) + args <- convertLists(...) + if("legacy" %in% names(args)){ + if (!args$legacy) { + stop("There is not an equivalent 'whatWQPdata' offering for legacy=FALSE.") + } + args <- args[names(args) != "legacy"] + } + + values <- readWQPdots(args, legacy = TRUE) + service <- values[["service"]] values <- values[["values"]] - - if (any(c("tz", "service", "mimeType") %in% names(values))){ - values <- values[!(names(values) %in% c("tz", "service", "mimeType"))] + + # Not sure if there's a geojson option with WQX3 + wqp_message() + + if (grepl("WQX", service)) { + stop("There is not an equivalent 'whatWQPdata' offering for WQX3 services.") + } else if (service != "Result") { + message("service argument is not used in whatWQPdata and will be ignored") + } + + if (any(c("tz", "mimeType") %in% names(values))){ + values <- values[!(names(values) %in% c("tz", "mimeType"))] } POST <- FALSE @@ -238,9 +256,6 @@ whatWQPdata <- function(..., POST = POST, mimeType = "geojson") - # Not sure if there's a geojson option with WQX3 - wqp_message() - doc <- getWebServiceData(baseURL) if (is.null(doc)) { diff --git a/README.Rmd b/README.Rmd index ed220984..6e4a4200 100644 --- a/README.Rmd +++ b/README.Rmd @@ -44,7 +44,7 @@ If you have additional questions about these changes, email CompTools@usgs.gov. 2. 
Get daily USGS data (for example, mean daily discharge). Start here: `?read_waterdata_daily` -3. Get USGS groundwater data. Start here: `?readNWISgwl` +3. Get USGS groundwater data. Start here: `?read_waterdata_field_measurements` 4. Get discrete water quality data from a cooperative service that integrates publicly available water-quality data from the USGS, EPA, and over 400 state, federal, tribal, and local agencies. Start here: `?readWQPdata` diff --git a/README.md b/README.md index c5c40525..1f133dc3 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,8 @@ If you have additional questions about these changes, email 2. Get daily USGS data (for example, mean daily discharge). Start here: `?read_waterdata_daily` -3. Get USGS groundwater data. Start here: `?readNWISgwl` +3. Get USGS groundwater data. Start here: + `?read_waterdata_field_measurements` 4. Get discrete water quality data from a cooperative service that integrates publicly available water-quality data from the USGS, EPA, @@ -160,7 +161,7 @@ citation(package = "dataRetrieval") #> #> De Cicco, L.A., Hirsch, R.M., Lorenz, D., Watkins, W.D., Johnson, M., #> 2025, dataRetrieval: R packages for discovering and retrieving water -#> data available from Federal hydrologic web services, v.2.7.19, +#> data available from Federal hydrologic web services, v.2.7.20, #> doi:10.5066/P9X4L3GE #> #> A BibTeX entry for LaTeX users is @@ -170,7 +171,7 @@ citation(package = "dataRetrieval") #> title = {dataRetrieval: R packages for discovering and retrieving water data available from U.S. federal hydrologic web services}, #> publisher = {U.S. Geological Survey}, #> address = {Reston, VA}, -#> version = {2.7.19}, +#> version = {2.7.20}, #> institution = {U.S. Geological Survey}, #> year = {2025}, #> doi = {10.5066/P9X4L3GE}, @@ -195,15 +196,15 @@ NWIScitation <- create_NWIS_bib(dv) NWIScitation #> U.S. Geological Survey (2025). 
_National Water Information System data #> available on the World Wide Web (USGS Water Data for the Nation)_. -#> doi:10.5066/F7P55KJN , Accessed Jun -#> 27, 2025, +#> doi:10.5066/F7P55KJN , Accessed Jul +#> 21, 2025, #> . print(NWIScitation, style = "Bibtex") #> @Manual{, #> title = {National Water Information System data available on the World Wide Web (USGS Water Data for the Nation)}, #> author = {{U.S. Geological Survey}}, #> doi = {10.5066/F7P55KJN}, -#> note = {Accessed Jun 27, 2025}, +#> note = {Accessed Jul 21, 2025}, #> year = {2025}, #> url = {https://waterservices.usgs.gov/nwis/dv/?site=09010500&format=waterml%2C1.1&ParameterCd=00060&StatCd=00003&startDT=1851-01-01}, #> } @@ -227,14 +228,14 @@ WQPcitation <- create_WQP_bib(SC) WQPcitation #> National Water Quality Monitoring Council (2025). _Water Quality #> Portal_. doi:10.5066/P9QRKUVJ , -#> Accessed Jun 27, 2025, +#> Accessed Jul 21, 2025, #> . print(WQPcitation, style = "Bibtex") #> @Manual{, #> title = {Water Quality Portal}, #> author = {{National Water Quality Monitoring Council}}, #> doi = {10.5066/P9QRKUVJ}, -#> note = {Accessed Jun 27, 2025}, +#> note = {Accessed Jul 21, 2025}, #> year = {2025}, #> url = {https://www.waterqualitydata.us/data/Result/search?siteid=USGS-05288705&count=no&pCode=00300&mimeType=csv}, #> } diff --git a/_pkgdown.yml b/_pkgdown.yml index abc32176..7f11db90 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -26,22 +26,22 @@ navbar: href: articles/Status.html - text: Function Help href: reference/index.html - - text: Water Quality Data + - text: Tutorials menu: - - text: Samples Data - href: articles/samples_data.html - - text: Changes to QW - href: articles/qwdata_changes.html - - text: USGS QW Status - href: articles/Status.html - - text: QW Development Plan - href: articles/wqx3_development_plan.html + - text: Basic Tutorial + href: articles/tutorial.html + - text: Basic Tutorial (slides) + href: articles/basic_slides.html + - text: Changes to dataRetrieval (slides) + 
href: articles/changes_slides.html - text: Additional Articles menu: - - text: Tutorial - href: articles/tutorial.html - text: USGS Water Data APIs href: articles/read_waterdata_functions.html + - text: Samples Data + href: articles/samples_data.html + - text: Changes to QW + href: articles/qwdata_changes.html - text: Background href: articles/dataRetrieval.html - text: Pivot Data @@ -72,6 +72,7 @@ reference: - read_waterdata_ts_meta - read_waterdata_monitoring_location - read_waterdata_latest_continuous + - read_waterdata_field_measurements - read_waterdata_parameter_codes - read_waterdata_metadata - read_waterdata diff --git a/_quarto.yml b/_quarto.yml new file mode 100644 index 00000000..5de3e287 --- /dev/null +++ b/_quarto.yml @@ -0,0 +1,8 @@ +project: + output-dir: public + render: + - tutorials/basic_slides_deck.qmd + - tutorials/changes_slides_deck.qmd + +toc: false + diff --git a/inst/CITATION b/inst/CITATION index f028f67f..026d9ef6 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -39,7 +39,7 @@ bibentry(bibtype = "Manual", title = "dataRetrieval: R packages for discovering and retrieving water data available from U.S. federal hydrologic web services", publisher = "U.S. Geological Survey", address="Reston, VA", - version = "2.7.20", + version = "2.7.21", institution = "U.S. 
Geological Survey", year = 2025, doi = "10.5066/P9X4L3GE", diff --git a/man/readNWISgwl.Rd b/man/readNWISgwl.Rd index 00d19b10..130aa603 100644 --- a/man/readNWISgwl.Rd +++ b/man/readNWISgwl.Rd @@ -84,16 +84,16 @@ See \url{https://waterservices.usgs.gov/docs/groundwater-levels/} for more infor } \examples{ \dontshow{if (is_dataRetrieval_user()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -site_id <- "434400121275801" +#site_id <- "434400121275801" \donttest{ -data <- readNWISgwl(site_id) -sites <- c("434400121275801", "375907091432201") -data2 <- readNWISgwl(sites, "", "") -data3 <- readNWISgwl("420125073193001", "", "") +#data <- readNWISgwl(site_id) +#sites <- c("434400121275801", "375907091432201") +#data2 <- readNWISgwl(sites, "", "") +#data3 <- readNWISgwl("420125073193001", "", "") # handling of data where date has no day -data4 <- readNWISgwl("425957088141001", startDate = "1980-01-01") +#data4 <- readNWISgwl("425957088141001", startDate = "1980-01-01") -data5 <- readNWISgwl("263819081585801", parameterCd = "72019") +#data5 <- readNWISgwl("263819081585801", parameterCd = "72019") } \dontshow{\}) # examplesIf} } diff --git a/man/read_waterdata_field_measurements.Rd b/man/read_waterdata_field_measurements.Rd new file mode 100644 index 00000000..a33f4393 --- /dev/null +++ b/man/read_waterdata_field_measurements.Rd @@ -0,0 +1,134 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/read_waterdata_field_measurements.R +\name{read_waterdata_field_measurements} +\alias{read_waterdata_field_measurements} +\title{Get USGS Field Measurement Water Data} +\usage{ +read_waterdata_field_measurements( + monitoring_location_id = NA_character_, + parameter_code = NA_character_, + observing_procedure_code = NA_character_, + properties = NA_character_, + field_visit_id = NA_character_, + approval_status = NA_character_, + unit_of_measure = NA_character_, + qualifier = NA_character_, + value = NA, + last_modified = 
NA_character_, + observing_procedure = NA_character_, + vertical_datum = NA_character_, + measuring_agency = NA_character_, + skipGeometry = NA, + time = NA_character_, + bbox = NA, + limit = NA, + max_results = NA, + convertType = TRUE +) +} +\arguments{ +\item{monitoring_location_id}{A unique identifier representing a single monitoring location. This corresponds to the \code{id} field in the \code{monitoring-locations} endpoint. Monitoring location IDs are created by combining the agency code of the agency responsible for the monitoring location (e.g. USGS) with the ID number of the monitoring location (e.g. 02238500), separated by a hyphen (e.g. USGS-02238500).} + +\item{parameter_code}{Parameter codes are 5-digit codes used to identify the constituent measured and the units of measure. A complete list of parameter codes and associated groupings can be found at \url{https://help.waterdata.usgs.gov/codes-and-parameters/parameters}.} + +\item{observing_procedure_code}{A short code corresponding to the observing procedure for the field measurement.} + +\item{properties}{A vector of requested columns to be returned from the query. +Available options are: +geometry, id, field_visit_id, parameter_code, monitoring_location_id, observing_procedure_code, observing_procedure, value, unit_of_measure, time, qualifier, vertical_datum, approval_status, measuring_agency, last_modified} + +\item{field_visit_id}{A universally unique identifier (UUID) for the field visit. Multiple measurements may be made during a single field visit.} + +\item{approval_status}{Some of the data that you have obtained from this U.S. Geological Survey database may not have received Director's approval. Any such data values are qualified as provisional and are subject to revision. Provisional data are released on the condition that neither the USGS nor the United States Government may be held liable for any damages resulting from its use. 
This field reflects the approval status of each record, and is either "Approved", meaning processing review has been completed and the data is approved for publication, or "Provisional" and subject to revision. For more information about provisional data, go to \url{https://waterdata.usgs.gov/provisional-data-statement/}.} + +\item{unit_of_measure}{A human-readable description of the units of measurement associated with an observation.} + +\item{qualifier}{This field indicates any qualifiers associated with an observation, for instance if a sensor may have been impacted by ice or if values were estimated.} + +\item{value}{The value of the observation. Values are transmitted as strings in the JSON response format in order to preserve precision.} + +\item{last_modified}{The last time a record was refreshed in our database. This may happen due to regular operational processes and does not necessarily indicate anything about the measurement has changed. +You can query this field using date-times or intervals, adhering to RFC 3339, or using ISO 8601 duration objects. Intervals may be bounded or half-bounded (double-dots at start or end). +Examples: +\itemize{ +\item A date-time: "2018-02-12T23:20:50Z" +\item A bounded interval: "2018-02-12T00:00:00Z/2018-03-18T12:31:12Z" +\item Half-bounded intervals: "2018-02-12T00:00:00Z/.." or "../2018-03-18T12:31:12Z" +\item Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours +} + +Only features that have a \code{last_modified} that intersects the value of datetime are selected.} + +\item{observing_procedure}{Water measurement or water-quality observing procedure descriptions.} + +\item{vertical_datum}{The datum used to determine altitude and vertical position at the monitoring location. 
\href{https://help.waterdata.usgs.gov/code/alt_datum_cd_query?fmt=html}{A list of codes is available.}} + +\item{measuring_agency}{The agency performing the measurement.} + +\item{skipGeometry}{This option can be used to skip response geometries for +each feature. The returning object will be a data frame with no spatial +information.} + +\item{time}{The date an observation represents. You can query this field using date-times or intervals, adhering to RFC 3339, or using ISO 8601 duration objects. Intervals may be bounded or half-bounded (double-dots at start or end). +Examples: +\itemize{ +\item A date-time: "2018-02-12T23:20:50Z" +\item A bounded interval: "2018-02-12T00:00:00Z/2018-03-18T12:31:12Z" +\item Half-bounded intervals: "2018-02-12T00:00:00Z/.." or "../2018-03-18T12:31:12Z" +\item Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours +} + +Only features that have a \code{time} that intersects the value of datetime are selected. If a feature has multiple temporal properties, it is the decision of the server whether only a single temporal property is used to determine the extent or all relevant temporal properties.} + +\item{bbox}{Only features that have a geometry that intersects the bounding +box are selected. The bounding box is provided as four or six numbers, depending +on whether the coordinate reference system includes a vertical axis (height or +depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric +vector structured: c(xmin,ymin,xmax,ymax). Another way to think of it is c(Western-most longitude, +Southern-most latitude, Eastern-most longitude, Northern-most latitude).} + +\item{limit}{The optional limit parameter is used to control the subset of the +selected features that should be returned in each page. The maximum allowable +limit is 10000. It may be beneficial to set this number lower if your internet +connection is spotty. 
The default (\code{NA}) will set the limit to the maximum +allowable limit for the service.} + +\item{max_results}{The optional maximum number of rows to return. This value +must be less than the requested limit.} + +\item{convertType}{logical, defaults to \code{TRUE}. If \code{TRUE}, the function +will convert the data to dates and qualifier to string vector.} +} +\description{ +Field measurements are physically measured values collected during a visit to the monitoring location. Field measurements consist of measurements of gage height and discharge, and readings of groundwater levels, and are primarily used as calibration readings for the automated sensors collecting continuous data. They are collected at a low frequency, and delivery of the data in WDFN may be delayed due to data processing time. +} +\examples{ +\dontshow{if (is_dataRetrieval_user()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} + +\donttest{ +site <- "USGS-02238500" +field_data_sf <- read_waterdata_field_measurements(monitoring_location_id = site) + +groundwater <- read_waterdata_field_measurements(monitoring_location_id = "USGS-375907091432201") + +gwl_data <- read_waterdata_field_measurements(monitoring_location_id = "USGS-375907091432201", + parameter_code = "72019", + skipGeometry = TRUE) + +gwl_data_period <- read_waterdata_field_measurements( + monitoring_location_id = "USGS-375907091432201", + parameter_code = "72019", + time = "P20Y") + +multi_site <- read_waterdata_field_measurements( + monitoring_location_id = c("USGS-451605097071701", + "USGS-263819081585801"), + parameter_code = c("62611", "72019")) + +old_df <- read_waterdata_field_measurements(monitoring_location_id = "USGS-425957088141001", + time = c("1980-01-01", NA)) + + +} +\dontshow{\}) # examplesIf} +} diff --git a/man/read_waterdata_metadata.Rd b/man/read_waterdata_metadata.Rd index e0c8fd6b..116830cc 100644 --- a/man/read_waterdata_metadata.Rd +++ b/man/read_waterdata_metadata.Rd @@ -38,6 +38,7 
@@ aquifer_types <- read_waterdata_metadata("aquifer-types") coordinate_accuracy_codes <- read_waterdata_metadata("coordinate-accuracy-codes") coordinate_datum_codes <- read_waterdata_metadata("coordinate-datum-codes") coordinate_method_codes <- read_waterdata_metadata("coordinate-method-codes") +huc_codes <- read_waterdata_metadata("hydrologic-unit-codes") national_aquifer_codes <- read_waterdata_metadata("national-aquifer-codes") parameter_codes <- read_waterdata_metadata("parameter-codes") reliability_codes <- read_waterdata_metadata("reliability-codes") diff --git a/man/read_waterdata_ts_meta.Rd b/man/read_waterdata_ts_meta.Rd index 5e002be9..00210688 100644 --- a/man/read_waterdata_ts_meta.Rd +++ b/man/read_waterdata_ts_meta.Rd @@ -19,6 +19,7 @@ read_waterdata_ts_meta( thresholds = NA, sublocation_identifier = NA_character_, primary = NA_character_, + parent_time_series_id = NA_character_, time_series_id = NA_character_, web_description = NA_character_, skipGeometry = NA, @@ -37,7 +38,7 @@ read_waterdata_ts_meta( \item{properties}{A vector of requested columns to be returned from the query. Available options are: -geometry, id, unit_of_measure, parameter_name, parameter_code, statistic_id, last_modified, begin, end, computation_period_identifier, computation_identifier, thresholds, sublocation_identifier, primary, monitoring_location_id, web_description, parameter_description} +geometry, id, unit_of_measure, parameter_name, parameter_code, statistic_id, last_modified, begin, end, computation_period_identifier, computation_identifier, thresholds, sublocation_identifier, primary, monitoring_location_id, web_description, parameter_description, parent_time_series_id} \item{statistic_id}{A code corresponding to the statistic an observation represents. Example codes include 00001 (max), 00002 (min), and 00003 (mean). 
A complete list of codes and their descriptions can be found at \url{https://help.waterdata.usgs.gov/code/stat_cd_nm_query?stat_nm_cd=\%25&fmt=html}.} @@ -89,6 +90,8 @@ Only features that have a \code{end} that intersects the value of datetime are s \item{primary}{A flag identifying if the time series is a "primary" time series. "Primary" time series (which have this flag) are standard observations which undergo \href{https://www.usgs.gov/survey-manual/5028-fundamental-science-practices-review-and-approval-scientific-data-release}{Bureau review and approval processes}. Non-primary time series, which will have missing values for "primary", are provisional datasets made available to meet the need for timely best science and to assist with daily operations which need real-time information. Non-primary time series data are only retained by this system for 120 days. See the \href{https://waterdata.usgs.gov/provisional-data-statement/}{USGS Provisional Data Statement} for more information.} +\item{parent_time_series_id}{The unique identifier representing the parent or "upchain" time series that a daily values time series is generated from. Daily values time series have one and only one parent time series.} + \item{time_series_id}{A unique identifier representing a single time series. 
This corresponds to the \code{id} field in the \code{time-series-metadata} endpoint.} \item{web_description}{A description of what this time series represents, as used by WDFN and other USGS data dissemination products.} diff --git a/man/whatWQPdata.Rd b/man/whatWQPdata.Rd index 9e8d8f7f..58321ae8 100644 --- a/man/whatWQPdata.Rd +++ b/man/whatWQPdata.Rd @@ -56,10 +56,11 @@ lakeSites <- whatWQPdata(siteType = "Lake, Reservoir, Impoundment", lakeSites_chars <- whatWQPdata( siteType = "Lake, Reservoir, Impoundment", countycode = "US:55:025", convertType = FALSE) -} + bbox <- c(-86.9736, 34.4883, -86.6135, 34.6562) what_bb <- whatWQPdata(bBox = bbox) +} \dontshow{\}) # examplesIf} } \seealso{ diff --git a/tests/testthat/tests_general.R b/tests/testthat/tests_general.R index 96e02690..f54f9c7d 100644 --- a/tests/testthat/tests_general.R +++ b/tests/testthat/tests_general.R @@ -2,6 +2,7 @@ context("General functions") test_that("General USGS retrievals working", { testthat::skip_on_cran() + testthat::skip_on_ci() cql <- '{ "op": "and", @@ -62,7 +63,8 @@ test_that("General USGS retrievals working", { test_that("General NWIS retrievals working", { testthat::skip_on_cran() - skip_on_ci() + testthat::skip_on_ci() + multiSite <- readNWISdata( sites = c("04025500", "040263491"), service = "iv", parameterCd = "00060", @@ -262,10 +264,11 @@ test_that("General NWIS retrievals working", { }) test_that("read_waterdata_ts_meta", { - + testthat::skip_on_cran() + testthat::skip_on_ci() # no service specified: availableData <- read_waterdata_ts_meta(monitoring_location_id = "USGS-05114000") - expect_equal(ncol(availableData), 17) + expect_equal(ncol(availableData), 18) uvData <- read_waterdata_ts_meta(monitoring_location_id = "USGS-05114000", computation_period_identifier = c("Points")) @@ -297,6 +300,7 @@ test_that("read_waterdata_ts_meta", { test_that("General WQP retrievals working", { testthat::skip_on_cran() + testthat::skip_on_ci() nameToUse <- "pH" pHData <- readWQPdata(siteid = 
"USGS-04024315", characteristicName = nameToUse, @@ -390,13 +394,17 @@ test_that("zeroPad handles NAs", { test_that("Dates with no days can be handled", { testthat::skip_on_cran() - empty_df <- readNWISgwl("425957088141001", startDate = "1980-01-01") + testthat::skip_on_ci() + + empty_df <- read_waterdata_field_measurements(monitoring_location_id = "USGS-425957088141001", + time = c("1980-01-01", NA)) expect_true(nrow(empty_df) > 0) }) context("whatWQPsamples") test_that("whatWQPsamples working", { testthat::skip_on_cran() + testthat::skip_on_ci() # The warning is caused by a confirmed bug in WQP siteInfo <- whatWQPsamples(siteid = "USGS-01594440") expect_true(nrow(siteInfo) > 0) @@ -405,6 +413,7 @@ test_that("whatWQPsamples working", { context("whatWQPmetrics") test_that("whatWQPmetrics working", { testthat::skip_on_cran() + testthat::skip_on_ci() type <- "Stream" siteInfo <- whatWQPmetrics(countycode = "US:55:025", siteType = type) expect_true(ncol(siteInfo) >= 21) @@ -413,6 +422,7 @@ test_that("whatWQPmetrics working", { context("whatWQPdata") test_that("whatWQPdata working", { testthat::skip_on_cran() + testthat::skip_on_ci() site1 <- whatWQPdata(siteid = "USGS-01594440") expect_is(site1, "data.frame") @@ -429,6 +439,8 @@ test_that("whatWQPdata working", { context("read_waterdata_ts_meta") test_that("read_waterdata_ts_meta working", { testthat::skip_on_cran() + testthat::skip_on_ci() + siteListOhio <- read_waterdata_monitoring_location(state_name = "Ohio") siteListPhos <- read_waterdata_ts_meta(bbox = sf::st_bbox(siteListOhio), parameter_code = "00665") diff --git a/tests/testthat/tests_nldi.R b/tests/testthat/tests_nldi.R index 7aae6d05..6273d40d 100644 --- a/tests/testthat/tests_nldi.R +++ b/tests/testthat/tests_nldi.R @@ -26,6 +26,7 @@ test_that("NLDI messageing NULL", { test_that("NLDI offerings...", { skip_on_cran() + skip_on_ci() expect_true(nrow(get_nldi_sources()) > 1) }) diff --git a/tests/testthat/tests_samples.R b/tests/testthat/tests_samples.R index 
da145998..a219ebcc 100644 --- a/tests/testthat/tests_samples.R +++ b/tests/testthat/tests_samples.R @@ -69,6 +69,7 @@ test_that("samples-data project working", { context("summary_waterdata_samples") test_that("summary_waterdata_samples working", { testthat::skip_on_cran() + testthat::skip_on_ci() site1 <- summarize_waterdata_samples(monitoringLocationIdentifier = "USGS-01594440") expect_is(site1, "data.frame") @@ -76,8 +77,8 @@ test_that("summary_waterdata_samples working", { }) test_that("profiles", { - testthat::skip_on_cran() + testthat::skip_on_ci() # Data profiles: "Organization Data" org_data <- read_waterdata_samples( countyFips = countyCdLookup("WI", "Dane"), diff --git a/tests/testthat/tests_userFriendly_fxns.R b/tests/testthat/tests_userFriendly_fxns.R index 5269d4b9..6607a4f1 100644 --- a/tests/testthat/tests_userFriendly_fxns.R +++ b/tests/testthat/tests_userFriendly_fxns.R @@ -106,9 +106,9 @@ test_that("peak, rating curves, surface-water measurements", { url <- httr2::request("https://waterservices.usgs.gov/nwis/site/?format=rdb&seriesCatalogOutput=true&sites=05114000") x <- importRDB1(url) - siteID <- "263819081585801" - gwl_1 <- readNWISgwl(siteID) - expect_equal(unique(gwl_1$site_no), siteID) + siteID <- "USGS-263819081585801" + gwl_1 <- read_waterdata_field_measurements(monitoring_location_id = siteID) + expect_equal(unique(gwl_1$monitoring_location_id), siteID) # No data: stations <- "06011000" diff --git a/tutorials/_metadata.yml b/tutorials/_metadata.yml new file mode 100644 index 00000000..12a33cb5 --- /dev/null +++ b/tutorials/_metadata.yml @@ -0,0 +1,5 @@ +format: + revealjs: + menu: false + progress: false +search: false \ No newline at end of file diff --git a/tutorials/basic_slides_deck.qmd b/tutorials/basic_slides_deck.qmd new file mode 100644 index 00000000..9c256769 --- /dev/null +++ b/tutorials/basic_slides_deck.qmd @@ -0,0 +1,1024 @@ +--- +title: "USGS Water Quality Data
Introduction to dataRetrieval" +author: "Laura DeCicco" +image: hex_logo.png +format: + revealjs: + theme: [simple, custom.scss] + toc: false + slide-number: true + logo: hex_logo.png + footer: + preview-links: auto +title-slide-attributes: + data-background-image: hex_logo.png + data-background-size: 15% + data-background-position: 2% 2% +editor: source +editor_options: + chunk_output_type: console +execute: + echo: true + warning: false + message: false +--- + +```{r} +#| echo: false +#| include: false +# library(dataRetrieval) +library(ggplot2) +library(dplyr) +options(dplyr.summarise.inform = FALSE) + +dt_me <- function(x, + page_length = 8, + paging = TRUE, + font = "0.7em", + escape = TRUE){ + DT::datatable(x, + rownames = FALSE, + options = list(pageLength = page_length, + info = FALSE, + searching = FALSE, + paging = paging, + lengthChange = FALSE, + initComplete = htmlwidgets::JS( + "function(settings, json) {", + paste0("$(this.api().table().container()).css({'font-size': '", + font, "'});"), + "}")), escape = escape) +} + +theme_set(theme_grey(base_size = 24)) +update_geom_defaults("point", list(size = 3)) + +``` + + +## Introduction {background-image="images/hex_logo.png" background-size="15%" background-position="90% 90%" } + +In this ~90 minute introduction, the goal is: + +- Introduce the modern `dataRetrieval` workflows. + +- The intended audience is someone: + + - New to `dataRetrieval` + + - Has some R experience + +::: footer + +::: + + +## RStudio Orientation + +By default will look like: + +![](images/default_rstudio.png) + +## RStudio Appearances + +Go to Tools -> Global Options -> Appearances to change style. + +![](images/apperences.png) + +## RStudio Orientation {.smaller} + +:::: {.columns} + +::: {.column width="30%"} + +1. Create scripts. + +2. See code run. + +3. See what variables are loaded + + - Click on a data frame to View + +4. 
Plots and more + +::: + +::: {.column width="70%"} + +![](images/dark_mode.png) +::: + +:::: + +::: footer + +::: + + +## dataRetrieval: R-package for US water data {.smaller} + +:::: {.columns} + +::: {.column width="50%"} + +**USGS Water Data APIs * ** + +- Surface water levels + +- Groundwater levels + +- Site metadata + +- Peak flows + +- Rating curves + +- Discrete water-quality data + +::: + +::: {.column width="50%"} + +**Water Quality Portal (WQP) Data** + +- Discrete water-quality data + +- USGS and non-USGS data + +::: + +:::: + +## Installation + +`dataRetrieval` is available on the Comprehensive R Archive Network (CRAN) repository. To install `dataRetrieval` on your computer, open RStudio and run this line of code in the Console: + +```{r} +#| echo: true +#| eval: false +install.packages("dataRetrieval") + +``` + +Then each time you open R, you'll need to load the library: + +```{r} +#| message: true +library(dataRetrieval) +``` + +::: footer + +::: + +## Installation Notes + +* Lots of R-package tips in [Best Practices](https://water.code-pages.usgs.gov/wq-visualizations-tools/training/postsbest_practices/best_practices.html#sec-packages) + +* **Warning messages**: usually can ignore! + +* **Error messages**: can't ignore! 
+ + +## dataRetrieval: External Documentation + +![](images/documentation_1.png){width="1000" height="500"} + +::: footer + +::: + +## dataRetrieval: External Documentation + +![](images/documentation_2.png){width="1000" height="500"} + +::: footer + +::: + +## dataRetrieval: External Documentation + +![](images/documentation_3.png){width="1000" height="500"} + +## Documentation within R: function help pages {.smaller} + +Within R, you can call help files for any `dataRetrieval` function: + +```{r} +#| echo: true +#| eval: false +?readWQPdata +``` + +:::: {.columns} + +::: {.column width="50%"} + +Click here to open a new window: + +![](images/help_file_2.png) + + +::: + +::: {.column width="50%"} + +Scroll down to the "Examples" to see how each function can be run. + +Examples + +```{r} +#| eval: false +# Legacy: +nameToUse <- "pH" +pHData <- readWQPdata(siteid = "USGS-04024315", + characteristicName = nameToUse) +ncol(pHData) +attr(pHData, "siteInfo") +attr(pHData, "queryTime") +attr(pHData, "url") +``` + +::: + +:::: + +::: footer + +::: + +## Exercise 1: Orientation {.smaller} + +::: {.panel-tabset} + +### Challenge + +1. Open RStudio + +2. Install `dataRetrieval`, `dplyr`, `ggplot2`, and `data.table` (if they are not already installed). + +3. Load `dataRetrieval` + +4. Open the help file for the function `read_waterdata_daily` + +5. Navigate to and find the list of function help files and explore some articles in "Additional Articles" + + +### Solution: + + +```{r fig.height=7} +#| eval: false +install.packages(c("dataRetrieval", "dplyr", "ggplot2", "data.table")) +library(dataRetrieval) +?read_waterdata_daily +``` + +::: + +::: footer + +::: + +## dataRetrieval Updates {background-image="images/hex_logo.png" background-size="15%" background-position="85% 80%" } + +Are you a seasoned `dataRetrieval` user? 
+ +Here are resources for recent major changes: + +* [Changes to dataRetrieval](https://doi-usgs.github.io/dataRetrieval/articles/changes_slides.html) + +* [Water Data API Introduction](https://doi-usgs.github.io/dataRetrieval/articles/read_waterdata_functions.html) + +* [Samples Data Introduction](https://doi-usgs.github.io/dataRetrieval/articles/samples_data.html) + +## What's New? {.smaller} + +There have been a lot of changes to `dataRetrieval` over the past year. If you'd like to see an overview of those changes, visit: [Changes to dataRetrieval](https://doi-usgs.github.io/dataRetrieval/articles/changes_slides.html) + +Biggest changes: + +* NWIS servers will be shut down, so all `readNWIS` functions will eventually stop working + +* `read_waterdata` functions are modern and should be used when possible + +* The "USGS Water Data APIs" are the new home for USGS data + +::: footer + +::: + +## USGS Water Data API Token + +* The Water Data APIs limit how many queries a single IP address can make per hour + +* You **can** run new `dataRetrieval` functions without a token + +* You **might** run into errors quickly. If you (or your IP!) have exceeded the quota, you will see: + +``` +! HTTP 429 Too Many Requests. + • You have exceeded your rate limit. Make sure you provided your API key from https://api.waterdata.usgs.gov/signup/, then either try again later or contact us at https://waterdata.usgs.gov/questions-comments/?referrerUrl=https://api.waterdata.usgs.gov for assistance. +``` + +## USGS Water Data API Token + +1. Request a USGS Water Data API Token: <https://api.waterdata.usgs.gov/signup/> + +2. Save it in a safe place (KeePass or other password management tool) + +3. Add it to your .Renviron file as API_USGS_PAT. + +4. Restart R + +5. Check that it worked by running (you should see your token printed in the Console): + +```{r} +#| eval: false +Sys.getenv("API_USGS_PAT") +``` + +See next slide for a demonstration. 
+ +::: footer + +::: + +## USGS Water Data API Token: Example {.smaller} + +My favorite method to add your token to .Renviron is to use the `usethis` package. Let's pretend the token sent to you was "abc123": + +1. Run in R: +```{r} +#| echo: true +#| eval: false +usethis::edit_r_environ() +``` + +2. Add this line to the file that opens up: + +```{r} +#| eval: false +API_USGS_PAT = "abc123" +``` + +3. Save that file using the save button + +4. Restart R/RStudio. + +5. Run after restarting R: + +```{r} +#| eval: false +Sys.getenv("API_USGS_PAT") +``` + +## USGS Water Data API Token: Example {.smaller .nostretch} + +![](images/save_token.png){width="50%"} + +After save and restart, check that it worked by running: + +```{r} +#| eval: false +Sys.getenv("API_USGS_PAT") +``` + +::: footer + +::: + +## USGS Basic Retrievals {.smaller} + +The USGS uses various codes for basic retrievals. These codes can have leading zeros, therefore they need to be a character surrounded in quotes ("00060"). + +* Site ID (often 8 or 15-digits) +* Parameter Code (5 digits) + + Full list: `read_waterdata_parameter_codes()` +* Statistic Code (for daily values) + + Full list: `read_waterdata_metadata("statistic-codes")` + +## USGS Basic Retrievals Parameter and Statistic Codes + +Here are some examples of a few common codes: + + +```{r echo=FALSE, eval=TRUE} +library(knitr) + +df <- data.frame( + pCode = c("00060", "00065", "00010", "00400"), + shName = c("Discharge", "Gage Height", "Temperature", "pH") +) + +names(df) <- c("Parameter Code", "Short Name") + +df2 <- data.frame( + pCode = c("00001", "00002", "00003", "00008"), + shName = c("Maximum", "Minimum", "Mean", "Median") +) + +names(df2) <- c("Statistic Code", "Short Name") + +knitr::kable(list(df, df2)) +``` + + + +## Let's Go! 
{.smaller} + +We're going to walk through 5 workflows: + +* **Workflow 1**: Daily Data + + - Uses the new USGS Water Data API + + - Modern data access point going forward + +* **Workflow 2**: Discrete Data + + - Uses new USGS Samples Data + + - Modern data access point going forward + +* **Workflow 3**: Join Daily and Discrete + +* **Workflow 4**: Continuous Data + + - Uses the NWIS web services + + - Will be deprecated, this fall we'll have `read_waterdata_continuous` + +* **Workflow 5**: Join Continuous and Discrete + +::: footer + +::: + +## Workflow 1: Daily data for known site + +Let's pull daily mean discharge data for site "USGS-09405500", getting all the data from October 1, 2024 onward. + +```{r} +#| message: true +library(dataRetrieval) +site <- "USGS-09405500" +pcode <- "00060" # Discharge +stat_cd <- "00003" # Mean +range <- c("2024-10-01", NA) + +df <- read_waterdata_daily(monitoring_location_id = site, + parameter_code = pcode, + statistic_id = stat_cd, + time = range) + +``` + +::: footer + +::: + +## Workflow 1: Look at Daily Data {.smaller} + +In RStudio, click on the data frame in the upper right Environment tab to open a Viewer. + +```{r} +#| echo: false + +dt_me(df |> + sf::st_drop_geometry(), + page_length = 3) + + +``` + +::: footer + +::: + +## Workflow 1: Plot Daily Data + +In our next session, we will dive into using `ggplot2` to visualize the data. Here is a sneak peek at a very simple plot: + +```{r} +#| echo: true +#| output-location: column +library(ggplot2) + +ggplot(data = df) + + geom_point(aes(x = time, + y = value, + color = approval_status)) + +``` + +## Water Data API Notes: Argument input + +Use your "tab" key! + +![](images/autocomplete.png) + +## Water Data API Notes: Arguments + +* When you look at the help file for the new functions, you’ll notice there are lots of possible inputs (arguments). + +* You **DO NOT** need to (and should not!) specify **all** of these parameters. 
+ +* However, also consider what happens if you leave too many things blank. What do you suppose will be returned here? + +```{r} +#| eval: false +#| echo: true +discharge <- read_waterdata_daily(parameter_code = "00060", + statistic_id = "00003") + +``` + +::: {.fragment} + +::: {style="font-size: 75%;"} + +Since no list of sites or bounding box was defined, **ALL** the daily data in **ALL** the country with parameter code "00060" and statistic code "00003" will be returned. + +::: + +::: + +::: footer + +::: + + +## Water Data API Notes: time input {.smaller} + +:::: {.columns} + +::: {.column width="50%"} + +The "time" argument has a few options: + +* A single date (or date-time): "2024-10-01" or "2024-10-01T23:20:50Z" + +* A bounded interval: c("2024-10-01", "2025-07-02") + +* Half-bounded intervals: c("2024-10-01", NA) + +* Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours + +::: + +::: {.column width="50%"} + +Here are a bunch of valid inputs: + +```{r} +#| code-line-numbers: "1-7|8-9|10-13|14-17" +# Ask for exact times: +time = "2025-01-01" +time = as.Date("2025-01-01") +time = "2025-01-01T23:20:50Z" +time = as.POSIXct("2025-01-01T23:20:50Z", + format = "%Y-%m-%dT%H:%M:%S", + tz = "UTC") +# Ask for specific range +time = c("2024-01-01", "2025-01-01") # or Dates or POSIXs +# Asking beginning of record to specific end: +time = c(NA, "2024-01-01") # or Date or POSIX +# Asking specific beginning to end of record: +time = c("2024-01-01", NA) # or Date or POSIX +# Ask for period +time = "P1M" # past month +time = "P7D" # past 7 days +time = "PT12H" # past hours +``` + +::: + +:::: + +## Workflow 2: Discrete data for known site + +Use your "tab" key! + +![](images/autocomplete_samples.png) + +## Workflow 2: Discrete data for known site + +Let's get orthophosphate ("00660") data from the Shenandoah River at Front Royal, VA ("USGS-01631000"). 
+ +```{r} +#| message: true +site <- "USGS-01631000" +pcode <- "00660" + +qw_data <- read_waterdata_samples(monitoringLocationIdentifier = site, + usgsPCode = pcode, + dataType = "results", + dataProfile = "basicphyschem") +ncol(qw_data) +``` + +That's a LOT of columns that come back. We won't look at them here, but let's jump over to RStudio to look through the results. + +::: footer + +::: + +## USGS Samples Data Notes: Data Types and Profiles + +* There are 2 arguments that dictate what kind of data is returned + - "dataType" defines what kind of data comes back + - "dataProfile" defines what columns from that type come back + +## Data Types and Profiles {.smaller} + +```{r} +#| echo: false + +df <- tibble(dataType = c("results", "locations", "activities", "projects", "organizations"), + Description = c("Results data and metadata for measures and observations matching your query", + "Find monitoring locations that have data matching your query", + "Information about the monitoring activities conducted that produced data", + "Information on the projects that have results matching your data query", + "Information about the organizations that have provided data that matches your query"), + dataProfile = c('fullphyschem
basicphyschem
fullbio
basicbio
narrow
resultdetectionquantitationlimit
labsampleprep
count', + 'site
count', + 'sampact
actmetric
actgroup
ncount', + 'project
projectmonitoringlocationweight', + 'organization
count')) + +dt_me(df, escape = FALSE, paging = FALSE) + +``` + +::: footer + +::: + +## Workflow 2: Discrete data censoring + +Let's pull just a few columns out and look at those: + +```{r} +library(dplyr) + +qw_data_slim <- qw_data |> + select(Date = Activity_StartDate, + Result_Measure, + DL_cond = Result_ResultDetectionCondition, + DL_val = DetectionLimit_MeasureA, + DL_type = DetectionLimit_TypeA) |> + mutate(Result = if_else(!is.na(DL_cond), DL_val, Result_Measure), + Detected = if_else(!is.na(DL_cond), "Not Detected", "Detected")) |> + arrange(Detected) + +``` + +* What is `|>`? It's a pipe! It says take 'this thing' and put it in 'that thing'. You'll also see `%>%` in code, it is also a pipe - they are basically the same. + +::: footer + +::: + +## Workflow 2: Discrete data censoring information {.smaller} + +```{r} +#| echo: false + +dt_me(qw_data_slim, page_length = 8, font = "0.7em") +``` + +::: footer + +::: + +## Workflow 3: Join Discrete and Daily + +* One common workflow is to join discrete data with daily data. + +* In this example, we will look at a site that measures both water quality parameters and has daily mean discharge. + +* We will use the `dplyr::left_join` to join the 2 data frames by a date. 
+ +::: footer + +::: + +## Step 1: Get the data + +```{r} +site <- "USGS-04183500" +p_code_dv <- "00060" +stat_cd <- "00003" +p_code_qw <- "00665" +start_date <- "2015-07-03" +end_date <- "2025-07-03" + +qw_data <- read_waterdata_samples(monitoringLocationIdentifier = site, + usgsPCode = p_code_qw, + activityStartDateLower = start_date, + activityStartDateUpper = end_date, + dataProfile = "basicphyschem") + +dv_data <- read_waterdata_daily(monitoring_location_id = site, + parameter_code = p_code_dv, + statistic_id = stat_cd, + time = c(start_date, end_date)) +``` + +## Step 2: Join + +```{r} +library(dplyr) + +little_dv <- dv_data |> + select(time, Flow = value, monitoring_location_id) + +qw_data_joined <- qw_data |> + left_join(little_dv, + by = c("Activity_StartDate" = "time")) +``` + +* "Activity_StartDate" (on the left side data frame) and "time" (on the right side data frame) need to be the same type. + + +::: footer + +::: + +## Step 2: Join (cont.) + +* You could join on multiple columns: + +```{r} +#| eval: false +qw_data <- qw_data |> + left_join(little_dv, + by = c("Activity_StartDate" = "time", + "Location_Identifier" = "monitoring_location_id")) + +``` + +See `dplyr` documentation for lots of joining options, but I find `left_join` my "go-to" for straightforward joins. + +::: footer + +::: + +## Step 3: Inspect + +Let's take a quick peak: + +```{r} +#| output-location: column +ggplot(data = qw_data_joined) + + geom_point(aes(x = Flow, + y = Result_Measure)) + +``` + + +## Exercise 2: Joins {.smaller} + +::: {.panel-tabset} + +### Challenge + +`dplyr` comes with some data sets. To look at them run: + +```{r} +library(dplyr) +band_members <- band_members +band_instruments <- band_instruments +``` + +1. Run that code and view the 2 data frames to see what they look like. + +2. Join the instruments to the "band_members" by name. + +3. Join the members to the "band_instruments" by name. 
+ +### Solution: + + +```{r} +band_members |> + left_join(band_instruments, by = "name") + +band_instruments |> + left_join(band_members, by = "name") + +``` + + + +::: + +::: footer + +::: + +## Workflow 4: Continuous data for known site + +* Continuous data is the high-frequency sensor data. + +* The function to get that data today is `readNWISuv` + +* As NWIS gets deprecated, we expect to have `read_waterdata_continuous` soon + +* We'll look at Suisun Bay at Van Sickle Island NR Pittsburg CA ("USGS-11455508"), with parameter code "99133" which is Nitrate plus Nitrite. + +## Workflow 4: Continuous data for known site + +:::: {.columns} + +::: {.column width="70%"} + +```{r} +#| results: markup +site_id <- "11455508" +p_code_rt <- "99133" +start_date <- "2024-01-01" +end_date <- "2024-06-01" + +continuous_data <- readNWISuv(site_id, + p_code_rt, + start_date, + end_date) + +names(continuous_data) +``` + +::: + +::: {.column width="30%"} + +``` +[1] "agency_cd" +[2] "site_no" +[3] "dateTime" +[4] "X_99133_00000" +[5] "X_99133_00000_cd" +[6] "tz_cd" +``` +::: + +:::: + +``` +GET: https://nwis.waterservices.usgs.gov/nwis/iv/?site=11455508&format=waterml%2C1.1&ParameterCd=99133&startDT=2024-01-01&endDT=2024-06-01 +``` + +## Workflow 4: Inspect + +```{r} +#| output-location: column +ggplot(data = continuous_data) + + geom_point(aes(x = dateTime, + y = X_99133_00000)) +``` + +## Workflow 5: Join Discrete and Continuous + +That same site also measures discrete Nitrate plus Nitrite, which is parameter code "00631". Let's first grab that data: + +```{r} +#| message: true +discrete_data <- read_waterdata_samples(monitoringLocationIdentifier = "USGS-11455508", + usgsPCode = "00631", + activityStartDateLower = start_date, + activityStartDateUpper = end_date, + dataProfile = "basicphyschem") + +``` + +## Workflow 5: Join Discrete and Continuous + +* We now want to join the **closest** continuous sensor time with the discrete sample time. 
+ +* This is trickier than joining by exact matches. + +* `dplyr` has a way, but it's complicated if you want the absolute closest in either direction + +* Another package `data.table` has a slick way to get the closest matches + +## Workflow 5: Join Discrete and Continuous + + +```{r} +#| code-line-numbers: "1|2-3|5|6|1-6" +library(data.table) +setDT(discrete_data)[, join_date := Activity_StartDateTime] +setDT(continuous_data)[, join_date := dateTime] + +closest_dt <- continuous_data[discrete_data, on = .(join_date), roll = "nearest"] +closest_dt <- data.frame(closest_dt) +``` + +::: footer + +::: + +## Workflow 5: Inspect + +```{r} +#| output-location: column +ggplot(data = closest_dt) + + geom_point(aes(x = Result_Measure, + y = X_99133_00000)) + + geom_abline() + + expand_limits(x = 0, y = 0) + + xlab("Discrete") + + ylab("Continuous") + +``` + + +## Data Discovery (if enough time!) + +The process for discovering data is a bit in flux with NWIS retiring. I expect a new process will be introduced soon. For now here are some options. + +1. `read_waterdata_ts_meta` discovers daily and continuous time series + +2. `summarize_waterdata_samples` discovers discrete data at specific monitoring locations + +The next slides will demo how to use those. 
+ +## Data Discovery: Time Series {.smaller} + +```{r} +ts_available <- read_waterdata_ts_meta(monitoring_location_id = "USGS-04183500") +``` + +```{r} +#| echo: false + +dt_me(ts_available |> + sf::st_drop_geometry() |> + select(parameter_name, + parameter_code, statistic_id, begin, end, + computation_identifier), page_length = 6) + +``` + +::: footer + +::: + +## Data Discovery: Discrete {.smaller} + +```{r} +discrete_available <- summarize_waterdata_samples(monitoringLocationIdentifier = "USGS-04183500") + +``` + +```{r} +#| echo: false + +dt_me(discrete_available |> + select(characteristicUserSupplied, + resultCount, activityCount, + firstActivity, mostRecentActivity), + page_length = 6) + +``` + +::: footer + +::: + +## characteristicUserSupplied {.smaller} + +* The column "characteristicUserSupplied" is internally referred to as "observed property". + +* characteristicUserSupplied can be an input to `read_waterdata_samples` + +```{r} +discrete1 <- read_waterdata_samples(characteristicUserSupplied = "Phosphorus as phosphorus, water, unfiltered", + monitoringLocationIdentifier = "USGS-04183500") +nrow(discrete1) +``` + + +* `summarize_waterdata_samples` may be adding a parameter code to the output in the future. + +## More Information {.smaller} + +- dataRetrieval repository: + - <https://github.com/DOI-USGS/dataRetrieval> + - [Documentation](https://doi-usgs.github.io/dataRetrieval) + - [dataRetrieval New Features](https://doi-usgs.github.io/dataRetrieval/articles/read_waterdata_functions.html) + - [General Tutorial](https://rconnect.usgs.gov/NMC_dataRetrieval_1/dataRetrieval_1.html) + +- Contact: + - Computational Tools Email: comptools@usgs.gov + +:::: footer + +::: {style="font-size: 80%;"} + +Any use of trade, firm, or product name is for descriptive purposes only and does not imply endorsement by the U.S. Government. 
+::: + +:::: + diff --git a/tutorials/changes_slides_deck.qmd b/tutorials/changes_slides_deck.qmd new file mode 100644 index 00000000..32fcf0f9 --- /dev/null +++ b/tutorials/changes_slides_deck.qmd @@ -0,0 +1,976 @@ +--- +title: "Updates to dataRetrieval 2025" +author: "Laura DeCicco" +image: hex_logo.png +format: + revealjs: + theme: [simple, custom.scss] + toc: false + slide-number: true + logo: hex_logo.png + footer: + preview-links: auto +title-slide-attributes: + data-background-image: hex_logo.png + data-background-size: 15% + data-background-position: 2% 2% +editor: source +editor_options: + chunk_output_type: console +execute: + echo: true + warning: false +--- + +```{r} +#| echo: false +#| include: false +library(dataRetrieval) +library(ggplot2) +library(dplyr) +library(leaflet) +library(DT) +options(dplyr.summarise.inform = FALSE) +theme_set(theme_grey(base_size = 24)) +update_geom_defaults("point", list(size = 3)) + +dt_me <- function(x, + page_length = 8, + font = "0.7em", + escape = TRUE, + paging = TRUE){ + DT::datatable(x, + rownames = FALSE, + options = list(pageLength = page_length, + info = FALSE, + searching = FALSE, + paging = paging, + lengthChange = FALSE, + initComplete = htmlwidgets::JS( + "function(settings, json) {", + paste0("$(this.api().table().container()).css({'font-size': '", + font, "'});"), + "}")), escape = escape) +} + +``` + + +## Introduction {background-image="images/hex_logo.png" background-size="15%" background-position="90% 90%" } + +In this ~90 minute introduction, the goal is: + +- Introduce new `dataRetrieval` functions + +- The intended audience is someone: + + - Seasoned `dataRetrieval` user + + - AND/OR intermediate R user + + - Familiar with USGS water data + +New to `dataRetrieval`? [Introduction to dataRetrieval](https://water.code-pages.usgs.gov/wq-visualizations-tools/training/posts/Water_Quality_Data/A_Discover/A_Discover.html) + +::: footer + +::: + +## Why are we here? 
{.smaller} + +* NWIS servers are shutting down + + - That means all `readNWIS` functions will eventually stop working + + - Timeline is very uncertain, so we wanted to get information out on replacement functions ASAP. + + - Latest rumor: not before 2026, but performance degradation happening now + +* New `dataRetrieval` functions are available to replace the NWIS functions + + - `read_waterdata_` functions are the modern functions + + - They use the new [USGS Water Data APIs](https://api.waterdata.usgs.gov/ogcapi/v0/) + +::: footer + +::: + +## Installation {.smaller} + +The features shown in this presentation are available on the most recent CRAN update: + +```{r} +#| echo: true +#| eval: false +install.packages("dataRetrieval") + +``` + +Functions added when new API endpoints are introduced will initially be pushed to the "develop" branch on GitHub. To test those updates, use the `remotes` package: + +```{r} +#| echo: true +#| eval: false +library(remotes) +install_github("DOI-USGS/dataRetrieval@develop") + +``` + +The "develop" branch WILL change frequently, and there are no promises of future behavior. + +::: footer + +::: + +## External Documentation + +![](images/documentation_1.png){width="1000" height="500"} + +::: footer + +::: + +## External Documentation + +![](images/documentation_2.png){width="1000" height="500"} + +::: footer + +::: + +## External Documentation + +![](images/documentation_3.png){width="1000" height="500"} + +## USGS Water Data OGC APIs: Current Functions {.smaller} + +The Open Geospatial Consortium (OGC) is a non-profit international organization that develops and promotes open standards for geospatial information. 
OGC-compliant interfaces to USGS water data: + +* [read_waterdata_monitoring_location](https://doi-usgs.github.io/dataRetrieval/reference/read_waterdata_monitoring_location.html) - Monitoring location information + +* [read_waterdata_ts_meta](https://doi-usgs.github.io/dataRetrieval/reference/read_waterdata_ts_meta.html) - Time series availability + +* [read_waterdata_daily](https://doi-usgs.github.io/dataRetrieval/reference/read_waterdata_daily.html) - Daily data + +* [read_waterdata_latest_continuous](https://doi-usgs.github.io/dataRetrieval/reference/read_waterdata_latest_continuous.html) - Latest continuous data + +* [read_waterdata_field_measurements](https://doi-usgs.github.io/dataRetrieval/reference/read_waterdata_field_measurements.html) - Discrete field measurements + +* [read_waterdata](https://doi-usgs.github.io/dataRetrieval/reference/read_waterdata.html) - Generalized function + +* [read_waterdata_metadata](https://doi-usgs.github.io/dataRetrieval/reference/read_waterdata_metadata.html) - Metadata + +::: footer + +::: + + +## USGS Water Data API Token + +* The Water Data APIs limit how many queries a single IP address can make per hour + +* You **can** run new `dataRetrieval` functions without a token + +* You **might** run into errors quickly. If you (or your IP!) have exceeded the quota, you will see: + +``` +error: HTTP 403 Forbidden. +* Query request denied. Possible reasons include query exceeding server limits. +``` + +## USGS Water Data API Token + +1. Request a USGS Water Data API Token: + +2. Save it in a safe place (KeePass or other password management tool) + +3. Add it to your .Renviron file as API_USGS_PAT. + +4. Restart R + +5. Run after restarting R: + +```{r} +#| eval: false +Sys.getenv("API_USGS_PAT") +``` + +See next slide for a demonstration. + +::: footer + +::: + +## Water Data API Token: Example {.smaller} + +My favorite method to add your token to .Renviron is to use the `usethis` package. 
Let's pretend the token sent you was "abc123": + +1. Run in R: +```{r} +#| echo: true +#| eval: false +usethis::edit_r_environ() +``` + +2. Add this line to the file that opens up: + +``` +API_USGS_PAT = "abc123" +``` + +3. Save that file + +4. Restart R/RStudio. + +5. Check that it worked by running (you should see your token printed in the Console): + +```{r} +#| eval: false +Sys.getenv("API_USGS_PAT") +``` + +## Water Data API Token: Example + +![](images/save_token.png) + +## Water Data APIs: Initial Tips + +Use your "tab" key! + +![](images/autocomplete.png) + + + +## read_waterdata_monitoring_location + +Replaces `readNWISsite`: + +![](images/read_ml.png) + +* All the columns that you retrieve, you can also filter on. + +* You **should not** specify **all** of these parameters. + +* You **should not** specify **too few** of these parameters. + +::: footer + +::: + +## read_waterdata_monitoring_location + +Let's get all the monitoring locations for Dane County, Wisconsin: + +```{r} +#| message: true +site_info <- read_waterdata_monitoring_location(state_name = "Wisconsin", + county_name = "Dane County") +nrow(site_info) +``` + +::: {.callout-note collapse="true"} +## Note on county names +`read_waterdata_monitoring_location` requires "County" in the county_name argument. 
You can check county names using: +```{r} +#| eval: false +counties <- check_waterdata_sample_params(service = "counties") +``` +::: + +::: footer + +::: + +## read_waterdata_monitoring_location {.smaller .scrollable} + +```{r} +#| echo: false + +dt_me(site_info, 7, "0.6em") + +``` + +::: footer + +::: + +## read_waterdata_monitoring_location + +Now that we've seen the whole data set, maybe we realize in the future we can ask for just stream sites, and we only really need a few of those columns: + +```{r} +#| message: true +site_info_refined <- read_waterdata_monitoring_location( + state_name = "Wisconsin", + county_name = "Dane County", + site_type = "Stream", + properties = c("monitoring_location_id", + "monitoring_location_name", + "drainage_area", + "geometry")) +``` + +::: footer + +::: + +## Map It: ggplot2 + +"geometry" column means it's an `sf` object, and makes mapping easy! + +```{r} +#| output-location: default +library(ggplot2) +ggplot(data = site_info_refined) + + geom_sf() +``` + +::: footer + +::: + +## Map It: leaflet + +```{r} +#| output-location: slide +library(leaflet) +#default leaflet crs: +leaflet_crs <- "+proj=longlat +datum=WGS84" + +leaflet(data = site_info_refined |> + sf::st_transform(crs = leaflet_crs)) |> + addProviderTiles("CartoDB.Positron") |> + addCircleMarkers(popup = ~monitoring_location_name, + radius = 3, + opacity = 1) +``` + +## Removing `sf` + +* You can post-process the "geometry" column out, or convert it to lat/lon with the `sf` package: + +```{r} +no_sf_1 <- site_info_refined |> + sf::st_drop_geometry() + +longitude <- sf::st_coordinates(site_info_refined)[,1] +latitude <- sf::st_coordinates(site_info_refined)[,2] + +``` + +* You can declare `skipGeometry=TRUE` in the query to return a plain data frame with no geometry: + +```{r} +#| eval: false +no_sf <- read_waterdata_monitoring_location( + state_name = "Wisconsin", + county_name = "Dane County", + site_type = "Stream", + skipGeometry = TRUE) +``` + +::: footer + +::: + 
+## read_waterdata_ts_meta + +Time-Series Metadata. *Kind of* replaces `whatNWISdata`: + +![](images/read_ts_meta.png) + +```{r} +site_ts <- read_waterdata_ts_meta( + monitoring_location_id = "USGS-02238500") + +``` + +::: footer + +::: + +## read_waterdata_ts_meta {.smaller .scrollable} + +```{r} +#| echo: false + +dt_me(site_ts |> + sf::st_drop_geometry() |> + select(-web_description, + -parameter_description, + -monitoring_location_id, + -thresholds, + -computation_identifier), 6, "0.7em") + +``` + +::: footer + +::: + +## read_waterdata_ts_meta + +Let's get all the time series in Dane County, WI with daily mean (statistic_id = "00003") discharge (parameter code = "00060") or temperature (parameter code = "00010"): + +```{r} +sites_available <- read_waterdata_ts_meta( + bbox = sf::st_bbox(site_info), + parameter_code = c("00060", "00010"), + statistic_id = c("00003")) + +``` + +::: {.callout-tip} +Geographic filters are limited to monitoring_location_id and bbox in "waterdata" functions *other* than `read_waterdata_monitoring_location`. + +Using `sf::st_bbox()` is a convenient way to take advantage of the spatial features integration. 
+::: + +::: footer + +::: + +## read_waterdata_ts_meta {.smaller .scrollable} + +```{r} +#| echo: false + +dt_me(sites_available |> + sf::st_drop_geometry() |> + filter(!is.na(begin)) |> # public, but "grade" set to unusable + select(monitoring_location_id, + parameter_name, + parameter_code, + begin, end), 6, "0.7em") + +``` + +::: footer + +::: + + +## read_waterdata_daily + +Replaces `readNWISdv`: + +![](images/read_dv.png) + +```{r} +daily <- read_waterdata_daily(monitoring_location_id = c("USGS-05406457", + "USGS-05427930"), + parameter_code = c("00060", "00010"), + statistic_id = "00003", + time = c("2024-10-01", "2025-07-07")) +``` + +::: footer + +::: + +## read_waterdata_daily {.smaller .scrollable} + +```{r} +#| echo: false + +dt_me(daily |> + select(-daily_id), 6, "0.7em") + +``` + +::: footer + +::: + +## read_waterdata_daily + +```{r} +ggplot(data = daily) + + geom_point(aes(x = time, y = value, + color = approval_status)) + + facet_grid(parameter_code ~ monitoring_location_id, + scale = "free") + +``` + +::: footer + +::: + +## USGS Water Data APIs Notes: time input {.smaller} + +:::: {.columns} + +::: {.column width="50%"} + +The "time" argument has a few options: + +* A single date (or date-time): "2024-10-01" or "2024-10-01T23:20:50Z" + +* A bounded interval: c("2024-10-01", "2025-07-02") + +* Half-bounded intervals: c("2024-10-01", NA) + +* Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours + +::: + +::: {.column width="50%"} + +Here are a bunch of valid inputs: + +```{r} +#| code-line-numbers: "1-7|8-9|10-13|14-17" +# Ask for exact times: +time = "2025-01-01" +time = as.Date("2025-01-01") +time = "2025-01-01T23:20:50Z" +time = as.POSIXct("2025-01-01T23:20:50Z", + format = "%Y-%m-%dT%H:%M:%S", + tz = "UTC") +# Ask for specific range +time = c("2024-01-01", "2025-01-01") # or Dates or POSIXs +# Asking beginning of record to specific end: +time = c(NA, "2024-01-01") # or Date or POSIX +# Asking specific beginning 
to end of record: +time = c("2024-01-01", NA) # or Date or POSIX +# Ask for period +time = "P1M" # past month +time = "P7D" # past 7 days +time = "PT12H" # past 12 hours +``` + +::: + +:::: + +## read_waterdata_latest_continuous{.smaller} + +Most recent observation for each time series of continuous data. Continuous data are collected via automated sensors installed at a monitoring location. They are collected at a high frequency and often at a fixed 15-minute interval. + +```{r} +latest_uv_data <- read_waterdata_latest_continuous(monitoring_location_id = "USGS-01491000", + parameter_code = "00060") + +latest_dane_county <- read_waterdata_latest_continuous(bbox = sf::st_bbox(site_info), + parameter_code = "00060") + +single_ts <- read_waterdata_latest_continuous(time_series_id = "202345d175874d2c814648ac9bea5deb") +``` + +::: {.callout-note} +Nearly all arguments can be vectors. +::: + + +::: footer + +::: + +## read_waterdata_latest_continuous{.smaller .scrollable} + +Latest discharge (00060) in Dane County, WI: + +```{r} +#| echo: false + +dt_me(latest_dane_county |> + sf::st_drop_geometry() |> + select(-time_series_id, + -statistic_id, + -latest_continuous_id), 6, "0.7em") + +``` + +::: footer + +::: + +## Map It: leaflet + +```{r} +#| output-location: slide +pal <- colorNumeric("viridis", latest_dane_county$value) + +leaflet(data = latest_dane_county |> + sf::st_transform(crs = leaflet_crs)) |> + addProviderTiles("CartoDB.Positron") |> + addCircleMarkers(popup = paste(latest_dane_county$monitoring_location_id, "
<br>", + latest_dane_county$time, "<br>
", + latest_dane_county$value, + latest_dane_county$unit_of_measure), + color = ~ pal(value), + radius = 3, + opacity = 1) |> + addLegend(pal = pal, + position = "bottomleft", + title = "Latest Discharge", + values = ~value) +``` + + +## read_waterdata + +* This function is totally different! + +* Uses [CQL2 Queries](https://www.ogc.org/standards/cql2/): Common Query Language (CQL2) + +* Great examples here: + + +::: footer + +::: + +## read_waterdata {.smaller} + +Wisconsin and Minnesota sites with a drainage area greater than 1000 mi^2: + +```{r} +cql <- '{ + "op": "and", + "args": [ + { + "op": "in", + "args": [ + { "property": "state_name" }, + [ "Wisconsin", "Minnesota" ] + ] + }, + { + "op": ">", + "args": [ + { "property": "drainage_area" }, + 1000 + ] + } + ] +}' + +sites_mn_wi <- read_waterdata(service = "monitoring-locations", + CQL = cql) + +``` + +::: footer + +::: + +## read_waterdata: Map It + +```{r} +#| output-location: slide +pal <- colorNumeric("viridis", sites_mn_wi$drainage_area) + +leaflet(data = sites_mn_wi |> + sf::st_transform(crs = leaflet_crs)) |> + addProviderTiles("CartoDB.Positron") |> + addCircleMarkers(popup = ~monitoring_location_name, + color = ~ pal(drainage_area), + radius = 3, + opacity = 1) |> + addLegend(pal = pal, + position = "bottomleft", + title = "Drainage Area", + values = ~drainage_area) + +``` + +## read_waterdata HUCs + +HUCs that fall within 02070010. 
Use the wildcard `%` + +```{r} +# Here's how to get every HUC that starts with 02070010: +cql_huc_wildcard <- '{ +"op": "like", +"args": [ + { "property": "hydrologic_unit_code" }, + "02070010%" +] +}' + +what_huc_sites <- read_waterdata(service = "monitoring-locations", + CQL = cql_huc_wildcard) + +``` + +## read_waterdata HUCs + +```{r} +unique(what_huc_sites$hydrologic_unit_code) +``` + + +## General New Features of Water Data OGC APIs {.smaller} + +* Flexible Queries + + - Lots of options to define your query + + - Do NOT define all of them + + - Do NOT define too few of them + +* Flexible Columns Returned + + - Use the properties argument to ask for just the columns you want + +* Simple Features + + - Returns a geometry column that allows seamless integration with `sf` + +* CQL query support + + +## Lessons Learned + +* [Query limits](https://doi-usgs.github.io/dataRetrieval/articles/read_waterdata_functions.html#query-limits) + + - There is a character limit to how big your query can be + + - Possible alternatives to large site lists are bounding box queries, or loops/applys/etc to chunk up the request + + - Need to balance the character size of the request with the requests per hour limit. + +## Limit Explanation + +* [Limits](https://doi-usgs.github.io/dataRetrieval/articles/read_waterdata_functions.html#limit-vs-max_results) + + - `max_results` lets you define how many rows are returned + + - `limit` lets you define how many rows are returned **per page** of data. With a good internet connection, you can probably get away with ignoring this argument. + +I would ignore both most of the time. + +## Adding API token to CI jobs: GitLab + +If you run dataRetrieval calls in a CI job, you'll need to add an API Token to the configuration. 
+ +* Go to: Settings -> CI/CD -> Variables -> Add Variable + +* Key should be API_USGS_PAT, value will be the token + +* Click on Masked and hidden + +* Add to your .gitlab-ci.yml file: + +``` +variables: + API_USGS_PAT: "${API_USGS_PAT}" +``` +## Adding API token to CI jobs: GitHub + +In GitHub: + +* Settings -> Secrets and variables -> Actions -> Secrets + +* Secret can be stored in Environment or Repository + +* If you created an Environment called "CI_config", your CI yaml will need: + +``` + environment: CI_config + env: + API_USGS_PAT: ${{ secrets.API_USGS_PAT }} +``` + +## Adding API token: Posit Connect + +You'll want to add a token for any Posit Connect product (Shiny app, Quarto slides, etc.). + +![](images/posit_connect.png) + +## Discrete Data {.smaller} + +* USGS switched to Aquarius Samples March 11, 2024. + +* On that day, the USGS data in the Water Quality Portal was frozen. + +* "modern USGS discrete data" = data that includes **pre** and **post** Aquarius Samples conversion. + +* The new function `read_waterdata_samples` gets modern USGS discrete data. + + - it is outside the Water Data OGC API ecosystem, so looks and feels a bit different. + +* [Water Quality Portal (WQP)](https://www.waterqualitydata.us) also has modern USGS discrete data, but not by default. + +* If you only need USGS data, use `read_waterdata_samples`, if you need USGS and non-USGS, use `readWQPdata`. + +## read_waterdata_samples + +Replaces `readNWISqw` + +`read_waterdata_samples` is the **SAME** as `read_USGS_samples`, but going forward we want to use waterdata for consistent branding. 
+ +![](images/read_samples.png) + +::: footer + +::: + +## USGS Samples Data Notes: Data Types and Profiles + +* There are 2 arguments that dictate what kind of data is returned + - "dataType" defines what kind of data comes back + - "dataProfile" defines what columns from that type come back + +## Data Types and Profiles {.smaller} + +```{r} +#| echo: false + +df <- tibble(dataType = c("results", "locations", "activities", "projects", "organizations"), + Description = c("Results data and metadata for measures and observations matching your query", + "Find monitoring locations that have data matching your query", + "Information about the monitoring activities conducted that produced data", + "Information on the projects that have results matching your data query", + "Information about the organizations that have provided data that matches your query"), + dataProfile = c('fullphyschem
<br>basicphyschem
<br>fullbio
<br>basicbio
<br>narrow
<br>resultdetectionquantitationlimit
<br>labsampleprep
<br>count', + 'site
<br>count', + 'sampact
<br>actmetric
<br>actgroup
<br>count', + 'project
<br>projectmonitoringlocationweight', + 'organization<br>
count')) + +dt_me(df, escape = FALSE, paging=FALSE) + +``` + + +::: footer + +::: + +## read_waterdata_samples + +```{r} +site <- "USGS-01631000" +pcode <- "00660" + +qw_data <- read_waterdata_samples(monitoringLocationIdentifier = site, + usgsPCode = pcode, + dataType = "results", + dataProfile = "basicphyschem") +ncol(qw_data) +``` + +That's a LOT of columns that come back. + +::: footer + +::: + +## Discrete data censoring + +Let's pull just a few columns out and look at those: + +```{r} +library(dplyr) + +qw_data_slim <- qw_data |> + select(Date = Activity_StartDate, + Result_Measure, + DL_cond = Result_ResultDetectionCondition, + DL_val_A = DetectionLimit_MeasureA, + DL_type_A = DetectionLimit_TypeA) |> + mutate(Result = if_else(!is.na(DL_cond), DL_val_A, Result_Measure), + Detected = if_else(!is.na(DL_cond), "Not Detected", "Detected")) |> + arrange(Detected) + +``` + +::: footer + +::: + +## Discrete data censoring information {.smaller} + +```{r} +#| echo: false + +dt_me(qw_data_slim, page_length = 8, font = "0.7em") +``` + +::: footer + +::: + + +## summarize_waterdata_samples {.smaller .scrollable} + +A summary service exists for 1 site at a time (so in this case, monitoringLocationIdentifier cannot be a vector of sites): + +```{r} +data_at_site <- summarize_waterdata_samples(monitoringLocationIdentifier = "USGS-04183500") + +``` + +```{r} +#| echo: false + +dt_me(data_at_site |> + arrange(desc(resultCount)), page_length = 4) +``` + +::: footer + +::: + +## Water Quality Portal + +If you use `readWQPqw`, add "legacy=FALSE" to get modern USGS data: + +```{r} +#| eval: false +pHsites_legacy <- readWQPqw("USGS-05406450", "pH", + legacy = FALSE) +``` + +If you use `readWQPdata`, add 'service = "ResultWQX3"': + +```{r} +#| eval: false +pHData_wqx3 <- readWQPdata(siteid = "USGS-04024315", + characteristicName = "pH", + service = "ResultWQX3", + dataProfile = "basicPhysChem") +``` + +## HELP! 
{.smaller} + +* There’s a lot of new information and changes being presented. There are going to be scripts that have been passed down through the years that will start breaking once the NWIS servers are decommissioned. + +* Check back on the documentation often: + +* Peruse the "Additional Articles" - when we find common issues people have with converting their old workflows, we will try to add articles to clarify new workflows. + +* If you have additional questions, email comptools@usgs.gov. + +::: footer + +::: + +## More Information {.smaller} + +- dataRetrieval repository: + - + +- Documentation: + - + +- Contact: + - Computational Tools Email: comptools@usgs.gov + +- Bug reports can be reported here: + - + +:::: footer + +::: {style="font-size: 80%;"} + +Any use of trade, firm, or product name is for descriptive purposes only and does not imply endorsement by the U.S. Government. +::: + +:::: + diff --git a/tutorials/custom.scss b/tutorials/custom.scss new file mode 100644 index 00000000..1e2b1aa8 --- /dev/null +++ b/tutorials/custom.scss @@ -0,0 +1,3 @@ +/*-- scss:defaults --*/ +$code-color: #06402B; +$code-bg: #F0F0F0; \ No newline at end of file diff --git a/tutorials/hex_logo.png b/tutorials/hex_logo.png new file mode 100644 index 00000000..0103ebfc Binary files /dev/null and b/tutorials/hex_logo.png differ diff --git a/tutorials/images/apperences.png b/tutorials/images/apperences.png new file mode 100644 index 00000000..74cd7b9a Binary files /dev/null and b/tutorials/images/apperences.png differ diff --git a/tutorials/images/autocomplete.png b/tutorials/images/autocomplete.png new file mode 100644 index 00000000..30c011cf Binary files /dev/null and b/tutorials/images/autocomplete.png differ diff --git a/tutorials/images/autocomplete_samples.png b/tutorials/images/autocomplete_samples.png new file mode 100644 index 00000000..b8d37135 Binary files /dev/null and b/tutorials/images/autocomplete_samples.png differ diff --git a/tutorials/images/dark_mode.png 
b/tutorials/images/dark_mode.png new file mode 100644 index 00000000..21f710a1 Binary files /dev/null and b/tutorials/images/dark_mode.png differ diff --git a/tutorials/images/data_retrievals.png b/tutorials/images/data_retrievals.png new file mode 100644 index 00000000..b108f3a3 Binary files /dev/null and b/tutorials/images/data_retrievals.png differ diff --git a/tutorials/images/default_rstudio.png b/tutorials/images/default_rstudio.png new file mode 100644 index 00000000..ab77ebe1 Binary files /dev/null and b/tutorials/images/default_rstudio.png differ diff --git a/tutorials/images/documentation_1.png b/tutorials/images/documentation_1.png new file mode 100644 index 00000000..cdfa4061 Binary files /dev/null and b/tutorials/images/documentation_1.png differ diff --git a/tutorials/images/documentation_2.png b/tutorials/images/documentation_2.png new file mode 100644 index 00000000..e046a45f Binary files /dev/null and b/tutorials/images/documentation_2.png differ diff --git a/tutorials/images/documentation_3.png b/tutorials/images/documentation_3.png new file mode 100644 index 00000000..b5a1b2a2 Binary files /dev/null and b/tutorials/images/documentation_3.png differ diff --git a/tutorials/images/help_file_2.png b/tutorials/images/help_file_2.png new file mode 100644 index 00000000..232e1028 Binary files /dev/null and b/tutorials/images/help_file_2.png differ diff --git a/tutorials/images/hex_logo.png b/tutorials/images/hex_logo.png new file mode 100644 index 00000000..0103ebfc Binary files /dev/null and b/tutorials/images/hex_logo.png differ diff --git a/tutorials/images/install.png b/tutorials/images/install.png new file mode 100644 index 00000000..3f0b21ea Binary files /dev/null and b/tutorials/images/install.png differ diff --git a/tutorials/images/legacy_wqp.png b/tutorials/images/legacy_wqp.png new file mode 100644 index 00000000..1af76290 Binary files /dev/null and b/tutorials/images/legacy_wqp.png differ diff --git a/tutorials/images/posit_connect.png 
b/tutorials/images/posit_connect.png new file mode 100644 index 00000000..86d0626d Binary files /dev/null and b/tutorials/images/posit_connect.png differ diff --git a/tutorials/images/read_dv.png b/tutorials/images/read_dv.png new file mode 100644 index 00000000..dcfc9592 Binary files /dev/null and b/tutorials/images/read_dv.png differ diff --git a/tutorials/images/read_ml.png b/tutorials/images/read_ml.png new file mode 100644 index 00000000..83322368 Binary files /dev/null and b/tutorials/images/read_ml.png differ diff --git a/tutorials/images/read_samples.png b/tutorials/images/read_samples.png new file mode 100644 index 00000000..77e2ad4c Binary files /dev/null and b/tutorials/images/read_samples.png differ diff --git a/tutorials/images/read_ts_meta.png b/tutorials/images/read_ts_meta.png new file mode 100644 index 00000000..483ee459 Binary files /dev/null and b/tutorials/images/read_ts_meta.png differ diff --git a/tutorials/images/save_token.png b/tutorials/images/save_token.png new file mode 100644 index 00000000..8b8e46b6 Binary files /dev/null and b/tutorials/images/save_token.png differ diff --git a/tutorials/images/shiny_app.png b/tutorials/images/shiny_app.png new file mode 100644 index 00000000..dfc70b14 Binary files /dev/null and b/tutorials/images/shiny_app.png differ diff --git a/tutorials/images/update_all.png b/tutorials/images/update_all.png new file mode 100644 index 00000000..558fdb7a Binary files /dev/null and b/tutorials/images/update_all.png differ diff --git a/tutorials/images/update_button.png b/tutorials/images/update_button.png new file mode 100644 index 00000000..b562ea0b Binary files /dev/null and b/tutorials/images/update_button.png differ diff --git a/vignettes/Status.Rmd b/vignettes/Status.Rmd index dafdb0fe..24610c7d 100644 --- a/vignettes/Status.Rmd +++ b/vignettes/Status.Rmd @@ -39,27 +39,31 @@ df <- data.frame( "readNWISdv", "readNWISsite", "whatNWISsites", - "", + "readNWISpCode", + "readNWISgwl", + "readNWISmeas", + 
"readNWISdata", "readNWISuv", "readNWISrating", "readNWISstat", - "readNWISmeas", "readNWISpeak", - "readNWISgwl", "readNWISuse", - "readNWISdata", - "whatNWISdata", - "readNWISpCode" + + "whatNWISdata" ), New = c( "read_waterdata_samples", "read_waterdata_daily", "read_waterdata_monitoring_location", "read_waterdata_ts_meta", + "read_waterdata_parameter_codes", + "read_waterdata_field_measurements", + "read_waterdata_field_measurements", "read_waterdata", - rep("", 10) + rep("", 6) ), - "Available on (branch)" = c("main (CRAN)", "main (CRAN)", "main (CRAN)", "main (CRAN)", "main (CRAN)", rep("", 10)) + "Available on (branch)" = c(rep("main (CRAN)", 5), + "develop", "develop", "main (CRAN)", rep("", 6)) ) knitr::kable(df, col.names = c("WaterServices (legacy) function", "Water Data (new) function", "Available on (branch name)")) diff --git a/vignettes/basic_slides.Rmd b/vignettes/basic_slides.Rmd new file mode 100644 index 00000000..aa7cb140 --- /dev/null +++ b/vignettes/basic_slides.Rmd @@ -0,0 +1,21 @@ +--- +title: "Discover and Download Data" +output: + rmarkdown::html_vignette: + toc: true + number_sections: false +vignette: > + %\VignetteIndexEntry{Discover and Download Data} + \usepackage[utf8]{inputenc} + %\VignetteEngine{knitr::rmarkdown} +editor_options: + chunk_output_type: console +--- + +Click the slide below then "f" for full screen, and "Esc" to escape full screen. + +```{=html} + +``` + diff --git a/vignettes/changes_slides.Rmd b/vignettes/changes_slides.Rmd new file mode 100644 index 00000000..d55b80c2 --- /dev/null +++ b/vignettes/changes_slides.Rmd @@ -0,0 +1,21 @@ +--- +title: "Updates to dataRetrieval" +output: + rmarkdown::html_vignette: + toc: true + number_sections: false +vignette: > + %\VignetteIndexEntry{Updates to dataRetrieval} + \usepackage[utf8]{inputenc} + %\VignetteEngine{knitr::rmarkdown} +editor_options: + chunk_output_type: console +--- + +Click the slide below then "f" for full screen, and "Esc" to escape full screen. 
+ +```{=html} + +``` + diff --git a/vignettes/dataRetrieval.Rmd b/vignettes/dataRetrieval.Rmd index 8efc2a97..2e5a4449 100644 --- a/vignettes/dataRetrieval.Rmd +++ b/vignettes/dataRetrieval.Rmd @@ -62,9 +62,9 @@ Functions <- c( "read_waterdata_daily", "readNWISuv", "readNWISrating", - "readNWISmeas", + "read_waterdata_field_measurements", "readNWISpeak", - "readNWISgwl", + "read_waterdata_field_measurements", "readNWISuse", "readNWISstat", "read_waterdata_parameter_codes", @@ -108,7 +108,11 @@ Description <- c( ) Source <- c("USGS Water Data API", "USGS Water Data API", - rep("NWIS", 7), + rep("NWIS",2), + "USGS Water Data API", + "NWIS", + "USGS Water Data API", + rep("NWIS",2), "USGS Water Data API", "USGS Water Data API", "USGS Samples Data", @@ -376,8 +380,8 @@ Data are retrieved from [https://waterservices.usgs.gov/docs/instantaneous-value Groundwater level measurements can be obtained with the `readNWISgwl` function. Information on the returned data can be found with the `comment` function, and attached attributes as described in the [metadata](#embedded-metadata) section. ```{r gwlexample, echo=TRUE, eval=FALSE} -siteNumber <- "434400121275801" -groundWater <- readNWISgwl(siteNumber) +siteNumber <- "USGS-434400121275801" +groundWater <- read_waterdata_field_measurements(monitoring_location_id = siteNumber) ``` @@ -410,7 +414,7 @@ These data are the discrete measurements of discharge that are made for the purp Surface-water measurement data can be obtained with the `readNWISmeas` function. 
```{r surfexample, echo=TRUE, eval=FALSE} -surfaceData <- readNWISmeas(siteNumber) +surfaceData <- read_waterdata_field_measurements(monitoring_location_id = "USGS-01594440") ``` diff --git a/vignettes/read_waterdata_functions.Rmd b/vignettes/read_waterdata_functions.Rmd index d3476f9c..260887f6 100644 --- a/vignettes/read_waterdata_functions.Rmd +++ b/vignettes/read_waterdata_functions.Rmd @@ -168,8 +168,6 @@ The `read_waterdata_daily` function replaces the `readNWISdv` function. To access these services on a web browser, go to . ```{r} -library(dataRetrieval) - daily_modern <- read_waterdata_daily(monitoring_location_id = "USGS-01491000", parameter_code = c("00060", "00010"), statistic_id = "00003", @@ -189,6 +187,28 @@ ggplot(data = daily_modern) + ``` +## Field Measurements + +The `read_waterdata_field_measurements` replaces both the `readNWISgwl` and `readNWISmeas` functions. + +`r dataRetrieval:::get_description("field-measurements")` + +```{r} +field_modern <- read_waterdata_field_measurements(monitoring_location_id = c("USGS-451605097071701", "USGS-263819081585801"), + time = c("2023-10-01", "2024-09-30")) + +``` + + +```{r} +ggplot(data = field_modern) + + geom_point(aes(x = time, y = value)) + + facet_grid(parameter_code ~ monitoring_location_id, scale = "free") + + theme_bw() + +``` + + ## Latest Continuous The `read_waterdata_latest_continuous` doesn't have an equivalent NWIS function to replace. 
It is used to get the latest continuous measurement @@ -371,6 +391,16 @@ coordinate_datum_codes <- read_waterdata_metadata("coordinate-datum-codes") coordinate_method_codes <- read_waterdata_metadata("coordinate-method-codes") ``` +### Hydrologic Unit Codes + +`r dataRetrieval:::get_description("hydrologic-unit-codes")` + +```{r} +#| eval: false +huc_codes <- read_waterdata_metadata("hydrologic-unit-codes") +``` + + ### Medium Codes `r dataRetrieval:::get_description("medium-codes")` @@ -437,7 +467,7 @@ time_zone_codes <- read_waterdata_metadata("time-zone-codes") ## Discrete Samples -Discrete USGS water quality can be accessed via the `read_waterdata_samples` function. While this is a new, modern USGS endpoint, it is not served in the same infrastructure as the rest of these new advertised functions. See [Samples Data](articles/samples_data.html)) for information on accessing USGS discrete water quality data. +Discrete USGS water quality can be accessed via the `read_waterdata_samples` function. While this is a new, modern USGS endpoint, it is not served in the same OGC infrastructure as the rest of these new advertised functions. See [Samples Data](articles/samples_data.html)) for information on accessing USGS discrete water quality data. # New Features diff --git a/vignettes/tutorial.Rmd b/vignettes/tutorial.Rmd index 54be8fea..eb446cc8 100644 --- a/vignettes/tutorial.Rmd +++ b/vignettes/tutorial.Rmd @@ -112,14 +112,14 @@ There are many types of data served from NWIS. 
To understand how the services ar |--------|:-------|------:|-------:| |uv|[readNWISuv](https://doi-usgs.github.io/dataRetrieval/reference/readNWISuv.html)|Continuous data| None yet | |dv|[readNWISdv](https://doi-usgs.github.io/dataRetrieval/reference/readNWISdv.html)|Daily aggregated | [read_waterdata_daily](https://doi-usgs.github.io/dataRetrieval/reference/read_waterdata_daily.html) | -|gwlevels|[readNWISgwl](https://doi-usgs.github.io/dataRetrieval/reference/readNWISgwl.html)|Groundwater levels | None yet | +|gwlevels|[readNWISgwl](https://doi-usgs.github.io/dataRetrieval/reference/readNWISgwl.html)|Groundwater levels | [read_waterdata_field_measurements](https://doi-usgs.github.io/dataRetrieval/reference/read_waterdata_field_measurements.html) | |site|[readNWISsite](https://doi-usgs.github.io/dataRetrieval/reference/readNWISsite.html)|Site metadata| [read_waterdata_monitoring_location](https://doi-usgs.github.io/dataRetrieval/reference/read_waterdata_monitoring_location.html) | |pcode|[readNWISpCode](https://doi-usgs.github.io/dataRetrieval/reference/readNWISpCode.html)|Parameter code metadata | [read_waterdata_parameter_codes](https://doi-usgs.github.io/dataRetrieval/reference/read_waterdata_parameter_codes.html) | |stat|[readNWISstat](https://doi-usgs.github.io/dataRetrieval/reference/readNWISstat.html)| Site statistics | None yet | |rating|[readNWISrating](https://doi-usgs.github.io/dataRetrieval/reference/readNWISrating.html)| Rating curves| None yet | |peak|[readNWISpeak](https://doi-usgs.github.io/dataRetrieval/reference/readNWISpeak.html)|Peak flow| None yet | |use|[readNWISuse](https://doi-usgs.github.io/dataRetrieval/reference/readNWISuse.html)|Water Use| None yet | -|meas|[readNWISmeas](https://doi-usgs.github.io/dataRetrieval/reference/readNWISmeas.html)|Discrete surface water| None yet | +|meas|[readNWISmeas](https://doi-usgs.github.io/dataRetrieval/reference/readNWISmeas.html)|Discrete surface water| 
[read_waterdata_field_measurements](https://doi-usgs.github.io/dataRetrieval/reference/read_waterdata_field_measurements.html)| | | [readNWISdata](https://doi-usgs.github.io/dataRetrieval/reference/readNWISdata.html) | General data import for NWIS| [read_waterdata](https://doi-usgs.github.io/dataRetrieval/reference/read_waterdata.html) |