Skip to content
4 changes: 2 additions & 2 deletions dataretrieval/waterdata/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,14 @@

# Public API exports
from .api import (
_check_profiles,
get_codes,
get_continuous,
get_daily,
get_field_measurements,
get_latest_continuous,
get_latest_daily,
get_monitoring_locations,
get_reference_table,
get_samples,
get_time_series_metadata,
)
Expand All @@ -37,9 +37,9 @@
"get_latest_continuous",
"get_latest_daily",
"get_monitoring_locations",
"get_reference_table",
"get_samples",
"get_time_series_metadata",
"_check_profiles",
"CODE_SERVICES",
"SERVICES",
"PROFILES",
Expand Down
107 changes: 77 additions & 30 deletions dataretrieval/waterdata/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,17 @@
from dataretrieval.utils import BaseMetadata, to_str
from dataretrieval.waterdata.types import (
CODE_SERVICES,
PROFILE_LOOKUP,
METADATA_COLLECTIONS,
PROFILES,
SERVICES,
)
from dataretrieval.waterdata.utils import SAMPLES_URL, get_ogc_data
from dataretrieval.waterdata.utils import (
SAMPLES_URL,
get_ogc_data,
_construct_api_requests,
_walk_pages,
_check_profiles
)

# Set up logger for this module
logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -685,6 +691,8 @@ def get_time_series_metadata(
parameter_name: Optional[Union[str, List[str]]] = None,
properties: Optional[Union[str, List[str]]] = None,
statistic_id: Optional[Union[str, List[str]]] = None,
hydrologic_unit_code: Optional[Union[str, List[str]]] = None,
state_name: Optional[Union[str, List[str]]] = None,
last_modified: Optional[Union[str, List[str]]] = None,
begin: Optional[Union[str, List[str]]] = None,
end: Optional[Union[str, List[str]]] = None,
Expand Down Expand Up @@ -736,6 +744,17 @@ def get_time_series_metadata(
Example codes include 00001 (max), 00002 (min), and 00003 (mean).
A complete list of codes and their descriptions can be found at
https://help.waterdata.usgs.gov/code/stat_cd_nm_query?stat_nm_cd=%25&fmt=html.
hydrologic_unit_code : string or list of strings, optional
The United States is divided and sub-divided into successively smaller
hydrologic units which are classified into four levels: regions,
sub-regions, accounting units, and cataloging units. The hydrologic
units are arranged within each other, from the smallest (cataloging units)
to the largest (regions). Each hydrologic unit is identified by a unique
hydrologic unit code (HUC) consisting of two to eight digits based on the
four levels of classification in the hydrologic unit system.
state_name : string or list of strings, optional
The name of the state or state equivalent in which the monitoring location
is located.
last_modified : string, optional
The last time a record was refreshed in our database. This may happen
due to regular operational processes and does not necessarily indicate
Expand Down Expand Up @@ -1388,6 +1407,62 @@ def get_field_measurements(

return get_ogc_data(args, output_id, service)

def get_reference_table(
    collection: str,
    limit: Optional[int] = None,
) -> Tuple[pd.DataFrame, BaseMetadata]:
    """Get metadata reference tables for the USGS Water Data API.

    Reference tables provide the range of allowable values for parameter
    arguments in the waterdata module.

    Parameters
    ----------
    collection : string
        One of the following options: "agency-codes", "altitude-datums",
        "aquifer-codes", "aquifer-types", "coordinate-accuracy-codes",
        "coordinate-datum-codes", "coordinate-method-codes", "counties",
        "hydrologic-unit-codes", "medium-codes", "national-aquifer-codes",
        "parameter-codes", "reliability-codes", "site-types", "states",
        "statistic-codes", "topographic-codes", "time-zone-codes"
    limit : numeric, optional
        The optional limit parameter is used to control the subset of the
        selected features that should be returned in each page. The maximum
        allowable limit is 50000. It may be beneficial to set this number lower
        if your internet connection is spotty. The default (None) will set the
        limit to the maximum allowable limit for the service.

    Returns
    -------
    df : pandas.DataFrame
        The requested reference table. The service's generic ``id`` column
        is renamed to ``<singular collection name>_id`` with hyphens
        replaced by underscores (e.g. "agency-codes" -> ``agency_code_id``,
        "counties" -> ``county_id``).
    md : BaseMetadata
        Metadata object built from the API response.

    Raises
    ------
    ValueError
        If ``collection`` is not one of the ``METADATA_COLLECTIONS`` options.
    """
    valid_collections = get_args(METADATA_COLLECTIONS)
    if collection not in valid_collections:
        raise ValueError(
            f"Invalid code service: '{collection}'. "
            f"Valid options are: {valid_collections}."
        )

    req = _construct_api_requests(
        service=collection,
        limit=limit,
        skip_geometry=True,
    )
    # Run API request and iterate through pages if needed
    return_list, response = _walk_pages(geopd=False, req=req)

    # Give ID column a more meaningful name: the singular form of the
    # collection name. Plurals ending in "ies" need "y" restored, otherwise
    # "counties" would become the malformed "countie_id".
    if collection.endswith("ies"):
        singular = f"{collection[:-3]}y"
    elif collection.endswith("s"):
        singular = collection[:-1]
    else:
        singular = collection
    id_column = f"{singular.replace('-', '_')}_id"
    return_list = return_list.rename(columns={"id": id_column})

    # Create metadata object from response
    metadata = BaseMetadata(response)
    return return_list, metadata


def get_codes(code_service: CODE_SERVICES) -> pd.DataFrame:
"""Return codes from a Samples code service.
Expand Down Expand Up @@ -1641,31 +1716,3 @@ def get_samples(

return df, BaseMetadata(response)


def _check_profiles(
    service: SERVICES,
    profile: PROFILES,
) -> None:
    """Validate that a profile may be used with a given service.

    Parameters
    ----------
    service : string
        One of the service names from the "services" list.
    profile : string
        One of the profile names from "results_profiles",
        "locations_profiles", "activities_profiles",
        "projects_profiles" or "organizations_profiles".

    Raises
    ------
    ValueError
        If ``service`` is not a recognised service, or if ``profile`` is
        not listed for that service in ``PROFILE_LOOKUP``.
    """
    allowed_services = get_args(SERVICES)
    service_is_known = service in allowed_services
    if not service_is_known:
        message = (
            f"Invalid service: '{service}'. Valid options are: {allowed_services}."
        )
        raise ValueError(message)

    # The service is checked first so the lookup below uses a known key.
    allowed_profiles = PROFILE_LOOKUP[service]
    if profile in allowed_profiles:
        return None

    raise ValueError(
        f"Invalid profile: '{profile}' for service '{service}'. "
        f"Valid options are: {allowed_profiles}."
    )
21 changes: 21 additions & 0 deletions dataretrieval/waterdata/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,27 @@
"states",
]

# Names of the metadata reference-table collections. get_reference_table()
# validates its `collection` argument against these values (via get_args)
# and passes the chosen name through as the service to request.
METADATA_COLLECTIONS = Literal[
"agency-codes",
"altitude-datums",
"aquifer-codes",
"aquifer-types",
"coordinate-accuracy-codes",
"coordinate-datum-codes",
"coordinate-method-codes",
"counties",
"hydrologic-unit-codes",
"medium-codes",
"national-aquifer-codes",
"parameter-codes",
"reliability-codes",
"site-types",
"states",
"statistic-codes",
"topographic-codes",
"time-zone-codes",
]

SERVICES = Literal[
"activities",
"locations",
Expand Down
79 changes: 58 additions & 21 deletions dataretrieval/waterdata/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import os
import re
from datetime import datetime
from typing import Any, Dict, List, Optional, Tuple, Union
from typing import Any, Dict, List, Optional, Tuple, Union, get_args

import pandas as pd
import requests
Expand All @@ -13,6 +13,12 @@
from dataretrieval.utils import BaseMetadata
from dataretrieval import __version__

from dataretrieval.waterdata.types import (
PROFILE_LOOKUP,
PROFILES,
SERVICES,
)

try:
import geopandas as gpd

Expand Down Expand Up @@ -547,7 +553,7 @@ def _walk_pages(
logger.info("Requesting: %s", req.url)

if not geopd:
logger.warning(
logger.info(
"Geopandas not installed. Geometries will be flattened into pandas DataFrames."
)

Expand Down Expand Up @@ -648,35 +654,38 @@ def _arrange_cols(
pd.DataFrame or gpd.GeoDataFrame
The DataFrame with columns rearranged and/or renamed according to the specified properties and output_id.
"""

# Rename id column to output_id
df = df.rename(columns={"id": output_id})

# If properties are provided, filter to only those columns
# plus geometry if skip_geometry is False
if properties and not all(pd.isna(properties)):
if "id" not in properties:
# If user refers to service-specific output id in properties,
# then rename the "id" column to the output_id (id column is
# automatically included).
if output_id in properties:
df = df.rename(columns={"id": output_id})
# If output id is not in properties, but user requests the plural
# of the output_id (e.g. "monitoring_locations_id"), then rename
# "id" to plural. This is pretty niche.
else:
plural = output_id.replace("_id", "s_id")
if plural in properties:
df = df.rename(columns={"id": plural})
# Make sure geometry stays in the dataframe if skip_geometry is False
if 'geometry' in df.columns and 'geometry' not in properties:
properties.append('geometry')
# id is technically a valid column from the service, but these
# functions make the name more specific. So, if someone requests
# 'id', give them the output_id column
if 'id' in properties:
properties[properties.index('id')] = output_id
df = df.loc[:, [col for col in properties if col in df.columns]]
else:
df = df.rename(columns={"id": output_id})


# Move meaningless-to-user, extra id columns to the end
# of the dataframe, if they exist
extra_id_cols = set(df.columns).intersection({
extra_id_col = set(df.columns).intersection({
"latest_continuous_id",
"latest_daily_id",
"daily_id",
"continuous_id",
"field_measurement_id"
})
if extra_id_cols:
id_col_order = [col for col in df.columns if col not in extra_id_cols] + list(extra_id_cols)

# If the arbitrary id column is returned (either due to properties
# being none or NaN), then move it to the end of the dataframe, but
# if part of properties, keep in requested order
if extra_id_col and (properties is None or all(pd.isna(properties))):
id_col_order = [col for col in df.columns if col not in extra_id_col] + list(extra_id_col)
df = df.loc[:, id_col_order]

return df
Expand Down Expand Up @@ -821,3 +830,31 @@ def get_ogc_data(
return return_list, metadata


def _check_profiles(
    service: SERVICES,
    profile: PROFILES,
) -> None:
    """Ensure the requested profile is valid for the requested service.

    Parameters
    ----------
    service : string
        One of the service names from the "services" list.
    profile : string
        One of the profile names from "results_profiles",
        "locations_profiles", "activities_profiles",
        "projects_profiles" or "organizations_profiles".

    Raises
    ------
    ValueError
        If ``service`` is not among the ``SERVICES`` options, or if
        ``profile`` is not among the profiles ``PROFILE_LOOKUP`` lists for
        that service.
    """
    known_services = get_args(SERVICES)
    if not (service in known_services):
        raise ValueError(
            f"Invalid service: '{service}'. Valid options are: {known_services}."
        )

    # Only reached for a recognised service; fetch its permitted profiles.
    permitted = PROFILE_LOOKUP[service]
    profile_ok = profile in permitted
    if profile_ok:
        return

    error_text = (
        f"Invalid profile: '{profile}' for service '{service}'. "
        f"Valid options are: {permitted}."
    )
    raise ValueError(error_text)

Loading
Loading