Skip to content

Commit 237ed81

Browse files
authored
Merge pull request #195 from thodson-usgs/set-waterdata-types
Set waterdata data types
2 parents ff92fa9 + 9301ff6 commit 237ed81

File tree

2 files changed

+40
-25
lines changed

2 files changed

+40
-25
lines changed

dataretrieval/waterdata/api.py

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -165,8 +165,7 @@ def get_daily(
165165
if your internet connection is spotty. The default (None) will set the
166166
limit to the maximum allowable limit for the service.
167167
convert_type : boolean, optional
168-
If True, the function will convert the data to dates and qualifier to
169-
string vector
168+
If True, converts columns to appropriate types.
170169
171170
Returns
172171
-------
@@ -475,6 +474,8 @@ def get_monitoring_locations(
475474
The returning object will be a data frame with no spatial information.
476475
Note that the USGS Water Data APIs use camelCase "skipGeometry" in
477476
CQL2 queries.
477+
convert_type : boolean, optional
478+
If True, converts columns to appropriate types.
478479
479480
Returns
480481
-------
@@ -666,8 +667,7 @@ def get_time_series_metadata(
666667
if your internet connection is spotty. The default (None) will set the
667668
limit to the maximum allowable limit for the service.
668669
convert_type : boolean, optional
669-
If True, the function will convert the data to dates and qualifier to
670-
string vector
670+
If True, converts columns to appropriate types.
671671
672672
Returns
673673
-------
@@ -842,8 +842,7 @@ def get_latest_continuous(
842842
if your internet connection is spotty. The default (None) will set the
843843
limit to the maximum allowable limit for the service.
844844
convert_type : boolean, optional
845-
If True, the function will convert the data to dates and qualifier to
846-
string vector
845+
If True, converts columns to appropriate types.
847846
848847
Returns
849848
-------
@@ -1017,8 +1016,7 @@ def get_latest_daily(
10171016
if your internet connection is spotty. The default (None) will set the
10181017
limit to the maximum allowable limit for the service.
10191018
convert_type : boolean, optional
1020-
If True, the function will convert the data to dates and qualifier to
1021-
string vector
1019+
If True, converts columns to appropriate types.
10221020
10231021
Returns
10241022
-------
@@ -1183,8 +1181,7 @@ def get_field_measurements(
11831181
if your internet connection is spotty. The default (None) will set the
11841182
limit to the maximum allowable limit for the service.
11851183
convert_type : boolean, optional
1186-
If True, the function will convert the data to dates and qualifier to
1187-
string vector
1184+
If True, converts columns to appropriate types.
11881185
11891186
Returns
11901187
-------

dataretrieval/waterdata/utils.py

Lines changed: 33 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -667,32 +667,48 @@ def _arrange_cols(
667667
return df.rename(columns={"id": output_id})
668668

669669

670-
def _cleanup_cols(df: pd.DataFrame, service: str = "daily") -> pd.DataFrame:
670+
def _type_cols(df: pd.DataFrame) -> pd.DataFrame:
671671
"""
672-
Cleans and standardizes columns in a pandas DataFrame for water data endpoints.
672+
Casts columns into appropriate types.
673673
674674
Parameters
675675
----------
676676
df : pd.DataFrame
677677
The input DataFrame containing water data.
678-
service : str, optional
679-
The type of water data service (default is "daily").
680678
681679
Returns
682680
-------
683681
pd.DataFrame
684-
The cleaned DataFrame with standardized columns.
682+
The DataFrame with columns cast to appropriate types.
685683
686-
Notes
687-
-----
688-
- If the 'time' column exists and service is "daily", it is converted to date objects.
689-
- The 'value' and 'contributing_drainage_area' columns are coerced to numeric types.
690684
"""
691-
if "time" in df.columns and service == "daily":
692-
df["time"] = pd.to_datetime(df["time"]).dt.date
693-
for col in ["value", "contributing_drainage_area"]:
694-
if col in df.columns:
695-
df[col] = pd.to_numeric(df[col], errors="coerce")
685+
cols = set(df.columns)
686+
numerical_cols = [
687+
"altitude",
688+
"altitude_accuracy",
689+
"contributing_drainage_area",
690+
"drainage_area",
691+
"hole_constructed_depth",
692+
"value",
693+
"well_constructed_depth",
694+
]
695+
time_cols = [
696+
"begin",
697+
"begin_utc",
698+
"construction_date",
699+
"end",
700+
"end_utc",
701+
"datetime", # unused
702+
"last_modified",
703+
"time",
704+
]
705+
706+
for col in cols.intersection(time_cols):
707+
df[col] = pd.to_datetime(df[col], errors="coerce")
708+
709+
for col in cols.intersection(numerical_cols):
710+
df[col] = pd.to_numeric(df[col], errors="coerce")
711+
696712
return df
697713

698714

@@ -748,8 +764,10 @@ def get_ogc_data(
748764
)
749765
# Manage some aspects of the returned dataset
750766
return_list = _deal_with_empty(return_list, properties, service)
767+
751768
if convert_type:
752-
return_list = _cleanup_cols(return_list, service=service)
769+
return_list = _type_cols(return_list)
770+
753771
return_list = _arrange_cols(return_list, properties, output_id)
754772
# Create metadata object from response
755773
metadata = BaseMetadata(response)

0 commit comments

Comments
 (0)