Skip to content

Commit 6b6ad6a

Browse files
authored
Improve Metadata class (#110)
* make Metadata a dataclass * Remove `__name__ == "__main__"` from module * Refactor Metadata, take response in initializer * Check where Metadata objects are constructed * Refactor `set_metadata` functions within the `__init__` of `BaseMetadata` subclasses. * Add a `__repr__` to BaseMetadata * Remove dataclass import * Update children Metadata docstrings. * Implement site_info and variable_info * site_info and variable_info as property, not as callable * Implement site_info property * Update tests to handle site_info as property * Add tests to BaseMetadata * site_info and variable_info are not callables anymore * Complete docstring with class attributes * Add docstring to WQP metadata class
1 parent dab0f51 commit 6b6ad6a

File tree

6 files changed

+264
-142
lines changed

6 files changed

+264
-142
lines changed

dataretrieval/nwis.py

Lines changed: 99 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
import re
1919

2020
from dataretrieval.utils import to_str, format_datetime, update_merge
21-
from dataretrieval.utils import set_metadata as set_md
21+
from dataretrieval.utils import BaseMetadata
2222
from .utils import query
2323

2424
WATERDATA_BASE_URL = 'https://nwis.waterdata.usgs.gov/'
@@ -211,7 +211,7 @@ def _qwdata(datetime_index=True, ssl_check=True, **kwargs):
211211
df = format_datetime(df, 'sample_dt', 'sample_tm',
212212
'sample_start_time_datum_cd')
213213

214-
return format_response(df, **kwargs), _set_metadata(response, **kwargs)
214+
return format_response(df, **kwargs), NWIS_Metadata(response, **kwargs)
215215

216216

217217
def get_discharge_measurements(sites=None, start=None, end=None,
@@ -267,7 +267,7 @@ def get_discharge_measurements(sites=None, start=None, end=None,
267267
def _discharge_measurements(ssl_check=True, **kwargs):
268268
response = query_waterdata('measurements', format='rdb',
269269
ssl_check=ssl_check, **kwargs)
270-
return _read_rdb(response.text), _set_metadata(response, **kwargs)
270+
return _read_rdb(response.text), NWIS_Metadata(response, **kwargs)
271271

272272

273273
def get_discharge_peaks(sites=None, start=None, end=None,
@@ -328,7 +328,7 @@ def _discharge_peaks(ssl_check=True, **kwargs):
328328

329329
df = _read_rdb(response.text)
330330

331-
return format_response(df, service='peaks', **kwargs), _set_metadata(
331+
return format_response(df, service='peaks', **kwargs), NWIS_Metadata(
332332
response, **kwargs)
333333

334334

@@ -388,7 +388,7 @@ def _gwlevels(datetime_index=True, ssl_check=True, **kwargs):
388388
if datetime_index == True:
389389
df = format_datetime(df, 'lev_dt', 'lev_tm', 'lev_tz_cd')
390390

391-
return format_response(df, **kwargs), _set_metadata(response, **kwargs)
391+
return format_response(df, **kwargs), NWIS_Metadata(response, **kwargs)
392392

393393

394394
def get_stats(sites, ssl_check=True, **kwargs):
@@ -443,7 +443,7 @@ def get_stats(sites, ssl_check=True, **kwargs):
443443
response = query_waterservices('stat', sites=sites,
444444
ssl_check=ssl_check, **kwargs)
445445

446-
return _read_rdb(response.text), _set_metadata(response, **kwargs)
446+
return _read_rdb(response.text), NWIS_Metadata(response, **kwargs)
447447

448448

449449
def query_waterdata(service, ssl_check=True, **kwargs):
@@ -585,7 +585,7 @@ def _dv(ssl_check=True, **kwargs):
585585
ssl_check=ssl_check, **kwargs)
586586
df = _read_json(response.json())
587587

588-
return format_response(df, **kwargs), _set_metadata(response, **kwargs)
588+
return format_response(df, **kwargs), NWIS_Metadata(response, **kwargs)
589589

590590

591591
def get_info(ssl_check=True, **kwargs):
@@ -691,7 +691,7 @@ def get_info(ssl_check=True, **kwargs):
691691

692692
response = query_waterservices('site', ssl_check=ssl_check, **kwargs)
693693

694-
return _read_rdb(response.text), _set_metadata(response, **kwargs)
694+
return _read_rdb(response.text), NWIS_Metadata(response, **kwargs)
695695

696696

697697
def get_iv(sites=None, start=None, end=None, multi_index=True,
@@ -743,7 +743,7 @@ def _iv(ssl_check=True, **kwargs):
743743
response = query_waterservices('iv', format='json',
744744
ssl_check=ssl_check, **kwargs)
745745
df = _read_json(response.json())
746-
return format_response(df, **kwargs), _set_metadata(response, **kwargs)
746+
return format_response(df, **kwargs), NWIS_Metadata(response, **kwargs)
747747

748748

749749
def get_pmcodes(parameterCd='All', partial=True, ssl_check=True):
@@ -792,7 +792,7 @@ def get_pmcodes(parameterCd='All', partial=True, ssl_check=True):
792792
payload.update({'group_cd': '%'})
793793
url = ALLPARAMCODES_URL
794794
response = query(url, payload, ssl_check=ssl_check)
795-
return _read_rdb(response.text), _set_metadata(response)
795+
return _read_rdb(response.text), NWIS_Metadata(response)
796796

797797
else:
798798
parameterCd = [parameterCd]
@@ -814,7 +814,7 @@ def get_pmcodes(parameterCd='All', partial=True, ssl_check=True):
814814
l.append(_read_rdb(response.text))
815815
else:
816816
raise TypeError('Parameter information (code or name) must be type string')
817-
return pd.concat(l), _set_metadata(response)
817+
return pd.concat(l), NWIS_Metadata(response)
818818

819819

820820
def get_water_use(years="ALL", state=None, counties="ALL", categories="ALL",
@@ -874,7 +874,7 @@ def get_water_use(years="ALL", state=None, counties="ALL", categories="ALL",
874874
url = WATERDATA_BASE_URL + state + "/nwis/water_use"
875875
payload.update({"wu_area": "county"})
876876
response = query(url, payload, ssl_check=ssl_check)
877-
return _read_rdb(response.text), _set_metadata(response)
877+
return _read_rdb(response.text), NWIS_Metadata(response)
878878

879879

880880
def get_ratings(site=None, file_type="base", ssl_check=True, **kwargs):
@@ -932,7 +932,7 @@ def _ratings(site, file_type, ssl_check=True):
932932
raise ValueError('Unrecognized file_type: {}, must be "base", "corr" or "exsa"'.format(file_type))
933933
payload.update({"file_type" : file_type})
934934
response = query(url, payload, ssl_check=ssl_check)
935-
return _read_rdb(response.text), _set_metadata(response, site_no=site)
935+
return _read_rdb(response.text), NWIS_Metadata(response, site_no=site)
936936

937937

938938
def what_sites(ssl_check=True, **kwargs):
@@ -965,7 +965,7 @@ def what_sites(ssl_check=True, **kwargs):
965965

966966
df = _read_rdb(response.text)
967967

968-
return df, _set_metadata(response, **kwargs)
968+
return df, NWIS_Metadata(response, **kwargs)
969969

970970

971971
def get_record(sites=None, start=None, end=None,
@@ -1236,51 +1236,90 @@ def _read_rdb(rdb):
12361236
df = format_response(df)
12371237
return df
12381238

1239-
1240-
def _set_metadata(response, **parameters):
1241-
"""Generates a standard set of metadata informed by the response.
1242-
1243-
Parameters
1239+
class NWIS_Metadata(BaseMetadata):
1240+
"""Metadata class for NWIS service, derived from BaseMetadata.
1241+
1242+
Attributes
12441243
----------
1245-
response: Response
1246-
Response object from requests module
1247-
parameters: unpacked dictionary
1248-
Unpacked dictionary of the parameters supplied in the request
1249-
1250-
Returns
1251-
-------
1252-
md: :obj:`dataretrieval.utils.Metadata`
1253-
A ``dataretrieval`` custom :obj:`dataretrieval.utils.Metadata` object.
1254-
1244+
url : str
1245+
Response url
1246+
query_time: datetime.timedelta
1247+
Response elapsed time
1248+
header: requests.structures.CaseInsensitiveDict
1249+
Response headers
1250+
comment: str | None
1251+
Metadata comments, if any
1252+
site_info: tuple[pd.DataFrame, NWIS_Metadata] | None
1253+
Site information if the query included `site_no`, `sites`, `stateCd`,
1254+
`huc`, `countyCd` or `bBox`. `site_no` is preferred over `sites` if
1255+
both are present.
1256+
variable_info: tuple[pd.DataFrame, NWIS_Metadata] | None
1257+
Variable information if the query included `parameterCd`.
1258+
12551259
"""
1256-
md = set_md(response)
1257-
# site_no is preferred over sites to set site_info if both are present,
1258-
# matching behavior of the get_rating() function
1259-
if 'site_no' in parameters:
1260-
md.site_info = lambda: what_sites(sites=parameters['site_no'])
1261-
elif 'sites' in parameters:
1262-
md.site_info = lambda: what_sites(sites=parameters['sites'])
1263-
elif 'stateCd' in parameters:
1264-
md.site_info = lambda: what_sites(stateCd=parameters['stateCd'])
1265-
elif 'huc' in parameters:
1266-
md.site_info = lambda: what_sites(huc=parameters['huc'])
1267-
elif 'countyCd' in parameters:
1268-
md.site_info = lambda: what_sites(countyCd=parameters['countyCd'])
1269-
elif 'bBox' in parameters:
1270-
md.site_info = lambda: what_sites(bBox=parameters['bBox'])
1271-
else:
1272-
pass # don't set metadata site_info attribute
1273-
1274-
# define variable_info metadata based on parameterCd if available
1275-
if 'parameterCd' in parameters:
1276-
md.variable_info = lambda: get_pmcodes(
1277-
parameterCd=parameters['parameterCd'])
1278-
1279-
comments = ""
1280-
for line in response.text.splitlines():
1281-
if line.startswith("#"):
1282-
comments += line.lstrip("#") + "\n"
1283-
if comments != "":
1284-
md.comment = comments
1285-
1286-
return md
1260+
def __init__(self, response, **parameters) -> None:
1261+
"""Generates a standard set of metadata informed by the response with specific
1262+
metadata for NWIS data.
1263+
1264+
Parameters
1265+
----------
1266+
response: Response
1267+
Response object from requests module
1268+
parameters: unpacked dictionary
1269+
Unpacked dictionary of the parameters supplied in the request
1270+
1271+
Returns
1272+
-------
1273+
md: :obj:`dataretrieval.nwis.NWIS_Metadata`
1274+
A ``dataretrieval`` custom :obj:`dataretrieval.nwis.NWIS_Metadata` object.
1275+
1276+
"""
1277+
super().__init__(response)
1278+
1279+
comments = ""
1280+
for line in response.text.splitlines():
1281+
if line.startswith("#"):
1282+
comments += line.lstrip("#") + "\n"
1283+
if comments:
1284+
self.comment = comments
1285+
1286+
self._parameters = parameters
1287+
1288+
@property
1289+
def site_info(self):
1290+
"""
1291+
Return
1292+
------
1293+
df: ``pandas.DataFrame``
1294+
Formatted requested data from calling `nwis.what_sites`
1295+
md: :obj:`dataretrieval.nwis.NWIS_Metadata`
1296+
A NWIS_Metadata object
1297+
"""
1298+
if 'site_no' in self._parameters:
1299+
return what_sites(sites=self._parameters['site_no'])
1300+
1301+
elif 'sites' in self._parameters:
1302+
return what_sites(sites=self._parameters['sites'])
1303+
1304+
elif 'stateCd' in self._parameters:
1305+
return what_sites(stateCd=self._parameters['stateCd'])
1306+
1307+
elif 'huc' in self._parameters:
1308+
return what_sites(huc=self._parameters['huc'])
1309+
1310+
elif 'countyCd' in self._parameters:
1311+
return what_sites(countyCd=self._parameters['countyCd'])
1312+
1313+
elif 'bBox' in self._parameters:
1314+
return what_sites(bBox=self._parameters['bBox'])
1315+
1316+
else:
1317+
return None # don't set metadata site_info attribute
1318+
1319+
@property
1320+
def variable_info(self):
1321+
1322+
# define variable_info metadata based on parameterCd if available
1323+
if 'parameterCd' in self._parameters:
1324+
return get_pmcodes(parameterCd=self._parameters['parameterCd'])
1325+

dataretrieval/utils.py

Lines changed: 58 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
import dataretrieval
88
from dataretrieval.codes import tz
99

10-
1110
def to_str(listlike, delimiter=','):
1211
"""Translates list-like objects into strings.
1312
@@ -135,32 +134,64 @@ def update_merge(left, right, na_only=False, on=None, **kwargs):
135134

136135
return df
137136

138-
139-
class Metadata:
140-
"""Custom class for metadata.
141-
"""
142-
url = None
143-
query_time = None
144-
site_info = None
145-
header = None
146-
variable_info = None
147-
comment = None
148-
149-
# note sure what statistic_info is
150-
statistic_info = None
151-
# disclaimer seems to be only part of importWaterML1
152-
disclaimer = None
153-
154-
155-
def set_metadata(response):
156-
"""Function to initialize and set metadata from an API response.
137+
class BaseMetadata:
138+
"""Base class for metadata.
139+
140+
Attributes
141+
----------
142+
url : str
143+
Response url
144+
query_time: datetime.timedelta
145+
Response elapsed time
146+
header: requests.structures.CaseInsensitiveDict
147+
Response headers
148+
157149
"""
158-
md = Metadata()
159-
md.url = response.url
160-
md.query_time = response.elapsed
161-
md.header = response.headers
162-
return md
163-
150+
151+
def __init__(self, response) -> None:
152+
"""Generates a standard set of metadata informed by the response.
153+
154+
Parameters
155+
----------
156+
response: Response
157+
Response object from requests module
158+
159+
Returns
160+
-------
161+
md: :obj:`dataretrieval.utils.BaseMetadata`
162+
A ``dataretrieval`` custom :obj:`dataretrieval.utils.BaseMetadata` object.
163+
164+
"""
165+
166+
# These are built from the API response
167+
self.url = response.url
168+
self.query_time = response.elapsed
169+
self.header = response.headers
170+
self.comment = None
171+
172+
# # not sure what statistic_info is
173+
# self.statistic_info = None
174+
175+
# # disclaimer seems to be only part of importWaterML1
176+
# self.disclaimer = None
177+
178+
# These properties are to be set by `nwis` or `wqp`-specific metadata classes.
179+
@property
180+
def site_info(self):
181+
raise NotImplementedError(
182+
"site_info must be implemented by utils.BaseMetadata children"
183+
)
184+
185+
@property
186+
def variable_info(self):
187+
raise NotImplementedError(
188+
"variable_info must be implemented by utils.BaseMetadata children"
189+
)
190+
191+
192+
def __repr__(self) -> str:
193+
return f"{type(self).__name__}(url={self.url})"
194+
164195

165196
def query(url, payload, delimiter=',', ssl_check=True):
166197
"""Send a query.
@@ -234,3 +265,4 @@ def __init__(self, url):
234265

235266
def __str__(self):
236267
return "No sites/data found using the selection criteria specified in url: {}".format(self.url)
268+

0 commit comments

Comments
 (0)