Merged
Changes from 16 commits
20 changes: 19 additions & 1 deletion docs/source/api.rst
@@ -279,8 +279,25 @@ DOE RTC
   io.fetch.rtc.request_doe_rtc_data
   io.fetch.rtc.fetch_doe_rtc

NREL PVDAQ
----------

.. autosummary::
   :toctree: generated/

   io.fetch.pvdaq.get_pvdaq_metadata
   io.fetch.pvdaq.get_pvdaq_data


Reference observations
----------------------
======================

The following modules contain code for initializing the reference
database, wrappers for fetching data, functions for processing (e.g.
renaming and resampling) data, and wrapper functions for posting data.
The pure fetch functions are found in ``pvlib.iotools`` and in
``solarforecastarbiter.io.fetch``. See the source code for additional
files with site and observation metadata.

.. autosummary::
   :toctree: generated/
@@ -295,6 +312,7 @@ Reference observations
   io.reference_observations.srml
   io.reference_observations.surfrad
   io.reference_observations.arm
   io.reference_observations.pvdaq

SFA API
=======
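The paragraph added above describes a fetch -> process -> post layering. A
minimal, illustrative sketch of that flow follows; none of these names are the
Arbiter's actual functions (the real ones live in ``pvlib.iotools``,
``solarforecastarbiter.io.fetch``, and ``io.reference_observations``):

```python
import pandas as pd


def fetch(site, start, end):
    # pure fetch: request raw network data for one site, return a DataFrame
    index = pd.date_range(start, end, freq='5min', tz='UTC')
    return pd.DataFrame({'ghi_raw': 0.0}, index=index)


def process(data, variable):
    # processing: rename network columns to Arbiter variable names and sort
    return data.rename(columns={'ghi_raw': variable}).sort_index()


def post(observation_name, data):
    # posting: stand-in for uploading values through the Arbiter API
    print(f'posting {len(data)} values for {observation_name}')


post('example ghi', process(fetch(None, '2020-01-01', '2020-01-02'), 'ghi'))
```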
3 changes: 2 additions & 1 deletion docs/source/whatsnew/1.0.0rc1.rst
@@ -23,7 +23,8 @@ Enhancements
  limit each request to one week of data (:issue:`424`) (:pull:`435`)
* PDF report figures are generated instead of SVG for easy integration into PDF
  reports (:issue:`360`) (:pull:`437`)
* Added support for NREL PVDAQ sites to the reference database functions
  (:issue:`397`) (:pull:`438`)

Bug fixes
~~~~~~~~~
2 changes: 1 addition & 1 deletion solarforecastarbiter/cli.py
@@ -161,7 +161,7 @@ def referencedata():

network_opt = click.option(
    '--network', multiple=True,
    help="The Networks to act on. Defaults to all.",
    help="The networks to act on. Defaults to all.",
    default=reference_data.NETWORK_OPTIONS,
    type=click.Choice(reference_data.NETWORK_OPTIONS))

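For context, ``multiple=True`` combined with a ``Choice`` type means repeated
``--network`` flags accumulate into a tuple and each value is validated against
the allowed list. A self-contained sketch of that behavior; the option list
below is made up, not the real ``reference_data.NETWORK_OPTIONS``:

```python
import click

NETWORKS = ['NOAA SURFRAD', 'NREL MIDC', 'NREL PVDAQ']  # illustrative list


@click.command()
@click.option('--network', multiple=True, default=NETWORKS,
              type=click.Choice(NETWORKS),
              help="The networks to act on. Defaults to all.")
def update(network):
    # click collects repeated --network flags into a tuple, e.g.
    # --network 'NREL PVDAQ' --network 'NREL MIDC' -> ('NREL PVDAQ', 'NREL MIDC')
    click.echo(', '.join(network))


if __name__ == '__main__':
    update()
```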
2 changes: 1 addition & 1 deletion solarforecastarbiter/datamodel.py
@@ -478,7 +478,7 @@ class Observation(BaseModel):
        Variable name, e.g. power, GHI. Each allowed variable has an
        associated pre-defined unit.
    interval_value_type : str
        The type of the data in the observation. Typically interval mean or
        The type of the data in the observation. Typically interval_mean or
        instantaneous, but additional types may be defined for events.
    interval_length : pandas.Timedelta
        The length of time between consecutive data points, e.g. 5 minutes,
106 changes: 106 additions & 0 deletions solarforecastarbiter/io/fetch/pvdaq.py
@@ -0,0 +1,106 @@
"""Functions to read NREL PVDAQ data.
"""

# Code originally written by Bennet Meyers (@bmeyers), Stanford, SLAC in
# https://github.com/pvlib/pvlib-python/pull/664
# Adapted by Will Holmgren (@wholmgren), University of Arizona

import json
from io import StringIO

import requests
import pandas as pd


# consider adding an auth=(username, password) kwarg (default None) to
# support private data queries

def get_pvdaq_metadata(system_id, api_key):
"""Query PV system metadata from NREL's PVDAQ data service.
Parameters
----------
system_id: int
The system ID corresponding to the site that data should be
queried from.
api_key: string
Your NREL API key (https://developer.nrel.gov/docs/api-key/)
Returns
-------
dict
"""

params = {'system_id': system_id, 'api_key': api_key}
sites_url = 'https://developer.nrel.gov/api/pvdaq/v3/sites.json'
r = requests.get(sites_url, params=params)
r.raise_for_status()
outputs = json.loads(r.content)['outputs']
return outputs


def get_pvdaq_data(system_id, year, api_key='DEMO_KEY'):
"""Query PV system data from NREL's PVDAQ data service:
https://maps.nrel.gov/pvdaq/
This function uses the annual raw data file API, which is the most
efficient way of accessing multi-year, sub-hourly time series data.
Parameters
----------
system_id: int
The system ID corresponding to the site that data should be
queried from.
year: int or list of ints
Either the year to request or the list of years to request.
Multiple years will be concatenated into a single DataFrame.
api_key: string
Your NREL API key (https://developer.nrel.gov/docs/api-key/)
Returns
-------
pandas.DataFrame
A DataFrame containing the time series data from the
PVDAQ service over the years requested. Times are typically
in local time.
Notes
-----
The PVDAQ metadata contains a key "available_years" that is a useful
value for the *year* argument.
"""

    try:
        year = int(year)
    except TypeError:
        year = [int(yr) for yr in year]
    else:
        year = [year]

    # Each year must be queried separately, so iterate over the years and
    # generate a list of dataframes.
    # Consider putting this loop in its own private function with
    # try / except / try again pattern for network issues and NREL API
    # throttling

[Review comment, Contributor] will we see any issues from this?

[Reply, Member Author] The pvlib CI struggled with a different NREL API but I
haven't run into any with the pvdaq API. They have a 1000 requests per hour
limit but we are nowhere close to that. Let's see if it's a problem in the rc
cycle.
    df_list = []
    for yr in year:
        params = {
            'api_key': api_key,
            'system_id': system_id,
            'year': yr
        }
        base_url = 'https://developer.nrel.gov/api/pvdaq/v3/data_file'
        response = requests.get(base_url, params=params)
        response.raise_for_status()
        df = pd.read_csv(StringIO(response.text))
        df_list.append(df)

    # concatenate the list of yearly DataFrames
    df = pd.concat(df_list, axis=0, sort=True)
    df['Date-Time'] = pd.to_datetime(df['Date-Time'])
    df.set_index('Date-Time', inplace=True)
    return df
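A sketch of how these functions might be used together, including the retry
wrapper the inline comment above contemplates. The system id and years are
placeholders, and ``fetch_with_retry`` is hypothetical, not part of the module:

```python
import time

import requests

from solarforecastarbiter.io.fetch import pvdaq


def fetch_with_retry(system_id, years, api_key, retries=3, backoff=5.0):
    # hypothetical helper: retry transient network failures with a growing
    # delay, re-raising if the last attempt still fails
    for attempt in range(retries):
        try:
            return pvdaq.get_pvdaq_data(system_id, years, api_key=api_key)
        except requests.exceptions.RequestException:
            if attempt == retries - 1:
                raise
            time.sleep(backoff * (attempt + 1))


metadata = pvdaq.get_pvdaq_metadata(1276, 'DEMO_KEY')  # placeholder system id
data = fetch_with_retry(1276, [2018, 2019], 'DEMO_KEY')
```

The "available_years" entry mentioned in the Notes above is a natural source
for the years argument.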
16 changes: 9 additions & 7 deletions solarforecastarbiter/io/reference_observations/README.md
@@ -1,8 +1,10 @@
# Reference Observaitons
Module for importing reference data into the SolarForecastArbiter.
# Reference Observations

This module serves two purposes:
- Creating reference metadata objects.
  - Any sites found in an observation network and their associated Observations and avaialable metadata.
- Importing measurements
  - Interacting with a Network's API to import the appropriate data as it becomes available.
Package for importing reference data into the Solar Forecast Arbiter.

This package serves two purposes:

- Creating reference metadata objects.
  - Any sites found in an observation network and their associated Observations and available metadata.
- Importing reference measurements
  - Interacting with a network's API to import the appropriate data as it becomes available.
34 changes: 25 additions & 9 deletions solarforecastarbiter/io/reference_observations/common.py
@@ -193,16 +193,21 @@ def create_observation(api, site, variable, extra_params=None, **kwargs):
        'variable': variable,
        'extra_parameters': json.dumps(extra_parameters)
    })

    return check_and_post_observation(api, observation)


def check_and_post_observation(api, observation):
    existing = existing_observations(api)
    if observation.name in existing:
        logger.info('Observation, %s, already exists', observation_name)
        logger.info('Observation, %s, already exists', observation.name)
        return existing[observation.name]

    try:
        created = api.create_observation(observation)
    except HTTPError as e:
        logger.error(f'Failed to create {variable} observation at Site '
                     f'{site.name}.')
        logger.error(f'Failed to create {observation.variable} observation '
                     f'at Site {observation.site.name}.')
        logger.debug(f'HTTP Error: {e.response.text}')
    else:
        logger.info(f"Observation {created.name} created successfully.")
@@ -263,7 +268,7 @@ def update_site_observations(api, fetch_func, site, observations,
        An active Reference user session.
    fetch_func : function
        A function that requests data and returns a DataFrame for a given site.
        The function should accept the parameters (api, site, start end) as
        The function should accept the parameters (api, site, start, end) as
        they appear in this function.
    site : solarforecastarbiter.datamodel.Site
        The Site with observations to update.
@@ -291,21 +296,26 @@ def update_site_observations(api, fetch_func, site, observations,
        post_observation_data(api, obs, data_in_range, start, end)


def _prepare_data_to_post(data, variable, observation, start, end):
def _prepare_data_to_post(data, variable, observation, start, end,
                          resample_how):
    """Manipulate the data including reindexing to observation.interval_label
    to prepare for posting"""
    data = data[[variable]]
    data = data.rename(columns={variable: 'value'})
    # ensure data is sorted before slicing and for optimal order in the
    # database
    data = data.sort_index()

    if resample_how:
        resampler = data.resample(observation.interval_length)
        data = getattr(resampler, resample_how)()

    # remove all future values, some files have forward filled nightly data
    data = data[start:min(end, _utcnow())]
    # we assume any reference data is given at the proper intervals
    # and already averaged if appropriate
    # so just reindex the data to put nans where required

    if data.empty:
        return data
    # reindex the data to put nans where required
    # we don't extend the new index to start, end, since reference
    # data has some lag time from the end it was requested from
    # and it isn't necessary to keep the nans between uploads in db
@@ -349,14 +359,20 @@ def post_observation_data(api, observation, data, start, end):
    # check for a non-standard variable label in extra_parameters
    variable = extra_parameters.get('network_data_label',
                                    observation.variable)
    # check if the raw observation needs to be resampled before posting
    resample_how = extra_parameters.get('resample_how', None)
    try:
        var_df = _prepare_data_to_post(data, variable, observation,
                                       start, end)
                                       start, end, resample_how)
    except KeyError:
        logger.error(f'{variable} could not be found in the data file '
                     f'from {data.index[0]} to {data.index[-1]} '
                     f'for Observation {observation.name}')
        return
    except AttributeError:
        logger.error(f'{variable} could not be resampled using method '
                     f'{resample_how} for Observation {observation.name}')
        return

    # skip post if data is empty; if there are nans, should still post
    if var_df.empty:
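The ``resample_how`` handling above selects the pandas resampler method by
name with ``getattr``, which is why an unrecognized method surfaces as the
``AttributeError`` caught in ``post_observation_data``. A standalone sketch of
that pattern with made-up data:

```python
import pandas as pd

index = pd.date_range('2020-01-01', periods=10, freq='1min', tz='UTC')
data = pd.DataFrame({'value': range(10)}, index=index)

resample_how = 'mean'  # would come from the observation's extra_parameters
resampler = data.resample(pd.Timedelta('5min'))
resampled = getattr(resampler, resample_how)()  # equivalent to resampler.mean()
print(resampled)

# an unknown method name raises AttributeError:
# getattr(resampler, 'not_a_method')()
```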
4 changes: 4 additions & 0 deletions solarforecastarbiter/io/reference_observations/midc.py
@@ -103,10 +103,14 @@ def update_observation_data(api, sites, observations, start, end):
"""Post new observation data to all MIDC observations from
start to end.

Parameters
----------
api : solarforecastarbiter.io.api.APISession
An active Reference user session.
sites: list
List of all reference sites as Objects
observations: list of solarforecastarbiter.datamodel.Observation
List of all reference observations.
start : datetime
The beginning of the period to request data for.
end : datetime