-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Add get_solargis iotools function #1969
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 12 commits
0dda0e4
9d1151c
3a46cfb
ab5a903
0d5835c
12538ba
76edf44
a4c48b6
c4a9e8e
6c034d1
4a48ae1
b9a1862
d727406
8bce845
8260a1e
8fc8340
7bfba0d
982164b
b6ad284
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,212 @@ | ||
"""Functions to retrieve and parse irradiance data from Solargis.""" | ||
|
||
import pandas as pd | ||
import requests | ||
from dataclasses import dataclass | ||
import io | ||
|
||
URL = 'https://solargis.info/ws/rest/datadelivery/request' | ||
|
||
|
||
TIME_RESOLUTION_MAP = { | ||
5: 'MIN_5', 10: 'MIN_10', 15: 'MIN_15', 30: 'MIN_30', 60: 'HOURLY', | ||
'PT05M': 'MIN_5', 'PT5M': 'MIN_5', 'PT10M': 'MIN_10', 'PT15M': 'MIN_15', | ||
'PT30': 'MIN_30', 'PT60M': 'HOURLY', 'PT1H': 'HOURLY', 'P1D': 'DAILY', | ||
'P1M': 'MONTHLY', 'P1Y': 'YEARLY'} | ||
|
||
|
||
@dataclass | ||
class ParameterMap: | ||
solargis_name: str | ||
pvlib_name: str | ||
conversion: callable = lambda x: x | ||
|
||
|
||
# define the conventions between Solargis and pvlib nomenclature and units | ||
VARIABLE_MAP = [ | ||
# Irradiance (unit varies based on time resolution) | ||
ParameterMap('GHI', 'ghi'), | ||
ParameterMap('GHI_C', 'ghi_clear'), # this is stated in documentation | ||
ParameterMap('GHIc', 'ghi_clear'), # this is used in practice | ||
ParameterMap('DNI', 'dni'), | ||
ParameterMap('DNI_C', 'dni_clear'), | ||
ParameterMap('DNIc', 'dni_clear'), | ||
ParameterMap('DIF', 'dhi'), | ||
ParameterMap('GTI', 'poa_global'), | ||
ParameterMap('GTI_C', 'poa_global_clear'), | ||
ParameterMap('GTIc', 'poa_global_clear'), | ||
# Solar position | ||
ParameterMap('SE', 'solar_elevation'), | ||
# SA -> solar_azimuth (degrees) (different convention) | ||
ParameterMap("SA", "solar_azimuth", lambda x: x + 180), | ||
# Weather / atmospheric parameters | ||
ParameterMap('TEMP', 'temp_air'), | ||
ParameterMap('TD', 'temp_dew'), | ||
# surface_pressure (hPa) -> pressure (Pa) | ||
ParameterMap('AP', 'pressure', lambda x: x*100), | ||
ParameterMap('RH', 'relative_humidity'), | ||
ParameterMap('WS', 'wind_speed'), | ||
ParameterMap('WD', 'wind_direction'), | ||
ParameterMap('INC', 'aoi'), # angle of incidence of direct irradiance | ||
# precipitable_water (kg/m2) -> precipitable_water (cm) | ||
ParameterMap('PWAT', 'precipitable_water', lambda x: x/10), | ||
] | ||
|
||
METADATA_FIELDS = [ | ||
'issued', 'site name', 'latitude', 'longitude', 'elevation', | ||
'summarization type', 'summarization period' | ||
] | ||
|
||
|
||
# Variables that use "-9" as nan values | ||
NA_9_COLUMNS = ['GHI', 'GHIc', 'DNI', 'DNIc', 'DIF', 'GTI', 'GIc', 'KT', 'PAR', | ||
'PREC', 'PWAT', 'SDWE', 'SFWE'] | ||
|
||
|
||
def get_solargis(latitude, longitude, start, end, variables, api_key, | ||
time_resolution, timestamp_type='center', tz='GMT+00', | ||
terrain_shading=True, url=URL, map_variables=True, | ||
timeout=30): | ||
""" | ||
Retrieve irradiance time series data from Solargis. | ||
|
||
The Solargis [1]_ API is described in [2]_. | ||
|
||
Parameters | ||
---------- | ||
latitude: float | ||
In decimal degrees, between -90 and 90, north is positive (ISO 19115) | ||
longitude: float | ||
In decimal degrees, between -180 and 180, east is positive (ISO 19115) | ||
start : datetime-like | ||
Start date of time series. | ||
end : datetime-like | ||
End date of time series. | ||
variables : list | ||
List of variables to request, see [2]_ for options. | ||
api_key : str | ||
API key. | ||
time_resolution : str, {'PT05M', 'PT10M', 'PT15M', 'PT30', 'PT1H', 'P1D', 'P1M', 'P1Y'} | ||
Time frequency in the time series data. | ||
timestamp_type : {'start', 'center', 'end'}, default: 'center' | ||
Labeling of time stamps of the return data. | ||
tz : str, default : 'GMT+00' | ||
Timezone of `start` and `end` in the format "GMT+hh" or "GMT-hh". | ||
terrain_shading : boolean, default: True | ||
Whether to account for horizon shading. | ||
url : str, default : :const:`pvlib.iotools.solargis.URL` | ||
Base url of Solargis API. | ||
map_variables : boolean, default: True | ||
When true, renames columns of the Dataframe to pvlib variable names | ||
where applicable. See variable :const:`VARIABLE_MAP`. | ||
timeout : int or float, default: 30 | ||
Time in seconds to wait for server response before timeout | ||
|
||
Returns | ||
------- | ||
data : DataFrame | ||
DataFrame containing time series data. | ||
meta : dict | ||
Dictionary containing metadata. | ||
|
||
Raises | ||
------ | ||
requests.HTTPError | ||
A message from the Solargis server if the request is rejected | ||
|
||
Notes | ||
----- | ||
Each XML request is limited to retrieving 31 days of data. | ||
|
||
The variable units depends on the time frequency, e.g., the unit for | ||
sub-hourly irradiance data is :math:`W/m^2`, for hourly data it is | ||
:math:`Wh/m^2`, and for daily data it is :math:`kWh/m^2`. | ||
|
||
References | ||
---------- | ||
.. [1] `Solargis <https://solargis.com>`_ | ||
.. [2] `Solargis API User Guide | ||
<https://solargis.atlassian.net/wiki/spaces/public/pages/7602367/Solargis+API+User+Guide>`_ | ||
|
||
Examples | ||
-------- | ||
>>> # Retrieve two days of irradiance data from Solargis | ||
>>> data, meta = response = pvlib.iotools.get_solargis( | ||
>>> latitude=48.61259, longitude=20.827079, | ||
>>> start='2022-01-01', end='2022-01-02', | ||
>>> variables=['GHI', 'DNI'], time_resolution='PT05M', api_key='demo') | ||
""" # noqa: E501 | ||
# Use pd.to_datetime so that strings (e.g. '2021-01-01') are accepted | ||
start = pd.to_datetime(start) | ||
end = pd.to_datetime(end) | ||
|
||
headers = {'Content-Type': 'application/xml'} | ||
|
||
# Solargis recommends creating a unique site_id for each location request. | ||
# The site_id does not impact the data retrieval and is used for debugging. | ||
site_id = f"latitude_{latitude}_longitude_{longitude}" | ||
|
||
request_xml = f'''<ws:dataDeliveryRequest | ||
dateFrom="{start.strftime('%Y-%m-%d')}" | ||
dateTo="{end.strftime('%Y-%m-%d')}" | ||
xmlns="http://geomodel.eu/schema/data/request" | ||
xmlns:ws="http://geomodel.eu/schema/ws/data" | ||
xmlns:geo="http://geomodel.eu/schema/common/geo" | ||
xmlns:pv="http://geomodel.eu/schema/common/pv" | ||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> | ||
<site id="{site_id}" name="" lat="{latitude}" lng="{longitude}"> | ||
</site> | ||
<processing key="{' '.join(variables)}" | ||
summarization="{TIME_RESOLUTION_MAP[time_resolution].upper()}" | ||
terrainShading="{str(terrain_shading).lower()}"> | ||
<timestampType>{timestamp_type.upper()}</timestampType> | ||
<timeZone>{tz}</timeZone> | ||
</processing> | ||
</ws:dataDeliveryRequest>''' | ||
AdamRJensen marked this conversation as resolved.
Show resolved
Hide resolved
|
||
response = requests.post(url + "?key=" + api_key, headers=headers, | ||
data=request_xml.encode('utf8'), timeout=timeout) | ||
|
||
if response.ok is False: | ||
raise requests.HTTPError(response.json()) | ||
|
||
# Parse metadata | ||
header = pd.read_xml(io.StringIO(response.text), parser='etree') | ||
meta_lines = header['metadata'].iloc[0].split('#') | ||
meta_lines = [line.strip() for line in meta_lines] | ||
meta = {} | ||
for line in meta_lines: | ||
if ':' in line: | ||
key = line.split(':')[0].lower() | ||
if key in METADATA_FIELDS: | ||
meta[key] = ':'.join(line.split(':')[1:]) | ||
meta['latitude'] = float(meta['latitude']) | ||
meta['longitude'] = float(meta['longitude']) | ||
meta['altitude'] = float(meta.pop('elevation').replace('m a.s.l.', '')) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It would be great to include the data version in the returned metadata too. Unfortunately it can't be extracted based on colons as you've done for the other fields (it looks like There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree it would be nice, but judging it to be out of scope for now. |
||
|
||
# Parse data | ||
data = pd.read_xml(io.StringIO(response.text), xpath='.//doc:row', | ||
namespaces={'doc': 'http://geomodel.eu/schema/ws/data'}, | ||
parser='etree') | ||
data.index = pd.to_datetime(data['dateTime']) | ||
# when requesting one variable, it is necessary to convert dataframe to str | ||
data = data['values'].astype(str).str.split(' ', expand=True) | ||
data = data.astype(float) | ||
data.columns = header['columns'].iloc[0].split() | ||
|
||
# Replace "-9" with nan values for specific columns | ||
for variable in data.columns: | ||
if variable in NA_9_COLUMNS: | ||
data[variable] = data[variable].replace(-9, pd.NA) | ||
|
||
# rename and convert variables | ||
if map_variables: | ||
for variable in VARIABLE_MAP: | ||
if variable.solargis_name in data.columns: | ||
data.rename( | ||
columns={variable.solargis_name: variable.pvlib_name}, | ||
inplace=True | ||
) | ||
data[variable.pvlib_name] = data[ | ||
variable.pvlib_name].apply(variable.conversion) | ||
|
||
return data, meta |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
import pandas as pd | ||
import pytest | ||
import pvlib | ||
import requests | ||
from ..conftest import (RERUNS, RERUNS_DELAY, assert_frame_equal, | ||
assert_index_equal) | ||
|
||
|
||
@pytest.fixture | ||
def hourly_index(): | ||
hourly_index = pd.date_range(start='2022-01-01 00:30+01:00', freq='60min', | ||
periods=24, name='dateTime') | ||
hourly_index.freq = None | ||
return hourly_index | ||
|
||
|
||
@pytest.fixture | ||
def hourly_index_start_utc(): | ||
hourly_index_left_utc = pd.date_range( | ||
start='2023-01-01 00:00+00:00', freq='30min', periods=24*2, | ||
name='dateTime') | ||
hourly_index_left_utc.freq = None | ||
return hourly_index_left_utc | ||
|
||
|
||
@pytest.fixture | ||
def hourly_dataframe(hourly_index): | ||
ghi = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 73.0, 152.0, 141.0, 105.0, | ||
62.0, 65.0, 62.0, 11.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] | ||
dni = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 30.0, 233.0, 301.0, 136.0, 32.0, | ||
0.0, 3.0, 77.0, 5.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] | ||
return pd.DataFrame(data={'ghi': ghi, 'dni': dni}, index=hourly_index) | ||
|
||
|
||
@pytest.mark.skipif(pd.__version__ < '1.3.0', | ||
reason='pd.read_xml is new as of 1.3.0') | ||
kandersolar marked this conversation as resolved.
Show resolved
Hide resolved
|
||
@pytest.mark.remote_data | ||
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY) | ||
def test_get_solargis(hourly_dataframe): | ||
data, meta = pvlib.iotools.get_solargis( | ||
latitude=48.61259, longitude=20.827079, | ||
start='2022-01-01', end='2022-01-01', | ||
tz='GMT+01', variables=['GHI', 'DNI'], | ||
summarization='HOURLY', api_key='demo') | ||
assert_frame_equal(data, hourly_dataframe) | ||
|
||
|
||
@pytest.mark.skipif(pd.__version__ < '1.3.0', | ||
reason='pd.read_xml is new as of 1.3.0') | ||
@pytest.mark.remote_data | ||
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY) | ||
def test_get_solargis_utc_start_timestamp(hourly_index_start_utc): | ||
data, meta = pvlib.iotools.get_solargis( | ||
latitude=48.61259, longitude=20.827079, | ||
start='2023-01-01', end='2023-01-01', | ||
variables=['GTI'], | ||
timestamp_type='start', | ||
summarization='MIN_30', | ||
map_variables=False, api_key='demo') | ||
assert 'GTI' in data.columns # assert that variables aren't mapped | ||
assert_index_equal(data.index, hourly_index_start_utc) | ||
|
||
|
||
@pytest.mark.skipif(pd.__version__ < '1.3.0', | ||
reason='pd.read_xml is new as of 1.3.0') | ||
@pytest.mark.remote_data | ||
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY) | ||
def test_get_solargis_http_error(): | ||
# Test if HTTPError is raised if date outside range is specified | ||
with pytest.raises(requests.HTTPError, match="data coverage"): | ||
_, _ = pvlib.iotools.get_solargis( | ||
latitude=48.61259, longitude=20.827079, | ||
start='1920-01-01', end='1920-01-01', # date outside range | ||
variables=['GHI', 'DNI'], summarization='HOURLY', api_key='demo') |
Uh oh!
There was an error while loading. Please reload this page.