Skip to content

Commit be59498

Browse files
Add function for accessing MERRA-2 (#2572)
* function, tests, whatsnew, docs * lint * simplify tz handling * make new earthdata secrets accessible to tests * add test for tz-aware inputs * add tests for HTTPError on bad inputs * add a few more variables to docstring * Apply suggestions from code review Co-authored-by: Adam R. Jensen <[email protected]> * add link to datasets; add table with variable description * tweak tests * add LWGNT, LWGEM to variable map and docstring * change pvlib names to "longwave_net", "longwave_up" * add LWGAB --------- Co-authored-by: Adam R. Jensen <[email protected]>
1 parent bbf301e commit be59498

File tree

6 files changed

+336
-1
lines changed

6 files changed

+336
-1
lines changed

docs/sphinx/source/reference/iotools.rst

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,6 @@ lower quality.
233233

234234
iotools.read_crn
235235

236-
237236
ECMWF ERA5
238237
^^^^^^^^^^
239238

@@ -244,6 +243,17 @@ A global reanalysis dataset providing weather and solar resource data.
244243

245244
iotools.get_era5
246245

246+
MERRA-2
247+
^^^^^^^
248+
249+
A global reanalysis dataset providing weather, aerosol, and solar irradiance
250+
data.
251+
252+
.. autosummary::
253+
:toctree: generated/
254+
255+
iotools.get_merra2
256+
247257

248258
Generic data file readers
249259
-------------------------

docs/sphinx/source/whatsnew/v0.13.2.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ Enhancements
4242
installed. (:issue:`2497`, :pull:`2571`)
4343
* Add :py:func:`~pvlib.iotools.get_era5`, a function for accessing
4444
ERA5 reanalysis data. (:pull:`2573`)
45+
* Add :py:func:`~pvlib.iotools.get_merra2`, a function for accessing
46+
MERRA-2 reanalysis data. (:pull:`2572`)
4547

4648
Documentation
4749
~~~~~~~~~~~~~
@@ -77,3 +79,4 @@ Contributors
7779
* Will Hobbs (:ghuser:`williamhobbs`)
7880
* Cliff Hansen (:ghuser:`cwhanse`)
7981
* Joseph Radford (:ghuser:`josephradford`)
82+
* Kevin Anderson (:ghuser:`kandersolar`)

pvlib/iotools/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,3 +43,4 @@
4343
from pvlib.iotools.meteonorm import get_meteonorm_tmy # noqa: F401
4444
from pvlib.iotools.nasa_power import get_nasa_power # noqa: F401
4545
from pvlib.iotools.era5 import get_era5 # noqa: F401
46+
from pvlib.iotools.merra2 import get_merra2 # noqa: F401

pvlib/iotools/merra2.py

Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,196 @@
1+
import pandas as pd
2+
import requests
3+
from io import StringIO
4+
5+
6+
# Translation table from MERRA-2 variable names (keys) to pvlib column
# names (values). MERRA-2 variables that are not listed here keep their
# original names.
VARIABLE_MAP = dict(
    SWGDN='ghi',
    SWGDNCLR='ghi_clear',
    ALBEDO='albedo',
    LWGNT='longwave_net',
    LWGEM='longwave_up',
    LWGAB='longwave_down',
    T2M='temp_air',
    T2MDEW='temp_dew',
    PS='pressure',
    TOTEXTTAU='aod550',
)
18+
19+
20+
def get_merra2(latitude, longitude, start, end, username, password, dataset,
               variables, map_variables=True):
    """
    Retrieve MERRA-2 time-series irradiance and meteorological reanalysis data
    from NASA's GESDISC data archive.

    MERRA-2 [1]_ offers modeled data for many atmospheric quantities at hourly
    resolution on a 0.5° x 0.625° global grid.

    Access must be granted to the GESDISC data archive before EarthData
    credentials will work. See [2]_ for instructions.

    Parameters
    ----------
    latitude : float
        In decimal degrees, north is positive (ISO 19115).
    longitude : float
        In decimal degrees, east is positive (ISO 19115).
    start : datetime like or str
        First timestamp of the requested period. If a timezone is not
        specified, UTC is assumed.
    end : datetime like or str
        Last timestamp of the requested period. If a timezone is not
        specified, UTC is assumed. Must be in the same year as ``start``.
    username : str
        NASA EarthData username.
    password : str
        NASA EarthData password.
    dataset : str
        Dataset name (with version), e.g. "M2T1NXRAD.5.12.4".
    variables : list of str
        List of variable names to retrieve. See the documentation of the
        specific dataset you are accessing for options. A single variable
        name given as a plain ``str`` is treated as a one-element list.
    map_variables : bool, default True
        When true, renames columns of the DataFrame to pvlib variable names
        where applicable. See variable :const:`VARIABLE_MAP`.

    Returns
    -------
    data : pd.DataFrame
        Time series data. The index corresponds to the middle of the interval.
    meta : dict
        Metadata with keys ``'dataset'``, ``'station'``, ``'latitude'``,
        ``'longitude'`` and ``'units'``. ``'units'`` maps each original
        (unmapped) variable name to the unit string reported in the response,
        or to ``None`` if the response column carries no unit.

    Raises
    ------
    ValueError
        If ``start`` and ``end`` are in different years, when converted to UTC.
    requests.exceptions.HTTPError
        If the server answers the data request with an error status.

    Notes
    -----
    The following datasets provide quantities useful for PV modeling:

    +------------------------------------+-----------+---------------+
    | Dataset                            | Variable  | pvlib name    |
    +====================================+===========+===============+
    | `M2T1NXRAD.5.12.4 <M2T1NXRAD_>`_   | SWGDN     | ghi           |
    |                                    +-----------+---------------+
    |                                    | SWGDNCLR  | ghi_clear     |
    |                                    +-----------+---------------+
    |                                    | ALBEDO    | albedo        |
    |                                    +-----------+---------------+
    |                                    | LWGAB     | longwave_down |
    |                                    +-----------+---------------+
    |                                    | LWGNT     | longwave_net  |
    |                                    +-----------+---------------+
    |                                    | LWGEM     | longwave_up   |
    +------------------------------------+-----------+---------------+
    | `M2T1NXSLV.5.12.4 <M2T1NXSLV_>`_   | T2M       | temp_air      |
    |                                    +-----------+---------------+
    |                                    | U10       | n/a           |
    |                                    +-----------+---------------+
    |                                    | V10       | n/a           |
    |                                    +-----------+---------------+
    |                                    | T2MDEW    | temp_dew      |
    |                                    +-----------+---------------+
    |                                    | PS        | pressure      |
    |                                    +-----------+---------------+
    |                                    | TO3       | n/a           |
    |                                    +-----------+---------------+
    |                                    | TQV       | n/a           |
    +------------------------------------+-----------+---------------+
    | `M2T1NXAER.5.12.4 <M2T1NXAER_>`_   | TOTEXTTAU | aod550        |
    |                                    +-----------+---------------+
    |                                    | TOTSCATAU | n/a           |
    |                                    +-----------+---------------+
    |                                    | TOTANGSTR | n/a           |
    +------------------------------------+-----------+---------------+

    .. _M2T1NXRAD: https://disc.gsfc.nasa.gov/datasets/M2T1NXRAD_5.12.4/summary
    .. _M2T1NXSLV: https://disc.gsfc.nasa.gov/datasets/M2T1NXSLV_5.12.4/summary
    .. _M2T1NXAER: https://disc.gsfc.nasa.gov/datasets/M2T1NXAER_5.12.4/summary

    A complete list of datasets and their documentation is available at [3]_.

    Note that MERRA2 does not currently provide DNI or DHI.

    References
    ----------
    .. [1] https://gmao.gsfc.nasa.gov/gmao-products/merra-2/
    .. [2] https://disc.gsfc.nasa.gov/earthdata-login
    .. [3] https://disc.gsfc.nasa.gov/datasets?project=MERRA-2
    """

    # general API info here:
    # https://docs.unidata.ucar.edu/tds/5.0/userguide/netcdf_subset_service_ref.html  # noqa: E501

    def _to_utc_dt_notz(dt):
        # Normalize any datetime-like input to a tz-naive timestamp that is
        # expressed in UTC (tz-naive inputs are taken to already be UTC).
        dt = pd.to_datetime(dt)
        if dt.tzinfo is not None:
            # convert to utc, then drop tz so that isoformat() is clean
            dt = dt.tz_convert("UTC").tz_localize(None)
        return dt

    start = _to_utc_dt_notz(start)
    end = _to_utc_dt_notz(end)

    # the URL below addresses one yearly aggregation, so a single request
    # cannot span a year boundary
    if (year := start.year) != end.year:
        raise ValueError("start and end must be in the same year (in UTC)")

    if isinstance(variables, str):
        # a bare string would otherwise be joined character by character
        # ("SWGDN" -> "S,W,G,D,N") and request nonsense variables
        variables = [variables]

    url = (
        "https://goldsmr4.gesdisc.eosdis.nasa.gov/thredds/ncss/grid/"
        f"MERRA2_aggregation/{dataset}/{dataset}_Aggregation_{year}.ncml"
    )

    parameters = {
        'var': ",".join(variables),
        'latitude': latitude,
        'longitude': longitude,
        'time_start': start.isoformat() + "Z",
        'time_end': end.isoformat() + "Z",
        'accept': 'csv',
    }

    auth = (username, password)

    with requests.Session() as session:
        session.auth = auth
        # NOTE(review): two-step request -- the first call appears to resolve
        # the EarthData login redirect and the second one fetches the data
        # from the resolved URL; confirm against the GES DISC access guide
        # before simplifying.
        login = session.request('get', url, params=parameters)
        response = session.get(login.url, auth=auth, params=parameters)

    response.raise_for_status()

    content = response.content.decode('utf-8')
    buffer = StringIO(content)
    df = pd.read_csv(buffer)

    df.index = pd.to_datetime(df['time'])

    meta = {}
    meta['dataset'] = dataset
    meta['station'] = df['station'].values[0]
    meta['latitude'] = df['latitude[unit="degrees_north"]'].values[0]
    meta['longitude'] = df['longitude[unit="degrees_east"]'].values[0]

    # drop the non-data columns
    dropcols = ['time', 'station', 'latitude[unit="degrees_north"]',
                'longitude[unit="degrees_east"]']
    df = df.drop(columns=dropcols)

    # column names are like T2M[unit="K"] by default.  extract the unit
    # for the metadata, then rename col to just T2M
    units = {}
    rename = {}
    for col in df.columns:
        # partition() never raises, so a column that lacks the [unit="..."]
        # suffix keeps its name and gets unit None instead of crashing here
        name, _, suffix = col.partition("[")
        quoted = suffix.split('"')
        units[name] = quoted[1] if len(quoted) > 1 else None
        rename[col] = name

    meta['units'] = units
    df = df.rename(columns=rename)

    if map_variables:
        df = df.rename(columns=VARIABLE_MAP)

    return df, meta

tests/conftest.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,20 @@ def nrel_api_key():
143143
reason='requires ECMWF credentials')
144144

145145

146+
# NASA EarthData credentials used for testing pvlib.iotools.get_merra2.
# Both environment variables must be present for the flag to be True.
try:
    earthdata_username = os.environ["EARTHDATA_USERNAME"]
    earthdata_password = os.environ["EARTHDATA_PASSWORD"]
except KeyError:
    has_earthdata_credentials = False
else:
    has_earthdata_credentials = True

# Marker that skips a test when the EarthData credentials are unavailable.
requires_earthdata_credentials = pytest.mark.skipif(
    not has_earthdata_credentials,
    reason='requires EarthData credentials',
)
158+
159+
146160
try:
147161
import statsmodels # noqa: F401
148162
has_statsmodels = True

tests/iotools/test_merra2.py

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
"""
2+
tests for pvlib/iotools/merra2.py
3+
"""
4+
5+
import pandas as pd
6+
import pytest
7+
import pvlib
8+
import os
9+
import requests
10+
from tests.conftest import RERUNS, RERUNS_DELAY, requires_earthdata_credentials
11+
12+
13+
@pytest.fixture
def params():
    """Keyword arguments for a small get_merra2 request.

    The credentials are read from the EARTHDATA_USERNAME and
    EARTHDATA_PASSWORD environment variables.
    """
    return dict(
        latitude=40.01,
        longitude=-80.01,
        start='2020-06-01 15:00',
        end='2020-06-01 20:00',
        dataset='M2T1NXRAD.5.12.4',
        variables=['ALBEDO', 'SWGDN'],
        username=os.environ["EARTHDATA_USERNAME"],
        password=os.environ["EARTHDATA_PASSWORD"],
    )
24+
25+
26+
@pytest.fixture
def expected():
    """Reference DataFrame (pvlib column names) for the ``params`` request."""
    columns = {
        'albedo': [
            0.163931, 0.1609407, 0.1601474, 0.1612476, 0.164664, 0.1711341,
        ],
        'ghi': [930.0, 1002.75, 1020.25, 981.25, 886.5, 743.5],
    }
    # hourly timestamps on the half hour, in UTC
    timestamps = pd.date_range("2020-06-01 15:30", "2020-06-01 20:30",
                               freq="h", tz="UTC")
    timestamps.name = 'time'
    return pd.DataFrame(columns, index=timestamps)
35+
36+
37+
@pytest.fixture
def expected_meta():
    """Reference metadata dict for the ``params`` request."""
    units = {'ALBEDO': '1', 'SWGDN': 'W m-2'}
    return dict(
        dataset='M2T1NXRAD.5.12.4',
        station='GridPointRequestedAt[40.010N_80.010W]',
        latitude=40.0,
        longitude=-80.0,
        units=units,
    )
46+
47+
48+
@requires_earthdata_credentials
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_merra2(params, expected, expected_meta):
    """Default call returns the reference data and metadata."""
    data, metadata = pvlib.iotools.get_merra2(**params)
    assert metadata == expected_meta
    pd.testing.assert_frame_equal(data, expected, check_freq=False)
55+
56+
57+
@requires_earthdata_credentials
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_merra2_map_variables(params, expected, expected_meta):
    """With map_variables=False the columns keep their MERRA-2 names."""
    data, metadata = pvlib.iotools.get_merra2(**params, map_variables=False)
    original_names = {'albedo': 'ALBEDO', 'ghi': 'SWGDN'}
    unmapped = expected.rename(columns=original_names)
    assert metadata == expected_meta
    pd.testing.assert_frame_equal(data, unmapped, check_freq=False)
65+
66+
67+
def test_get_merra2_error():
    """A request spanning two calendar years raises ValueError."""
    placeholders = dict(username='anything', password='anything',
                        dataset='anything', variables=[])
    with pytest.raises(ValueError, match='must be in the same year'):
        pvlib.iotools.get_merra2(40, -80, '2019-12-31', '2020-01-02',
                                 **placeholders)
72+
73+
74+
@requires_earthdata_credentials
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_merra2_timezones(params, expected, expected_meta):
    """tz-aware start/end give the same result as the naive inputs."""
    # re-express the same instants in a non-UTC timezone
    for bound in ('start', 'end'):
        as_utc = pd.to_datetime(params[bound]).tz_localize('UTC')
        params[bound] = as_utc.tz_convert('Etc/GMT+5')
    data, metadata = pvlib.iotools.get_merra2(**params)
    assert metadata == expected_meta
    pd.testing.assert_frame_equal(data, expected, check_freq=False)
85+
86+
87+
@requires_earthdata_credentials
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_merra2_bad_credentials(params):
    """An unknown username makes the request fail with an HTTPError."""
    # only the ``params`` fixture is needed; no reference data is compared
    params['username'] = 'nonexistent'
    with pytest.raises(requests.exceptions.HTTPError, match='Unauthorized'):
        pvlib.iotools.get_merra2(**params)
94+
95+
96+
@requires_earthdata_credentials
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_merra2_bad_dataset(params):
    """An unknown dataset name makes the request fail with an HTTPError."""
    # only the ``params`` fixture is needed; no reference data is compared
    params['dataset'] = 'nonexistent'
    with pytest.raises(requests.exceptions.HTTPError, match='404'):
        pvlib.iotools.get_merra2(**params)
103+
104+
105+
@requires_earthdata_credentials
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_merra2_bad_variables(params):
    """An unknown variable name makes the request fail with an HTTPError."""
    # only the ``params`` fixture is needed; no reference data is compared
    params['variables'] = ['nonexistent']
    with pytest.raises(requests.exceptions.HTTPError, match='400'):
        pvlib.iotools.get_merra2(**params)

0 commit comments

Comments
 (0)