From fb7d41c22876182c4cf248f02e628fff626e522c Mon Sep 17 00:00:00 2001 From: "Adam R. Jensen" <39184289+AdamRJensen@users.noreply.github.com> Date: Tue, 19 Dec 2023 19:23:42 +0100 Subject: [PATCH 1/7] Update filestructure of get_srml --- pvlib/iotools/srml.py | 24 +++++---- pvlib/tests/iotools/test_srml.py | 83 +++++--------------------------- 2 files changed, 28 insertions(+), 79 deletions(-) diff --git a/pvlib/iotools/srml.py b/pvlib/iotools/srml.py index 01b835da6d..c8d171708c 100644 --- a/pvlib/iotools/srml.py +++ b/pvlib/iotools/srml.py @@ -5,6 +5,7 @@ import pandas as pd import urllib import warnings +import os from pvlib._deprecation import deprecated @@ -26,6 +27,8 @@ '937': 'temp_cell', } +URL = 'http://is-solardata01.uoregon.edu/Step3B_Original_Format/' + def read_srml(filename, map_variables=True): """ @@ -204,7 +207,7 @@ def read_srml_month_from_solardat(station, year, month, filetype='PO', raw or processed data. For instance, `RO` designates raw, one minute data and `PO` designates processed one minute data. The availability of file types varies between sites. Below is a table of file types - and their time intervals. See [1] for site information. + and their time intervals. See [1]_ for site information. ============= ============ ================== time interval raw filetype processed filetype @@ -231,7 +234,7 @@ def read_srml_month_from_solardat(station, year, month, filetype='PO', def get_srml(station, start, end, filetype='PO', map_variables=True, - url="http://solardat.uoregon.edu/download/Archive/"): + url=URL): """Request data from UoO SRML and read it into a Dataframe. The University of Oregon Solar Radiation Monitoring Laboratory (SRML) is @@ -242,7 +245,7 @@ def get_srml(station, start, end, filetype='PO', map_variables=True, Parameters ---------- station : str - Two letter station abbreviation. + Three letter station abbreviation. start : datetime-like First day of the requested period end : datetime-like @@ -252,7 +255,7 @@ def get_srml(station, start, end, filetype='PO', map_variables=True, map_variables : bool, default: True When true, renames columns of the DataFrame to pvlib variable names where applicable. See variable :const:`VARIABLE_MAP`. - url : str, default: 'http://solardat.uoregon.edu/download/Archive/' + url : str, default: :const:`URL` API endpoint URL Returns @@ -292,6 +295,10 @@ def get_srml(station, start, end, filetype='PO', map_variables=True, `http://solardat.uoregon.edu/StationIDCodes.html `_ """ + # prior to pvlib 0.10.3 the function used 2-letter abbreviations + if len(station) != 3: + raise ValueError('`station` should be a 3 letter station abbreviation') + # Use pd.to_datetime so that strings (e.g. '2021-01-01') are accepted start = pd.to_datetime(start) end = pd.to_datetime(end) @@ -299,11 +306,10 @@ def get_srml(station, start, end, filetype='PO', map_variables=True, # Generate list of months months = pd.date_range( start, end.replace(day=1) + pd.DateOffset(months=1), freq='1M') - months_str = months.strftime('%y%m') - - # Generate list of filenames - filenames = [f"{station}{filetype}{m}.txt" for m in months_str] + # Generate list of filenames (note basename uses two-letter abbreviation) + filenames = [f"{station}/{station}_{m.year}/{station[:2]}{filetype}{m.strftime('%y%m')}.txt" for m in months] # noqa: E501 + print(filenames) dfs = [] # Initialize list of monthly dataframes for f in filenames: try: @@ -316,6 +322,6 @@ def get_srml(station, start, end, filetype='PO', map_variables=True, meta = {'filetype': filetype, 'station': station, - 'filenames': filenames} + 'filenames': [os.path.basename(f) for f in filenames]} return data, meta diff --git a/pvlib/tests/iotools/test_srml.py b/pvlib/tests/iotools/test_srml.py index 308ea3a3a1..929a54e35f 100644 --- a/pvlib/tests/iotools/test_srml.py +++ b/pvlib/tests/iotools/test_srml.py @@ -1,6 +1,7 @@ from numpy import isnan import pandas as pd import pytest +import os from pvlib.iotools import srml from ..conftest import (DATA_DIR, RERUNS, RERUNS_DELAY, assert_index_equal, @@ -9,16 +10,17 @@ srml_testfile = DATA_DIR / 'SRML-day-EUPO1801.txt' +BASE_URL = 'http://is-solardata01.uoregon.edu/Step3B_Original_Format/' + def test_read_srml(): srml.read_srml(srml_testfile) -@pytest.mark.skip(reason="SRML server is undergoing maintenance as of 12-2023") @pytest.mark.remote_data @pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY) def test_read_srml_remote(): - srml.read_srml('http://solardat.uoregon.edu/download/Archive/EUPO1801.txt') + srml.read_srml(os.path.join(BASE_URL, 'EUO/EUO_2018/EUPO1801.txt')) def test_read_srml_columns_exist(): @@ -47,11 +49,10 @@ def test_read_srml_nans_exist(): assert data['dni_0_flag'].iloc[1119] == 99 -@pytest.mark.skip(reason="SRML server is undergoing maintenance as of 12-2023") @pytest.mark.parametrize('url,year,month', [ - ('http://solardat.uoregon.edu/download/Archive/EUPO1801.txt', + (os.path.join(BASE_URL, 'EUO/EUO_2018/EUPO1801.txt'), 2018, 1), - ('http://solardat.uoregon.edu/download/Archive/EUPO1612.txt', + (os.path.join(BASE_URL, 'EUO/EUO_2016/EUPO1612.txt'), 2016, 12), ]) @pytest.mark.remote_data @@ -78,78 +79,21 @@ def test__map_columns(column, expected): assert srml._map_columns(column) == expected -@pytest.mark.skip(reason="SRML server is undergoing maintenance as of 12-2023") @pytest.mark.remote_data @pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY) def test_get_srml(): - url = 'http://solardat.uoregon.edu/download/Archive/EUPO1801.txt' + url = os.path.join(BASE_URL, 'EUO/EUO_2018/EUPO1801.txt') file_data = srml.read_srml(url) - requested, _ = srml.get_srml(station='EU', start='2018-01-01', + requested, _ = srml.get_srml(station='EUO', start='2018-01-01', end='2018-01-31') assert_frame_equal(file_data, requested) -@pytest.mark.skip(reason="SRML server is undergoing maintenance as of 12-2023") -@fail_on_pvlib_version('0.11') -@pytest.mark.remote_data -@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY) -def test_read_srml_month_from_solardat(): - url = 'http://solardat.uoregon.edu/download/Archive/EUPO1801.txt' - file_data = srml.read_srml(url) - with pytest.warns(pvlibDeprecationWarning, match='get_srml instead'): - requested = srml.read_srml_month_from_solardat('EU', 2018, 1) - assert file_data.equals(requested) - - -@pytest.mark.skip(reason="SRML server is undergoing maintenance as of 12-2023") -@fail_on_pvlib_version('0.11') -@pytest.mark.remote_data -@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY) -def test_15_minute_dt_index(): - with pytest.warns(pvlibDeprecationWarning, match='get_srml instead'): - data = srml.read_srml_month_from_solardat('TW', 2019, 4, 'RQ') - start = pd.Timestamp('20190401 00:00') - start = start.tz_localize('Etc/GMT+8') - end = pd.Timestamp('20190430 23:45') - end = end.tz_localize('Etc/GMT+8') - assert data.index[0] == start - assert data.index[-1] == end - assert (data.index[3::4].minute == 45).all() - - -@pytest.mark.skip(reason="SRML server is undergoing maintenance as of 12-2023") -@fail_on_pvlib_version('0.11') -@pytest.mark.remote_data -@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY) -def test_hourly_dt_index(): - with pytest.warns(pvlibDeprecationWarning, match='get_srml instead'): - data = srml.read_srml_month_from_solardat('CD', 1986, 4, 'PH') - start = pd.Timestamp('19860401 00:00') - start = start.tz_localize('Etc/GMT+8') - end = pd.Timestamp('19860430 23:00') - end = end.tz_localize('Etc/GMT+8') - assert data.index[0] == start - assert data.index[-1] == end - assert (data.index.minute == 0).all() - - -@pytest.mark.skip(reason="SRML server is undergoing maintenance as of 12-2023") -@pytest.mark.remote_data -@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY) -def test_get_srml_hourly(): - data, meta = data, meta = srml.get_srml(station='CD', start='1986-04-01', - end='1986-05-31', filetype='PH') - expected_index = pd.date_range(start='1986-04-01', end='1986-05-31 23:59', - freq='1h', tz='Etc/GMT+8') - assert_index_equal(data.index, expected_index) - - -@pytest.mark.skip(reason="SRML server is undergoing maintenance as of 12-2023") @pytest.mark.remote_data @pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY) def test_get_srml_minute(): data_read = srml.read_srml(srml_testfile) - data_get, meta = srml.get_srml(station='EU', start='2018-01-01', + data_get, meta = srml.get_srml(station='EUO', start='2018-01-01', end='2018-01-31', filetype='PO') expected_index = pd.date_range(start='2018-01-01', end='2018-01-31 23:59', freq='1min', tz='Etc/GMT+8') @@ -157,17 +101,16 @@ def test_get_srml_minute(): assert all(c in data_get.columns for c in data_read.columns) # Check that all indices in example file are present in remote file assert data_read.index.isin(data_get.index).all() - assert meta['station'] == 'EU' + assert meta['station'] == 'EUO' assert meta['filetype'] == 'PO' assert meta['filenames'] == ['EUPO1801.txt'] -@pytest.mark.skip(reason="SRML server is undergoing maintenance as of 12-2023") @pytest.mark.remote_data @pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY) def test_get_srml_nonexisting_month_warning(): - with pytest.warns(UserWarning, match='file was not found: EUPO0912.txt'): + with pytest.warns(UserWarning, match='file was not found: EUO/EUO_2009/EUPO0912.txt'): # noqa: E501 # Request data for a period where not all files exist - # Eugene (EU) station started reporting 1-minute data in January 2010 + # Eugene (EUO) station started reporting 1-minute data in January 2010 data, meta = data, meta = srml.get_srml( - station='EU', start='2009-12-01', end='2010-01-31', filetype='PO') + station='EUO', start='2009-12-01', end='2010-01-31', filetype='PO') From 143cab1218c807d93948c1c29e1f8d589a7b8059 Mon Sep 17 00:00:00 2001 From: "Adam R. Jensen" <39184289+AdamRJensen@users.noreply.github.com> Date: Tue, 19 Dec 2023 19:36:08 +0100 Subject: [PATCH 2/7] Update iotools.rst --- docs/sphinx/source/reference/iotools.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/sphinx/source/reference/iotools.rst b/docs/sphinx/source/reference/iotools.rst index b41a30b203..38e563bf2a 100644 --- a/docs/sphinx/source/reference/iotools.rst +++ b/docs/sphinx/source/reference/iotools.rst @@ -19,7 +19,6 @@ of sources and file formats relevant to solar energy modeling. iotools.read_epw iotools.parse_epw iotools.read_srml - iotools.read_srml_month_from_solardat iotools.get_srml iotools.read_surfrad iotools.read_midc From 36d56c322b7501f93b296cab6ed55e5dbf02112b Mon Sep 17 00:00:00 2001 From: "Adam R. Jensen" <39184289+AdamRJensen@users.noreply.github.com> Date: Tue, 19 Dec 2023 19:36:10 +0100 Subject: [PATCH 3/7] Update __init__.py --- pvlib/iotools/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pvlib/iotools/__init__.py b/pvlib/iotools/__init__.py index 9935719b29..da0b324fcb 100644 --- a/pvlib/iotools/__init__.py +++ b/pvlib/iotools/__init__.py @@ -1,7 +1,6 @@ from pvlib.iotools.tmy import read_tmy2, read_tmy3 # noqa: F401 from pvlib.iotools.epw import read_epw, parse_epw # noqa: F401 from pvlib.iotools.srml import read_srml # noqa: F401 -from pvlib.iotools.srml import read_srml_month_from_solardat # noqa: F401 from pvlib.iotools.srml import get_srml # noqa: F401 from pvlib.iotools.surfrad import read_surfrad # noqa: F401 from pvlib.iotools.midc import read_midc # noqa: F401 From d029684a24b46a09b4dbcb76f462d85313ec7875 Mon Sep 17 00:00:00 2001 From: "Adam R. Jensen" <39184289+AdamRJensen@users.noreply.github.com> Date: Tue, 19 Dec 2023 19:36:23 +0100 Subject: [PATCH 4/7] Add ValueError 2-letter test --- pvlib/tests/iotools/test_srml.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/pvlib/tests/iotools/test_srml.py b/pvlib/tests/iotools/test_srml.py index 929a54e35f..b4a280624e 100644 --- a/pvlib/tests/iotools/test_srml.py +++ b/pvlib/tests/iotools/test_srml.py @@ -5,8 +5,7 @@ from pvlib.iotools import srml from ..conftest import (DATA_DIR, RERUNS, RERUNS_DELAY, assert_index_equal, - assert_frame_equal, fail_on_pvlib_version) -from pvlib._deprecation import pvlibDeprecationWarning + assert_frame_equal) srml_testfile = DATA_DIR / 'SRML-day-EUPO1801.txt' @@ -114,3 +113,13 @@ def test_get_srml_nonexisting_month_warning(): # Eugene (EUO) station started reporting 1-minute data in January 2010 data, meta = data, meta = srml.get_srml( station='EUO', start='2009-12-01', end='2010-01-31', filetype='PO') + +@pytest.mark.remote_data +@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY) +def test_get_srml_nonexisting_month_warning(): + with pytest.raises(ValueError, match='should be a 3 letter station abbreviation'): # noqa: E501 + # Test that an error is raised when specifying a station using the old + # two-letter station code. E.g., Eugene, Oregon station used to be EU + # and now goes by EUO + data, meta = data, meta = srml.get_srml( + station='EU', start='2022-12-01', end='2022-01-31', filetype='PO') From 2339dba418bedb0983fce5f0a992694bcacc8dfc Mon Sep 17 00:00:00 2001 From: "Adam R. Jensen" <39184289+AdamRJensen@users.noreply.github.com> Date: Tue, 19 Dec 2023 19:37:37 +0100 Subject: [PATCH 5/7] Remove print statement --- pvlib/iotools/srml.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pvlib/iotools/srml.py b/pvlib/iotools/srml.py index c8d171708c..ac2d48d810 100644 --- a/pvlib/iotools/srml.py +++ b/pvlib/iotools/srml.py @@ -309,7 +309,7 @@ def get_srml(station, start, end, filetype='PO', map_variables=True, # Generate list of filenames (note basename uses two-letter abbreviation) filenames = [f"{station}/{station}_{m.year}/{station[:2]}{filetype}{m.strftime('%y%m')}.txt" for m in months] # noqa: E501 - print(filenames) + dfs = [] # Initialize list of monthly dataframes for f in filenames: try: From 7a940625491b7a765dac060ffd7ac50a9e2eaee0 Mon Sep 17 00:00:00 2001 From: "Adam R. Jensen" <39184289+AdamRJensen@users.noreply.github.com> Date: Tue, 19 Dec 2023 19:38:26 +0100 Subject: [PATCH 6/7] Update test name --- pvlib/tests/iotools/test_srml.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pvlib/tests/iotools/test_srml.py b/pvlib/tests/iotools/test_srml.py index b4a280624e..16c127cdc2 100644 --- a/pvlib/tests/iotools/test_srml.py +++ b/pvlib/tests/iotools/test_srml.py @@ -116,7 +116,7 @@ def test_get_srml_nonexisting_month_warning(): @pytest.mark.remote_data @pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY) -def test_get_srml_nonexisting_month_warning(): +def test_get_srml_station_two_letter_error(): with pytest.raises(ValueError, match='should be a 3 letter station abbreviation'): # noqa: E501 # Test that an error is raised when specifying a station using the old # two-letter station code. E.g., Eugene, Oregon station used to be EU From 34b4169fc240a94df446d19bb24db4ffbf43bffb Mon Sep 17 00:00:00 2001 From: "Adam R. Jensen" <39184289+AdamRJensen@users.noreply.github.com> Date: Tue, 19 Dec 2023 19:53:18 +0100 Subject: [PATCH 7/7] Flake updates --- pvlib/tests/iotools/test_srml.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pvlib/tests/iotools/test_srml.py b/pvlib/tests/iotools/test_srml.py index 16c127cdc2..60bd2573ae 100644 --- a/pvlib/tests/iotools/test_srml.py +++ b/pvlib/tests/iotools/test_srml.py @@ -114,6 +114,7 @@ def test_get_srml_nonexisting_month_warning(): data, meta = data, meta = srml.get_srml( station='EUO', start='2009-12-01', end='2010-01-31', filetype='PO') + @pytest.mark.remote_data @pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY) def test_get_srml_station_two_letter_error(): @@ -122,4 +123,4 @@ def test_get_srml_station_two_letter_error(): # two-letter station code. E.g., Eugene, Oregon station used to be EU # and now goes by EUO data, meta = data, meta = srml.get_srml( - station='EU', start='2022-12-01', end='2022-01-31', filetype='PO') + station='EU', start='2022-01-01', end='2022-01-31', filetype='PO')