Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/sphinx/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,7 @@ relevant to solar energy modeling.
iotools.read_ecmwf_macc
iotools.get_ecmwf_macc
iotools.read_crn
iotools.read_solrad

A :py:class:`~pvlib.location.Location` object may be created from metadata
in some files.
Expand Down
1 change: 1 addition & 0 deletions docs/sphinx/source/whatsnew/v0.6.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ API Changes
Enhancements
~~~~~~~~~~~~
* Add US CRN data reader to `pvlib.iotools`.
* Add SOLRAD data reader to `pvlib.iotools`.

Bug fixes
~~~~~~~~~
Expand Down
6 changes: 6 additions & 0 deletions pvlib/data/abq19056.dat
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Albuquerque
35.03796 -106.62211 1617 -7 version 1
2019 56 2 25 0 0 0.000 79.30 104.5 0 60.5 0 97.8 0 5.9 0 43.6 0 0.382 2.280 0.431 0.066
2019 56 2 25 0 1 0.017 79.49 102.6 0 59.7 0 96.2 0 5.7 0 43.6 0 0.764 1.800 0.431 0.063
2019 56 2 25 0 2 0.033 79.68 102.1 0 65.8 0 94.8 0 5.5 0 43.6 0 0.382 4.079 0.323 0.062
2019 56 2 25 0 3 0.050 79.87 102.6 0 76.3 0 -9999.9 0 5.3 0 43.6 0 0.509 1.920 0.215 0.059
6 changes: 6 additions & 0 deletions pvlib/data/msn19056.dat
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Madison
43.07250 -89.41133 271 -6 version 1
2019 56 2 25 0 0 0.000 94.28 -2.3 0 0.0 0 0.4 0 -9999.9 1 -9999.9 1 187.2 0 265.6 0 265.3 0 0.000 0.000 0.000 -9999.900 0.002 26.000 27.000
2019 56 2 25 0 1 0.017 94.46 -2.3 0 0.0 0 0.1 0 -9999.9 1 -9999.9 1 188.2 0 265.6 0 265.3 0 0.133 0.128 0.223 -9999.900 0.001 26.000 72.000
2019 56 2 25 0 2 0.033 94.64 -2.7 0 -0.2 0 0.0 0 -9999.9 1 -9999.9 1 187.6 0 265.6 0 265.3 0 0.000 0.257 0.000 -9999.900 0.001 24.000 42.000
2019 56 2 25 0 3 0.050 94.82 -2.5 0 0.4 0 0.0 0 -9999.9 1 -9999.9 1 187.3 0 265.6 0 265.3 0 0.266 0.385 0.000 -9999.900 0.001 26.000 48.000
1 change: 1 addition & 0 deletions pvlib/iotools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@
from pvlib.iotools.ecmwf_macc import read_ecmwf_macc # noqa: F401
from pvlib.iotools.ecmwf_macc import get_ecmwf_macc # noqa: F401
from pvlib.iotools.crn import read_crn # noqa: F401
from pvlib.iotools.solrad import read_solrad # noqa: F401
122 changes: 122 additions & 0 deletions pvlib/iotools/solrad.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
"""Functions to read data from the NOAA SOLRAD network.
"""

import os

import numpy as np
import pandas as pd

# pvlib conventions
BASE_HEADERS = (
    'year', 'julian_day', 'month', 'day', 'hour', 'minute', 'decimal_time',
    'solar_zenith', 'ghi', 'ghi_flag', 'dni', 'dni_flag', 'dhi', 'dhi_flag',
    'uvb', 'uvb_flag', 'uvb_temp', 'uvb_temp_flag'
)

# following README_SOLRAD.txt variable names for remaining
STD_HEADERS = ('std_dw_psp', 'std_direct', 'std_diffuse', 'std_uvb')

HEADERS = BASE_HEADERS + STD_HEADERS

# Madison files carry three extra downwelling-IR (pyrgeometer) channels
DPIR_HEADERS = ('dpir', 'dpir_flag', 'dpirc', 'dpirc_flag', 'dpird',
                'dpird_flag')

MADISON_HEADERS = BASE_HEADERS + DPIR_HEADERS + STD_HEADERS + (
    'std_dpir', 'std_dpirc', 'std_dpird')


# raw column widths from README_SOLRAD.txt, exclusive of the single
# space that separates adjacent columns
_RAW_WIDTHS = [4, 3] + [2] * 4 + [6, 6] + [7, 1] * 5 + [9] * 4
_RAW_MADISON_WIDTHS = [4, 3] + [2] * 4 + [6, 6] + [7, 1] * 8 + [9] * 7

# pandas.read_fwf requires contiguous fields, so fold the separator
# space into every column except the last (no space after it)
WIDTHS = [w + 1 for w in _RAW_WIDTHS[:-1]] + [_RAW_WIDTHS[-1]]
MADISON_WIDTHS = ([w + 1 for w in _RAW_MADISON_WIDTHS[:-1]]
                  + [_RAW_MADISON_WIDTHS[-1]])

# per-column dtypes: six integer date/time fields, float measurements
# interleaved with integer QC flags, float standard deviations
DTYPES = (['int64'] * 6 + ['float64'] * 3 + ['int64', 'float64'] * 4
          + ['int64'] + ['float64'] * 4)

MADISON_DTYPES = (['int64'] * 6 + ['float64'] * 3
                  + ['int64', 'float64'] * 7 + ['int64'] + ['float64'] * 7)


def read_solrad(filename):
    """
    Read NOAA SOLRAD [1]_ [2]_ fixed-width file into pandas dataframe.

    Parameters
    ----------
    filename: str
        filepath or url to read for the fixed-width file.

    Returns
    -------
    data: Dataframe
        A dataframe with DatetimeIndex and all of the variables in the
        file.

    Notes
    -----
    SOLRAD data resolution is described by the README_SOLRAD.txt:
    "Before 1-jan. 2015 the data were reported as 3-min averages;
    on and after 1-Jan. 2015, SOLRAD data are reported as 1-min.
    averages of 1-sec. samples."
    Here, missing data is flagged as NaN, rather than -9999.9.

    References
    ----------
    .. [1] NOAA SOLRAD Network
       `https://www.esrl.noaa.gov/gmd/grad/solrad/index.html
       <https://www.esrl.noaa.gov/gmd/grad/solrad/index.html>`_

    .. [2] B. B. Hicks et. al., (1996), The NOAA Integrated Surface
       Irradiance Study (ISIS). A New Surface Radiation Monitoring
       Program. Bull. Amer. Meteor. Soc., 77, 2857-2864.
       :doi:`10.1175/1520-0477(1996)077<2857:TNISIS>2.0.CO;2`
    """
    # Madison (msn) files carry extra downwelling-IR columns and so need
    # their own headers/widths/dtypes. Inspect only the basename so that
    # a directory whose name happens to contain 'msn' does not cause a
    # non-Madison file to be misclassified.
    if 'msn' in os.path.basename(filename):
        names = MADISON_HEADERS
        widths = MADISON_WIDTHS
        dtypes = MADISON_DTYPES
    else:
        names = HEADERS
        widths = WIDTHS
        dtypes = DTYPES

    # read in data; first two rows are station name and
    # lat/lon/elevation metadata, so skip them
    data = pd.read_fwf(filename, header=None, skiprows=2, names=names,
                       widths=widths, na_values=-9999.9)

    # loop here because dtype kwarg not supported in read_fwf until 0.20
    for (col, _dtype) in zip(data.columns, dtypes):
        ser = data[col].astype(_dtype)
        if _dtype == 'float64':
            # older versions of pandas/numpy read '-9999.9' as
            # -9999.8999999999996 and fail to set nan in read_fwf,
            # so manually set nan
            ser = ser.where(ser > -9999, other=np.nan)
        data[col] = ser

    # set index
    # columns do not have leading 0s, so must zfill(2) to comply
    # with %m%d%H%M format
    dts = data[['month', 'day', 'hour', 'minute']].astype(str).apply(
        lambda x: x.str.zfill(2))
    dtindex = pd.to_datetime(
        data['year'].astype(str) + dts['month'] + dts['day'] + dts['hour'] +
        dts['minute'], format='%Y%m%d%H%M', utc=True)
    data = data.set_index(dtindex)
    try:
        # to_datetime(utc=True) does not work in older versions of pandas
        data = data.tz_localize('UTC')
    except TypeError:
        pass

    return data
106 changes: 106 additions & 0 deletions pvlib/test/test_solrad.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
import inspect
import os

import pandas as pd
from pandas.util.testing import assert_frame_equal
import numpy as np
from numpy import nan

import pytest

from pvlib.iotools import solrad


# directory containing this test module
test_dir = os.path.dirname(
    os.path.abspath(inspect.getfile(inspect.currentframe())))
# build the data-file path from components rather than a '../data/...'
# literal so the separator is correct on every platform (per review)
testfile = os.path.join(test_dir, '..', 'data', 'abq19056.dat')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should be testfile = os.path.join(test_dir, 'data', '703165TY.csv') for cross-platform compatibility

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The path you've specified would be pvlib/test/data/703165TY.csv, but the data is in pvlib/data/abq19056.dat. I am pretty sure that os.path handles the .. specification as needed for the platform. The appveyor builds do not object to this pattern.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Of course. I substituted a file I have to test. Doesn't work on Windows. C:\python\pvlib-dev\pvlib-python\pvlib\../data/abq19056.dat

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm... Appveyor tests run on Windows so I am quite confused.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmmm... the string is interpreted correctly when a file operation is performed, so it's OK as coded. I still think my suggestion is an improvement.


import os
import inspect

test_dir = os.path.dirname(
    os.path.abspath(inspect.getfile(inspect.currentframe())))

print(test_dir)

testfile = os.path.join(test_dir, '../data/703165TY.csv')
print(testfile)

with open(testfile) as infile:
    r = infile.readline()
    print(r)

produces

C:\python\pvlib-dev\pvlib-python\pvlib\test
C:\python\pvlib-dev\pvlib-python\pvlib\test\../data/703165TY.csv
703165,"SAND POINT",AK,-9.0,55.317,-160.517,7

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is '..' acceptable? testfile = os.path.join(test_dir, '..', 'data', '703165TY.csv')

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that would maintain the OS-specific separator so the string doesn't have a mix of \ and /

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

or use os.pardir to avoid the ..

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks. I thought os.path.join was smarter than it was because the tests have never complained about that pattern in the past. Looking forward to the Python 3 only days and using pathlib.

testfile_mad = os.path.join(test_dir, '../data/msn19056.dat')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

see above comment



# expected parse of pvlib/data/abq19056.dat (standard SOLRAD format);
# column names mirror solrad.HEADERS
columns = [
'year', 'julian_day', 'month', 'day', 'hour', 'minute', 'decimal_time',
'solar_zenith', 'ghi', 'ghi_flag', 'dni', 'dni_flag', 'dhi', 'dhi_flag',
'uvb', 'uvb_flag', 'uvb_temp', 'uvb_temp_flag', 'std_dw_psp', 'std_direct',
'std_diffuse', 'std_uvb']
# four consecutive 1-minute timestamps, localized to UTC to match
# read_solrad's output index
index = pd.DatetimeIndex(['2019-02-25 00:00:00',
'2019-02-25 00:01:00',
'2019-02-25 00:02:00',
'2019-02-25 00:03:00'],
freq=None).tz_localize('UTC')
# raw rows from the file; the -9999.9 dhi in the last row is expected
# to come back as nan
values = np.array([
[2.019e+03, 5.600e+01, 2.000e+00, 2.500e+01, 0.000e+00, 0.000e+00,
0.000e+00, 7.930e+01, 1.045e+02, 0.000e+00, 6.050e+01, 0.000e+00,
9.780e+01, 0.000e+00, 5.900e+00, 0.000e+00, 4.360e+01, 0.000e+00,
3.820e-01, 2.280e+00, 4.310e-01, 6.000e-02],
[2.019e+03, 5.600e+01, 2.000e+00, 2.500e+01, 0.000e+00, 1.000e+00,
1.700e-02, 7.949e+01, 1.026e+02, 0.000e+00, 5.970e+01, 0.000e+00,
9.620e+01, 0.000e+00, 5.700e+00, 0.000e+00, 4.360e+01, 0.000e+00,
7.640e-01, 1.800e+00, 4.310e-01, 6.000e-02],
[2.019e+03, 5.600e+01, 2.000e+00, 2.500e+01, 0.000e+00, 2.000e+00,
3.300e-02, 7.968e+01, 1.021e+02, 0.000e+00, 6.580e+01, 0.000e+00,
9.480e+01, 0.000e+00, 5.500e+00, 0.000e+00, 4.360e+01, 0.000e+00,
3.820e-01, 4.079e+00, 3.230e-01, 6.000e-02],
[2.019e+03, 5.600e+01, 2.000e+00, 2.500e+01, 0.000e+00, 3.000e+00,
5.000e-02, 7.987e+01, 1.026e+02, 0.000e+00, 7.630e+01, 0.000e+00,
nan, 0.000e+00, 5.300e+00, 0.000e+00, 4.360e+01, 0.000e+00,
5.090e-01, 1.920e+00, 2.150e-01, 5.000e-02]])
# expected per-column dtypes after read_solrad's astype loop; mirrors
# solrad.DTYPES
dtypes = [
'int64', 'int64', 'int64', 'int64', 'int64', 'int64', 'float64',
'float64', 'float64', 'int64', 'float64', 'int64', 'float64', 'int64',
'float64', 'int64', 'float64', 'int64', 'float64', 'float64',
'float64', 'float64']

# expected parse of pvlib/data/msn19056.dat (Madison format with the
# extra downwelling-IR channels); mirrors solrad.MADISON_HEADERS
columns_mad = [
'year', 'julian_day', 'month', 'day', 'hour', 'minute', 'decimal_time',
'solar_zenith', 'ghi', 'ghi_flag', 'dni', 'dni_flag', 'dhi', 'dhi_flag',
'uvb', 'uvb_flag', 'uvb_temp', 'uvb_temp_flag', 'dpir', 'dpir_flag',
'dpirc', 'dpirc_flag', 'dpird', 'dpird_flag', 'std_dw_psp',
'std_direct', 'std_diffuse', 'std_uvb', 'std_dpir', 'std_dpirc',
'std_dpird']
# raw rows from the file; every -9999.9 sentinel is expected as nan
values_mad = np.array([
[ 2.019e+03, 5.600e+01, 2.000e+00, 2.500e+01, 0.000e+00,
0.000e+00, 0.000e+00, 9.428e+01, -2.300e+00, 0.000e+00,
0.000e+00, 0.000e+00, 4.000e-01, 0.000e+00, nan,
1.000e+00, nan, 1.000e+00, 1.872e+02, 0.000e+00,
2.656e+02, 0.000e+00, 2.653e+02, 0.000e+00, 0.000e+00,
0.000e+00, 0.000e+00, nan, 2.000e-03, 2.600e+01,
2.700e+01],
[ 2.019e+03, 5.600e+01, 2.000e+00, 2.500e+01, 0.000e+00,
1.000e+00, 1.700e-02, 9.446e+01, -2.300e+00, 0.000e+00,
0.000e+00, 0.000e+00, 1.000e-01, 0.000e+00, nan,
1.000e+00, nan, 1.000e+00, 1.882e+02, 0.000e+00,
2.656e+02, 0.000e+00, 2.653e+02, 0.000e+00, 1.330e-01,
1.280e-01, 2.230e-01, nan, 1.000e-03, 2.600e+01,
7.200e+01],
[ 2.019e+03, 5.600e+01, 2.000e+00, 2.500e+01, 0.000e+00,
2.000e+00, 3.300e-02, 9.464e+01, -2.700e+00, 0.000e+00,
-2.000e-01, 0.000e+00, 0.000e+00, 0.000e+00, nan,
1.000e+00, nan, 1.000e+00, 1.876e+02, 0.000e+00,
2.656e+02, 0.000e+00, 2.653e+02, 0.000e+00, 0.000e+00,
2.570e-01, 0.000e+00, nan, 1.000e-03, 2.400e+01,
4.200e+01],
[ 2.019e+03, 5.600e+01, 2.000e+00, 2.500e+01, 0.000e+00,
3.000e+00, 5.000e-02, 9.482e+01, -2.500e+00, 0.000e+00,
4.000e-01, 0.000e+00, 0.000e+00, 0.000e+00, nan,
1.000e+00, nan, 1.000e+00, 1.873e+02, 0.000e+00,
2.656e+02, 0.000e+00, 2.653e+02, 0.000e+00, 2.660e-01,
3.850e-01, 0.000e+00, nan, 1.000e-03, 2.600e+01,
4.800e+01]])
# expected per-column dtypes; mirrors solrad.MADISON_DTYPES
dtypes_mad = [
'int64', 'int64', 'int64', 'int64', 'int64', 'int64', 'float64', 'float64',
'float64', 'int64', 'float64', 'int64', 'float64', 'int64', 'float64',
'int64', 'float64', 'int64', 'float64', 'int64', 'float64', 'int64',
'float64', 'int64', 'float64', 'float64', 'float64', 'float64', 'float64',
'float64', 'float64']


@pytest.mark.parametrize('testfile,index,columns,values,dtypes', [
    (testfile, index, columns, values, dtypes),
    (testfile_mad, index, columns_mad, values_mad, dtypes_mad)
])
def test_read_solrad(testfile, index, columns, values, dtypes):
    """read_solrad output matches the hand-checked expected frame."""
    dtype_map = dict(zip(columns, dtypes))
    expected = pd.DataFrame(values, columns=columns, index=index)
    expected = expected.astype(dtype_map)
    actual = solrad.read_solrad(testfile)
    assert_frame_equal(actual, expected)