Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/sphinx/source/whatsnew/v0.8.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ Enhancements
Bug fixes
~~~~~~~~~
* Fixed unit and default value errors in :py:func:`pvlib.soiling.hsu`. (:pull:`XXX`)
* Handle NUL characters and fix version column dtype in
:py:func:`~pvlib.iotools.crn.read_crn`. (:issue:`1025`)

Testing
~~~~~~~
Expand Down
Binary file added pvlib/data/CRN_with_problems.txt
Binary file not shown.
22 changes: 18 additions & 4 deletions pvlib/iotools/crn.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@

# specify dtypes for potentially problematic values
DTYPES = [
'int64', 'int64', 'int64', 'int64', 'int64', 'int64', 'float64', 'float64',
'int64', 'int64', 'int64', 'int64', 'int64', 'str', 'float64', 'float64',
'float64', 'float64', 'float64', 'int64', 'float64', 'O', 'int64',
'float64', 'int64', 'float64', 'float64', 'int64', 'int64', 'float64',
'int64'
Expand Down Expand Up @@ -67,6 +67,13 @@ def read_crn(filename):
e.g. `SOLAR_RADIATION` becomes `ghi`. See the
`pvlib.iotools.crn.VARIABLE_MAP` dict for the complete mapping.

CRN files occasionally have a set of null characters on a line
instead of valid data. This function drops those lines. Sometimes
these null characters appear on a line of their own and sometimes
they occur on the same line as valid data. In the latter case, the
valid data will not be returned. Users may manually remove the null
characters and reparse the file if they need that line.

References
----------
.. [1] U.S. Climate Reference Network
Expand All @@ -78,9 +85,13 @@ def read_crn(filename):
Amer. Meteor. Soc., 94, 489-498. :doi:`10.1175/BAMS-D-12-00170.1`
"""

# read in data
# read in data. set fields with NUL characters to NaN
data = pd.read_fwf(filename, header=None, names=HEADERS.split(' '),
widths=WIDTHS)
widths=WIDTHS, na_values=['\x00\x00\x00\x00\x00\x00'])
# at this point we only have NaNs from NUL characters, not -999 etc.
# these bad rows need to be removed so that dtypes can be set.
# NaNs require float dtype so we run into errors if we don't do this.
data = data.dropna(axis=0)
# loop here because dtype kwarg not supported in read_fwf until 0.20
for (col, _dtype) in zip(data.columns, DTYPES):
data[col] = data[col].astype(_dtype)
Expand All @@ -98,8 +109,11 @@ def read_crn(filename):
except TypeError:
pass

# set nans
# Now we can set nans. This could be done on a per-column basis to be
# safer, since in principle a real -99 value could occur in a -9999
# column. Very unlikely to see that in the real world.
for val in [-99, -999, -9999]:
# consider replacing with .replace([-99, -999, -9999])
data = data.where(data != val, np.nan)

data = data.rename(columns=VARIABLE_MAP)
Expand Down
59 changes: 45 additions & 14 deletions pvlib/tests/iotools/test_crn.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,39 @@


@pytest.fixture
def testfile():
return DATA_DIR / 'CRNS0101-05-2019-AZ_Tucson_11_W.txt'


def test_read_crn(testfile):
columns = [
def columns():
    """Return the ordered column labels used to build expected frames."""
    expected_names = [
        'WBANNO', 'UTC_DATE', 'UTC_TIME', 'LST_DATE', 'LST_TIME', 'CRX_VN',
        'longitude', 'latitude', 'temp_air', 'PRECIPITATION', 'ghi',
        'ghi_flag',
        'SURFACE_TEMPERATURE', 'ST_TYPE', 'ST_FLAG', 'relative_humidity',
        'relative_humidity_flag', 'SOIL_MOISTURE_5', 'SOIL_TEMPERATURE_5',
        'WETNESS', 'WET_FLAG', 'wind_speed', 'wind_speed_flag']
    return expected_names


@pytest.fixture
def dtypes():
    """Return the expected dtype for each column, in column order."""
    # One dtype name per column; 'O' marks the object (string) columns.
    type_names = (
        'int64', 'int64', 'int64', 'int64',
        'int64', 'O', 'float64', 'float64',
        'float64', 'float64', 'float64',
        'int64', 'float64', 'O', 'int64',
        'float64', 'int64', 'float64',
        'float64', 'int64', 'int64', 'float64',
        'int64',
    )
    return [dtype(type_name) for type_name in type_names]


@pytest.fixture
def testfile():
    """Return ``DATA_DIR`` joined with the Tucson CRN sample file name."""
    sample_name = 'CRNS0101-05-2019-AZ_Tucson_11_W.txt'
    return DATA_DIR / sample_name


@pytest.fixture
def testfile_problems():
    """Return ``DATA_DIR`` joined with the problem CRN sample file name."""
    sample_name = 'CRN_with_problems.txt'
    return DATA_DIR / sample_name


def test_read_crn(testfile, columns, dtypes):
index = pd.DatetimeIndex(['2019-01-01 16:10:00',
'2019-01-01 16:15:00',
'2019-01-01 16:20:00',
Expand All @@ -34,16 +55,26 @@ def test_read_crn(testfile):
0.0, 340.0, 0, 4.3, 'C', 0, 83.0, 0, nan, nan, 1183, 0, 0.53, 0],
[53131, 20190101, 1625, 20190101, 925, 3, -111.17, 32.24, 4.0,
0.0, 393.0, 0, 4.8, 'C', 0, 81.0, 0, nan, nan, 1223, 0, 0.64, 0]])
dtypes = [
dtype('int64'), dtype('int64'), dtype('int64'), dtype('int64'),
dtype('int64'), dtype('int64'), dtype('float64'), dtype('float64'),
dtype('float64'), dtype('float64'), dtype('float64'),
dtype('int64'), dtype('float64'), dtype('O'), dtype('int64'),
dtype('float64'), dtype('int64'), dtype('float64'),
dtype('float64'), dtype('int64'), dtype('int64'), dtype('float64'),
dtype('int64')]
expected = pd.DataFrame(values, columns=columns, index=index)
for (col, _dtype) in zip(expected.columns, dtypes):
expected[col] = expected[col].astype(_dtype)
out = crn.read_crn(testfile)
assert_frame_equal(out, expected)


def test_read_crn_problems(testfile_problems, columns, dtypes):
    """Compare ``crn.read_crn`` output for the problem file to two rows."""
    # GH1025
    timestamps = ['2020-07-06 12:00:00',
                  '2020-07-06 13:10:00']
    index = pd.DatetimeIndex(timestamps, freq=None).tz_localize('UTC')
    first_row = [
        92821, 20200706, 1200, 20200706, 700, '3', -80.69, 28.62, 24.9,
        0.0, 190.0, 0, 25.5, 'C', 0, 93.0, 0, nan, nan, 990, 0, 1.57, 0]
    second_row = [
        92821, 20200706, 1310, 20200706, 810, '2.623', -80.69, 28.62,
        26.9, 0.0, 430.0, 0, 30.2, 'C', 0, 87.0, 0, nan, nan, 989, 0,
        1.64, 0]
    values = np.array([first_row, second_row])
    expected = pd.DataFrame(values, columns=columns, index=index)
    # Cast column by column so each one carries its expected dtype.
    for label, wanted in zip(expected.columns, dtypes):
        expected[label] = expected[label].astype(wanted)
    result = crn.read_crn(testfile_problems)
    assert_frame_equal(result, expected)