Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/sphinx/source/whatsnew/v0.8.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ Enhancements
Bug fixes
~~~~~~~~~
* Fixed unit and default value errors in :py:func:`pvlib.soiling.hsu`. (:pull:`XXX`)
* Handle NUL characters and fix version column dtype in
:py:func:`~pvlib.iotools.crn.read_crn`. (:issue:`1025`)

Testing
~~~~~~~
Expand Down
Binary file added pvlib/data/CRN_with_problems.txt
Binary file not shown.
12 changes: 8 additions & 4 deletions pvlib/iotools/crn.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@

# specify dtypes for potentially problematic values
DTYPES = [
'int64', 'int64', 'int64', 'int64', 'int64', 'int64', 'float64', 'float64',
'int64', 'int64', 'int64', 'int64', 'int64', 'str', 'float64', 'float64',
'float64', 'float64', 'float64', 'int64', 'float64', 'O', 'int64',
'float64', 'int64', 'float64', 'float64', 'int64', 'int64', 'float64',
'int64'
Expand Down Expand Up @@ -78,9 +78,13 @@ def read_crn(filename):
Amer. Meteor. Soc., 94, 489-498. :doi:`10.1175/BAMS-D-12-00170.1`
"""

# read in data
# read in data. set fields with NUL characters to NaN
data = pd.read_fwf(filename, header=None, names=HEADERS.split(' '),
widths=WIDTHS)
widths=WIDTHS, na_values=['\x00\x00\x00\x00\x00\x00'])
# at this point we only have NaNs from NUL characters, not -999 etc.
# these bad rows need to be removed so that dtypes can be set.
# NaNs require float dtype so we run into errors if we don't do this.
data = data.dropna(axis=0)
# loop here because dtype kwarg not supported in read_fwf until 0.20
for (col, _dtype) in zip(data.columns, DTYPES):
data[col] = data[col].astype(_dtype)
Expand All @@ -98,7 +102,7 @@ def read_crn(filename):
except TypeError:
pass

# set nans
# now we can set nans
for val in [-99, -999, -9999]:
data = data.where(data != val, np.nan)

Expand Down
59 changes: 45 additions & 14 deletions pvlib/tests/iotools/test_crn.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,39 @@


@pytest.fixture
def testfile():
return DATA_DIR / 'CRNS0101-05-2019-AZ_Tucson_11_W.txt'


def test_read_crn(testfile):
columns = [
def columns():
return [
'WBANNO', 'UTC_DATE', 'UTC_TIME', 'LST_DATE', 'LST_TIME', 'CRX_VN',
'longitude', 'latitude', 'temp_air', 'PRECIPITATION', 'ghi',
'ghi_flag',
'SURFACE_TEMPERATURE', 'ST_TYPE', 'ST_FLAG', 'relative_humidity',
'relative_humidity_flag', 'SOIL_MOISTURE_5', 'SOIL_TEMPERATURE_5',
'WETNESS', 'WET_FLAG', 'wind_speed', 'wind_speed_flag']


@pytest.fixture
def dtypes():
return [
dtype('int64'), dtype('int64'), dtype('int64'), dtype('int64'),
dtype('int64'), dtype('O'), dtype('float64'), dtype('float64'),
dtype('float64'), dtype('float64'), dtype('float64'),
dtype('int64'), dtype('float64'), dtype('O'), dtype('int64'),
dtype('float64'), dtype('int64'), dtype('float64'),
dtype('float64'), dtype('int64'), dtype('int64'), dtype('float64'),
dtype('int64')]


@pytest.fixture
def testfile():
return DATA_DIR / 'CRNS0101-05-2019-AZ_Tucson_11_W.txt'


@pytest.fixture
def testfile_problems():
return DATA_DIR / 'CRN_with_problems.txt'


def test_read_crn(testfile, columns, dtypes):
index = pd.DatetimeIndex(['2019-01-01 16:10:00',
'2019-01-01 16:15:00',
'2019-01-01 16:20:00',
Expand All @@ -34,16 +55,26 @@ def test_read_crn(testfile):
0.0, 340.0, 0, 4.3, 'C', 0, 83.0, 0, nan, nan, 1183, 0, 0.53, 0],
[53131, 20190101, 1625, 20190101, 925, 3, -111.17, 32.24, 4.0,
0.0, 393.0, 0, 4.8, 'C', 0, 81.0, 0, nan, nan, 1223, 0, 0.64, 0]])
dtypes = [
dtype('int64'), dtype('int64'), dtype('int64'), dtype('int64'),
dtype('int64'), dtype('int64'), dtype('float64'), dtype('float64'),
dtype('float64'), dtype('float64'), dtype('float64'),
dtype('int64'), dtype('float64'), dtype('O'), dtype('int64'),
dtype('float64'), dtype('int64'), dtype('float64'),
dtype('float64'), dtype('int64'), dtype('int64'), dtype('float64'),
dtype('int64')]
expected = pd.DataFrame(values, columns=columns, index=index)
for (col, _dtype) in zip(expected.columns, dtypes):
expected[col] = expected[col].astype(_dtype)
out = crn.read_crn(testfile)
assert_frame_equal(out, expected)


def test_read_crn_problems(testfile_problems, columns, dtypes):
# GH1025
index = pd.DatetimeIndex(['2020-07-06 12:00:00',
'2020-07-06 13:10:00'],
freq=None).tz_localize('UTC')
values = np.array([
[92821, 20200706, 1200, 20200706, 700, '3', -80.69, 28.62, 24.9,
0.0, 190.0, 0, 25.5, 'C', 0, 93.0, 0, nan, nan, 990, 0, 1.57, 0],
[92821, 20200706, 1310, 20200706, 810, '2.623', -80.69, 28.62,
26.9, 0.0, 430.0, 0, 30.2, 'C', 0, 87.0, 0, nan, nan, 989, 0,
1.64, 0]])
expected = pd.DataFrame(values, columns=columns, index=index)
for (col, _dtype) in zip(expected.columns, dtypes):
expected[col] = expected[col].astype(_dtype)
out = crn.read_crn(testfile_problems)
assert_frame_equal(out, expected)