Skip to content
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion docs/sphinx/source/whatsnew/v0.9.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Deprecations

Enhancements
~~~~~~~~~~~~
* Added ``map_variables`` option to :func:`~pvlib.iotools.read_crn` (:pull:`1368`)

Bug fixes
~~~~~~~~~
Expand All @@ -22,7 +23,8 @@ Bug fixes
argument was not being passed to the ``optimalinclination`` request parameter (:pull:`1356`)
* Fixed bug in :py:func:`pvlib.bifacial.pvfactors_timeseries` where scalar ``surface_tilt``
and ``surface_azimuth`` inputs caused an error (:issue:`1127`, :issue:`1332`, :pull:`1361`)

* Added ``-99999`` to the list of values mapped to NaN in :func:`~pvlib.iotools.read_crn`
(:issue:`1372`, :pull:`1368`)

Testing
~~~~~~~
Expand Down
Binary file modified pvlib/data/CRN_with_problems.txt
Binary file not shown.
37 changes: 19 additions & 18 deletions pvlib/iotools/crn.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
'SOIL_MOISTURE_5 SOIL_TEMPERATURE_5 WETNESS WET_FLAG WIND_1_5 WIND_FLAG'
)

VARIABLE_MAP = {
CRN_VARIABLE_MAP = {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

surfrad.py has VARIABLE_MAP while midc.py has MIDC_VARIABLE_MAP, so either the original or the change is consistent with existing code. I prefer just VARIABLE_MAP since it's simple and already specific to the module. Open to other ideas.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, I can get behind consistently using VARIABLE_MAP - though I have to admit that I might have done a similar change in previous PRs.

'LONGITUDE': 'longitude',
'LATITUDE': 'latitude',
'AIR_TEMPERATURE': 'temp_air',
Expand Down Expand Up @@ -40,15 +40,22 @@
]


def read_crn(filename):
"""
Read a NOAA USCRN fixed-width file into pandas dataframe. The CRN is
described in [1]_ and [2]_.
def read_crn(filename, map_variables=True):
"""Read a NOAA USCRN fixed-width file into a pandas dataframe.

The CRN consists of over 100 meteorological stations covering the
U.S. and is described in [1]_ and [2]_. The primary goal of CRN is to
provide long-term measurements of temperature, precipitation, and soil
moisture and temperature. Additionally, global horizontal irradiance (GHI)
is measured at each site using a photodiode pyranometer.

Parameters
----------
filename: str, path object, or file-like
filepath or url to read for the fixed-width file.
map_variables: boolean, default: True
When True, renames columns of the DataFrame to pvlib variable names
where applicable. See variable :const:`CRN_VARIABLE_MAP`.

Returns
-------
Expand All @@ -60,12 +67,12 @@ def read_crn(filename):
-----
CRN files contain 5 minute averages labeled by the interval ending
time. Here, missing data is flagged as NaN, rather than the lowest
possible integer for a field (e.g. -999 or -99). Air temperature in
deg C. Wind speed in m/s at a height of 1.5 m above ground level.
possible integer for a field (e.g. -999 or -99). Air temperature is in
deg C and wind speed is in m/s at a height of 1.5 m above ground level.

Variables corresponding to standard pvlib variables are renamed,
Variables corresponding to standard pvlib variables are renamed by default,
e.g. `SOLAR_RADIATION` becomes `ghi`. See the
`pvlib.iotools.crn.VARIABLE_MAP` dict for the complete mapping.
:const:`pvlib.iotools.crn.CRN_VARIABLE_MAP` dict for the complete mapping.

CRN files occasionally have a set of null characters on a line
instead of valid data. This function drops those lines. Sometimes
Expand Down Expand Up @@ -103,19 +110,13 @@ def read_crn(filename):
dtindex = pd.to_datetime(dts['UTC_DATE'] + dts['UTC_TIME'].str.zfill(4),
format='%Y%m%d%H%M', utc=True)
data = data.set_index(dtindex)
try:
# to_datetime(utc=True) does not work in older versions of pandas
data = data.tz_localize('UTC')
except TypeError:
pass
Comment on lines -106 to -110
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Too bad I neglected to indicate the versions that needed this. I think the tests are comprehensive enough that we can safely remove this if they still pass.


# Now we can set nans. This could be done on a per-column basis to be
# safer, since in principle a real -99 value could occur in a -9999
# column. Very unlikely to see that in the real world.
for val in [-99, -999, -9999]:
# consider replacing with .replace([-99, -999, -9999])
data = data.where(data != val, np.nan)
data = data.replace([-99, -999, -9999, -99999], np.nan)

data = data.rename(columns=VARIABLE_MAP)
if map_variables:
data = data.rename(columns=CRN_VARIABLE_MAP)

return data
32 changes: 24 additions & 8 deletions pvlib/tests/iotools/test_crn.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
from numpy import dtype, nan
import pytest
from pvlib.iotools import crn
from ..conftest import DATA_DIR, assert_frame_equal
from ..conftest import DATA_DIR, assert_frame_equal, assert_index_equal


@pytest.fixture
def columns():
def columns_mapped():
return [
'WBANNO', 'UTC_DATE', 'UTC_TIME', 'LST_DATE', 'LST_TIME', 'CRX_VN',
'longitude', 'latitude', 'temp_air', 'PRECIPITATION', 'ghi',
Expand All @@ -17,6 +17,16 @@ def columns():
'WETNESS', 'WET_FLAG', 'wind_speed', 'wind_speed_flag']


@pytest.fixture
def columns_unmapped():
return [
'WBANNO', 'UTC_DATE', 'UTC_TIME', 'LST_DATE', 'LST_TIME', 'CRX_VN',
'LONGITUDE', 'LATITUDE', 'AIR_TEMPERATURE', 'PRECIPITATION',
'SOLAR_RADIATION', 'SR_FLAG', 'SURFACE_TEMPERATURE', 'ST_TYPE',
'ST_FLAG', 'RELATIVE_HUMIDITY', 'RH_FLAG', 'SOIL_MOISTURE_5',
'SOIL_TEMPERATURE_5', 'WETNESS', 'WET_FLAG', 'WIND_1_5', 'WIND_FLAG']


@pytest.fixture
def dtypes():
return [
Expand All @@ -39,7 +49,7 @@ def testfile_problems():
return DATA_DIR / 'CRN_with_problems.txt'


def test_read_crn(testfile, columns, dtypes):
def test_read_crn(testfile, columns_mapped, dtypes):
index = pd.DatetimeIndex(['2019-01-01 16:10:00',
'2019-01-01 16:15:00',
'2019-01-01 16:20:00',
Expand All @@ -54,25 +64,31 @@ def test_read_crn(testfile, columns, dtypes):
0.0, 340.0, 0, 4.3, 'C', 0, 83.0, 0, nan, nan, 1183, 0, 0.53, 0],
[53131, 20190101, 1625, 20190101, 925, 3, -111.17, 32.24, 4.0,
0.0, 393.0, 0, 4.8, 'C', 0, 81.0, 0, nan, nan, 1223, 0, 0.64, 0]])
expected = pd.DataFrame(values, columns=columns, index=index)
expected = pd.DataFrame(values, columns=columns_mapped, index=index)
for (col, _dtype) in zip(expected.columns, dtypes):
expected[col] = expected[col].astype(_dtype)
out = crn.read_crn(testfile)
assert_frame_equal(out, expected)


def test_read_crn_problems(testfile_problems, columns, dtypes):
# Test map_variables=False returns correct column names
def test_read_crn_map_variables(testfile, columns_unmapped, dtypes):
out = crn.read_crn(testfile, map_variables=False)
assert_index_equal(out.columns, pd.Index(columns_unmapped))


def test_read_crn_problems(testfile_problems, columns_mapped, dtypes):
# GH1025
index = pd.DatetimeIndex(['2020-07-06 12:00:00',
'2020-07-06 13:10:00'],
freq=None).tz_localize('UTC')
values = np.array([
[92821, 20200706, 1200, 20200706, 700, '3', -80.69, 28.62, 24.9,
0.0, 190.0, 0, 25.5, 'C', 0, 93.0, 0, nan, nan, 990, 0, 1.57, 0],
[92821, 20200706, 1200, 20200706, 700, 3.0, -80.69, 28.62, 24.9,
0.0, np.nan, 0, 25.5, 'C', 0, 93.0, 0, nan, nan, 990, 0, 1.57, 0],
[92821, 20200706, 1310, 20200706, 810, '2.623', -80.69, 28.62,
26.9, 0.0, 430.0, 0, 30.2, 'C', 0, 87.0, 0, nan, nan, 989, 0,
1.64, 0]])
expected = pd.DataFrame(values, columns=columns, index=index)
expected = pd.DataFrame(values, columns=columns_mapped, index=index)
for (col, _dtype) in zip(expected.columns, dtypes):
expected[col] = expected[col].astype(_dtype)
out = crn.read_crn(testfile_problems)
Expand Down