22"""
33
44import pandas as pd
5- import numpy as np
65
76
8- HEADERS = (
9- 'WBANNO UTC_DATE UTC_TIME LST_DATE LST_TIME CRX_VN LONGITUDE LATITUDE '
10- 'AIR_TEMPERATURE PRECIPITATION SOLAR_RADIATION SR_FLAG '
11- 'SURFACE_TEMPERATURE ST_TYPE ST_FLAG RELATIVE_HUMIDITY RH_FLAG '
12- 'SOIL_MOISTURE_5 SOIL_TEMPERATURE_5 WETNESS WET_FLAG WIND_1_5 WIND_FLAG'
13- )
7+ HEADERS = [
8+ 'WBANNO' , ' UTC_DATE' , ' UTC_TIME' , ' LST_DATE' , ' LST_TIME' , ' CRX_VN' ,
9+ 'LONGITUDE' , 'LATITUDE' , 'AIR_TEMPERATURE' , 'PRECIPITATION' ,
10+ 'SOLAR_RADIATION' , 'SR_FLAG' , ' SURFACE_TEMPERATURE' , ' ST_TYPE' , ' ST_FLAG' ,
11+ 'RELATIVE_HUMIDITY' , 'RH_FLAG' , 'SOIL_MOISTURE_5' , 'SOIL_TEMPERATURE_5' ,
12+ 'WETNESS' , 'WET_FLAG' , 'WIND_1_5' , 'WIND_FLAG' ]
1413
1514VARIABLE_MAP = {
1615 'LONGITUDE' : 'longitude' ,
2423 'WIND_FLAG' : 'wind_speed_flag'
2524}
2625
26+ NAN_DICT = {
27+ 'CRX_VN' : - 99999 ,
28+ 'AIR_TEMPERATURE' : - 9999 ,
29+ 'PRECIPITATION' : - 9999 ,
30+ 'SOLAR_RADIATION' : - 99999 ,
31+ 'SURFACE_TEMPERATURE' : - 9999 ,
32+ 'RELATIVE_HUMIDITY' : - 9999 ,
33+ 'SOIL_MOISTURE_5' : - 99 ,
34+ 'SOIL_TEMPERATURE_5' : - 9999 ,
35+ 'WETNESS' : - 9999 ,
36+ 'WIND_1_5' : - 99 }
37+
38+ # Add a run of NUL characters as an extra NaN marker for each column listed in NAN_DICT
39+ NAN_DICT = {k : [v , '\x00 \x00 \x00 \x00 \x00 \x00 ' ] for k , v in NAN_DICT .items ()}
40+
2741# as specified in CRN README.txt file. excludes 1 space between columns
2842WIDTHS = [5 , 8 , 4 , 8 , 4 , 6 , 7 , 7 , 7 , 7 , 6 , 1 , 7 , 1 , 1 , 5 , 1 , 7 , 7 , 5 , 1 , 6 , 1 ]
2943# add 1 to make fields contiguous (required by pandas.read_fwf)
4054]
4155
4256
43- def read_crn (filename ):
44- """
45- Read a NOAA USCRN fixed-width file into pandas dataframe. The CRN is
46- described in [1]_ and [2]_.
57+ def read_crn (filename , map_variables = True ):
58+ """Read a NOAA USCRN fixed-width file into a pandas dataframe.
59+
60+ The CRN network consists of over 100 meteorological stations covering the
61+ U.S. and is described in [1]_ and [2]_. The primary goal of CRN is to
62+ provide long-term measurements of temperature, precipitation, and soil
63+ moisture and temperature. Additionally, global horizontal irradiance (GHI)
64+ is measured at each site using a photodiode pyranometer.
4765
4866 Parameters
4967 ----------
5068 filename: str, path object, or file-like
5169 filepath or url to read for the fixed-width file.
70+ map_variables: boolean, default: True
71+ When True, renames columns of the DataFrame to pvlib variable names
72+ where applicable. See variable :const:`VARIABLE_MAP`.
5273
5374 Returns
5475 -------
@@ -60,12 +81,12 @@ def read_crn(filename):
6081 -----
6182 CRN files contain 5 minute averages labeled by the interval ending
6283 time. Here, missing data is flagged as NaN, rather than the lowest
63- possible integer for a field (e.g. -999 or -99). Air temperature in
64- deg C. Wind speed in m/s at a height of 1.5 m above ground level.
84+ possible integer for a field (e.g. -9999 or -99). Air temperature is in
85+ deg C and wind speed is in m/s at a height of 1.5 m above ground level.
6586
66- Variables corresponding to standard pvlib variables are renamed,
87+ Variables corresponding to standard pvlib variables are renamed by default,
6788 e.g. `SOLAR_RADIATION` becomes `ghi`. See the
68- `pvlib.iotools.crn.VARIABLE_MAP` dict for the complete mapping.
89+ :const:`pvlib.iotools.crn.VARIABLE_MAP` dict for the complete mapping.
6990
7091 CRN files occasionally have a set of null characters on a line
7192 instead of valid data. This function drops those lines. Sometimes
@@ -85,16 +106,13 @@ def read_crn(filename):
85106 Amer. Meteor. Soc., 94, 489-498. :doi:`10.1175/BAMS-D-12-00170.1`
86107 """
87108
88- # read in data. set fields with NUL characters to NaN
89- data = pd .read_fwf (filename , header = None , names = HEADERS .split (' ' ),
90- widths = WIDTHS , na_values = ['\x00 \x00 \x00 \x00 \x00 \x00 ' ])
91- # at this point we only have NaNs from NUL characters, not -999 etc.
92- # these bad rows need to be removed so that dtypes can be set.
93- # NaNs require float dtype so we run into errors if we don't do this.
94- data = data .dropna (axis = 0 )
95- # loop here because dtype kwarg not supported in read_fwf until 0.20
96- for (col , _dtype ) in zip (data .columns , DTYPES ):
97- data [col ] = data [col ].astype (_dtype )
109+ # read in data
110+ data = pd .read_fwf (filename , header = None , names = HEADERS , widths = WIDTHS ,
111+ na_values = NAN_DICT )
112+ # Remove rows in which every field is NaN
113+ data = data .dropna (axis = 0 , how = 'all' )
114+ # Set dtypes here because the dtype kwarg is not supported in read_fwf until pandas 0.20
115+ data = data .astype (dict (zip (HEADERS , DTYPES )))
98116
99117 # set index
100118 # UTC_TIME does not have leading 0s, so must zfill(4) to comply
@@ -103,19 +121,8 @@ def read_crn(filename):
103121 dtindex = pd .to_datetime (dts ['UTC_DATE' ] + dts ['UTC_TIME' ].str .zfill (4 ),
104122 format = '%Y%m%d%H%M' , utc = True )
105123 data = data .set_index (dtindex )
106- try :
107- # to_datetime(utc=True) does not work in older versions of pandas
108- data = data .tz_localize ('UTC' )
109- except TypeError :
110- pass
111-
112- # Now we can set nans. This could be done a per column basis to be
113- # safer, since in principle a real -99 value could occur in a -9999
114- # column. Very unlikely to see that in the real world.
115- for val in [- 99 , - 999 , - 9999 ]:
116- # consider replacing with .replace([-99, -999, -9999])
117- data = data .where (data != val , np .nan )
118-
119- data = data .rename (columns = VARIABLE_MAP )
124+
125+ if map_variables :
126+ data = data .rename (columns = VARIABLE_MAP )
120127
121128 return data
0 commit comments