@@ -42,10 +42,11 @@ def read_srml(filename):
42
42
43
43
Notes
44
44
-----
45
- The time index is shifted back one minute to account for 2400 hours,
46
- and to avoid time parsing errors on leap years. The returned data
47
- values should be understood to occur during the interval from the
48
- time of the row until the time of the next row. This is consistent
45
+ The time index is shifted back by one interval to account for the
46
+ daily endtime of 2400, and to avoid time parsing errors on leap
47
+ years. The returned data values are labeled by the left endpoint of
48
+ the interval, and should be understood to occur during the interval from
49
+ the time of the row until the time of the next row. This is consistent
49
50
with pandas' default labeling behavior.
50
51
51
52
See SRML's `Archival Files`_ page for more information.
@@ -134,11 +135,27 @@ def format_index(df):
134
135
year = int (df .columns [1 ])
135
136
df_doy = df [df .columns [0 ]]
136
137
# Times are expressed as integers from 1-2400, we convert to 0-2359 by
137
- # subracting one and then correcting the minutes at each former hour.
138
- df_time = df [df .columns [1 ]] - 1
139
- fifty_nines = df_time % 100 == 99
140
- times = df_time .where (~ fifty_nines , df_time - 40 )
141
-
138
+ # subtracting the length of one interval and then correcting the times
139
+ # at each former hour. interval_length is determined by taking the
140
+ # difference of the first two rows of the time column.
141
+ # e.g. The first two rows of hourly data are 100 and 200
142
+ # so interval_length is 100.
143
+ interval_length = df [df .columns [1 ]][1 ] - df [df .columns [1 ]][0 ]
144
+ df_time = df [df .columns [1 ]] - interval_length
145
+ if interval_length == 100 :
146
+ # Hourly files do not require fixing the former hour timestamps.
147
+ times = df_time
148
+ else :
149
+ # Because hours are represented by some multiple of 100, shifting
150
+ # results in invalid values.
151
+ #
152
+ # e.g. 200 (for 02:00) shifted by 15 minutes becomes 185, the
153
+ # desired result is 145 (for 01:45)
154
+ #
155
+ # So we find all times with minutes greater than 60 and subtract 40
156
+ # to correct to valid times.
157
+ old_hours = df_time % 100 > 60
158
+ times = df_time .where (~ old_hours , df_time - 40 )
142
159
times = times .apply (lambda x : '{:04.0f}' .format (x ))
143
160
doy = df_doy .apply (lambda x : '{:03.0f}' .format (x ))
144
161
dts = pd .to_datetime (str (year ) + '-' + doy + '-' + times ,
@@ -161,14 +178,30 @@ def read_srml_month_from_solardat(station, year, month, filetype='PO'):
161
178
month: int
162
179
Month to request data for.
163
180
filetype: string
164
- SRML file type to gather. 'RO' and 'PO' are the
165
- only minute resolution files.
181
+ SRML file type to gather. See notes for explanation.
166
182
167
183
Returns
168
184
-------
169
185
data: pd.DataFrame
170
186
One month of data from SRML.
171
187
188
+ Notes
189
+ -----
190
+ File types designate the time interval of a file and whether it contains
191
+ raw or processed data. For instance, `RO` designates raw, one minute
192
+ data and `PO` designates processed one minute data. The availability
193
+ of file types varies between sites. Below is a table of file types
194
+ and their time intervals. See [1] for site information.
195
+
196
+ ============= ============ ==================
197
+ time interval raw filetype processed filetype
198
+ ============= ============ ==================
199
+ 1 minute RO PO
200
+ 5 minute RF PF
201
+ 15 minute RQ PQ
202
+ hourly RH PH
203
+ ============= ============ ==================
204
+
172
205
References
173
206
----------
174
207
[1] University of Oregon Solar Radiation Measurement Laboratory
0 commit comments