11import datetime as dt
2- import os
32from typing import Generator
43import xarray as xr
54import numpy as np
98from d3tools .spatial import BoundingBox
109from d3tools import timestepping as ts
1110from d3tools .timestepping .timestep import TimeStep
12- from d3tools .timestepping .fixed_num_timestep import FixedNTimeStep
1311
1412class ERA5Downloader (CDSDownloader ):
1513
1614 source = "ERA5"
1715 name = "ERA5_downloader"
16+ cds_url = 'https://cds.climate.copernicus.eu/api'
1817
1918 available_products = ['reanalysis-era5-single-levels' , 'reanalysis-era5-land' ]
2019
@@ -105,42 +104,17 @@ def build_request(self,
105104 """
106105 Make a request for the CDS API.
107106 """
108- variables = [var for var in self .variables .keys ()]
109-
110- # get the correct timesteps
111- start = time .start
112- end = time .end
113-
114107 # If in the variable list we have total precipitation, we need to download the data for the next day as well
115108 if 'total_precipitation' in self .variables :
116- end += dt . timedelta ( days = 1 )
109+ time = time . extend ( ts . TimeWindow ( 1 , 'd' ) )
117110
118- years = set ()
119- months = set ()
120- days = set ()
121-
122- this_time = start
123- while this_time <= end :
124- years .add (this_time .year )
125- months .add (this_time .month )
126- days .add (this_time .day )
127- this_time += dt .timedelta (days = 1 )
111+ request = super ().build_request (
112+ time , space_bounds
113+ )
128114
129- years_str = [str (y ) for y in years ]
130- months_str = [str (m ).zfill (2 ) for m in months ]
131- days_str = [str (d ).zfill (2 ) for d in days ]
132-
133- # Get the bounding box in the correct order
134- W , S , E , N = space_bounds .bbox
135-
136- request = {
115+ # add ERA5 specific parameters
116+ request .update ({
137117 'product_type' : 'reanalysis' ,
138- 'data_format' : 'grib' , # we always want grib, it's smaller, then we convert
139- 'download_format' : 'unarchived' , #TODO: change this to "zip" and handle unzipping before opening the data!
140- 'variable' : variables ,
141- 'year' : years_str ,
142- 'month' : months_str ,
143- 'day' : days_str ,
144118 'time' : [ # we always want all times in a day
145119 '00:00' , '01:00' , '02:00' ,
146120 '03:00' , '04:00' , '05:00' ,
@@ -151,26 +125,9 @@ def build_request(self,
151125 '18:00' , '19:00' , '20:00' ,
152126 '21:00' , '22:00' , '23:00' ,
153127 ],
154- 'area' : [N , W , S , E ],
155- }
128+ })
156129
157130 return request
158-
159- def get_last_published_ts (self , ts_per_year = None , ** kwargs ) -> ts .TimeRange :
160-
161- """
162- Get the last published date for the dataset.
163- """
164- if ts_per_year is None :
165- ts_per_year = self .ts_per_year
166-
167- # get the last published timestep
168- last_published = self .get_last_published_date ()
169- if ts_per_year == 365 :
170- TimeStep = ts .Day
171- else :
172- TimeStep = FixedNTimeStep .get_subclass (ts_per_year )
173- return TimeStep .from_date (last_published + dt .timedelta (days = 1 )) - 1
174131
175132 def get_last_published_date (self , ** kwargs ) -> dt .datetime :
176133 now = dt .datetime .now ()
@@ -182,26 +139,12 @@ def _get_data_ts(self,
182139 space_bounds : BoundingBox ,
183140 tmp_path : str ) -> Generator [tuple [xr .DataArray , dict ], None , None ]:
184141
185- import cfgrib
186-
187- timestep_start = timestep .start
188- timestep_end = timestep .end
189-
190- tmp_filename = f'temp_{ self .dataset } _{ timestep_start :%Y%m%d} -{ timestep_end :%Y%m%d} .grib2'
191- tmp_destination = os .path .join (tmp_path , tmp_filename )
192-
193-
194- request = self .build_request (timestep , space_bounds )
195- success = self .download (request , tmp_destination , min_size = 100 , missing_action = 'e' )
196-
197- # this will create a list of xarray datasets, one for each "well-formed" cube in the grib file,
198- # this is needed because requesting multiple variables at once will return a single grib file that might contain multiple cubes
199- # (if the variable have different dimensions)
200- all_data = cfgrib .open_datasets (tmp_destination )
142+ all_data = super ()._get_data_ts (timestep , space_bounds , tmp_path )
201143
202144 # loop over the variables
203145 for var , varopts in self .variables .items ():
204146 varname = varopts ['varname' ]
147+ varopts ['var' ] = var
205148
206149 # find the data for the variable
207150 for this_data in all_data :
@@ -236,7 +179,7 @@ def _get_data_ts(self,
236179 vardata = vardata .assign_coords (time = valid_times )
237180
238181 # filter data to the selected days (we have to do this because the API returns data for longer periods than we actually need)
239- inrange = (vardata .time .dt .date >= timestep_start . date ()) & (vardata .time .dt .date <= timestep_end .date ())
182+ inrange = (vardata .time .dt .date >= timestep . start . date ()) & (vardata .time .dt .date <= timestep . end .date ())
240183 vardata = vardata .sel (time = inrange )
241184
242185 # Convert Kelvin to Celsius if we are dealing with temperatures
@@ -247,8 +190,8 @@ def _get_data_ts(self,
247190 vardata = vardata .squeeze ()
248191
249192 # verify that we have all the data we need (i.e. no timesteps of complete nans)!
250- time_to_check = timestep_start
251- while time_to_check <= timestep_end :
193+ time_to_check = timestep . start
194+ while time_to_check <= timestep . end :
252195 istoday = vardata .time .dt .date == time_to_check .date ()
253196 this_data = vardata .sel (time = istoday )
254197 for time in this_data .time :
@@ -263,35 +206,5 @@ def _get_data_ts(self,
263206 if attr .startswith ('GRIB' ):
264207 del vardata .attrs [attr ]
265208
266- ts_as_tr = ts .TimeRange (start = timestep_start , end = timestep_end )
267- agg_timesteps = ts_as_tr .get_timesteps_from_tsnumber (self .ts_per_year_agg )
268-
269- for agg_timestep in agg_timesteps :
270- timestep_start = agg_timestep .start
271- timestep_end = agg_timestep .end
272-
273- # filter data to the aggregation timestep
274- inrange = (vardata .time .dt .date >= timestep_start .date ()) & (vardata .time .dt .date <= timestep_end .date ())
275- vardata_ts = vardata .sel (time = inrange )
276-
277- # add start and end time as attributes
278- vardata_ts .attrs ['start_time' ] = timestep_start
279- vardata_ts .attrs ['end_time' ] = timestep_end
280-
281- # do the necessary aggregations:
282- for agg in varopts ['agg_method' ]:
283-
284- vardata_ts .attrs ['agg_function' ] = agg
285- if agg == 'mean' :
286- aggdata = vardata_ts .mean (dim = 'time' , skipna = False )
287- elif agg == 'max' :
288- aggdata = vardata_ts .max (dim = 'time' , skipna = False )
289- elif agg == 'min' :
290- aggdata = vardata_ts .min (dim = 'time' , skipna = False )
291- elif agg == 'sum' :
292- aggdata = vardata_ts .sum (dim = 'time' , skipna = False )
293-
294- aggdata = aggdata .rio .set_spatial_dims ('longitude' , 'latitude' )
295- aggdata = aggdata .rio .write_crs (self .spatial_ref )
296-
297- yield aggdata , {'variable' : var , 'agg_method' : agg , 'timestep' : agg_timestep }
209+ # aggregate in the superclass and yield
210+ yield from self ._aggregate_variable (vardata , timestep , varopts )
0 commit comments