Skip to content

Commit ca166cf

Browse files
authored
Merge pull request #150 from c-hydro/dev
Dev
2 parents 400017f + fd3116e commit ca166cf

File tree

4 files changed: 53 additions (+53), 31 deletions (−31)

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "door"
-version = "2.3.6"
+version = "2.3.9"
88
description = "A package for operational retrieval of raster data from different sources"
99
authors = [
1010
{ name = "Andrea Libertino", email = "andrea.libertino@cimafoundation.org" },

src/door/base_downloaders.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -380,7 +380,7 @@ def download(self, destination: str, min_size: float = None, missing_action: str
380380

381381
url = self.format_url(**kwargs)
382382
try:
-            r = requests.get(url, kwargs["auth"])
+            r = requests.get(url, auth = kwargs["auth"])
384384
if r.status_code != 200:
385385
raise FileNotFoundError(r.text)
386386

src/door/data_sources/hsaf/hsaf_downloader.py

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
from ...utils.io import decompress_bz2
1212

1313
import datetime as dt
-import requests
1514

1615
from d3tools.spatial import BoundingBox, crop_to_bb
1716
from d3tools import timestepping as ts
@@ -33,15 +32,16 @@ class HSAFDownloader(FTPDownloader):
3332
}
3433

3534
available_products: dict = {
-        "HSAF-h141": {
+        "hsaf-h141": {
3736
"ts_per_year": 365,
3837
"url" : "/products/h141/h141/netCDF4/{timestep.start:%Y}/h141_{timestep.start:%Y%m%d}00_R01.nc",
3938
"nodata" : -9999,
4039
"format" : 'nc'
4140
},
-        "HSAF-h14": {
+        "hsaf-h14": {
4342
"ts_per_year": 365,
-            "url" : "/hsaf_archive/h14/{timestep.start:%Y/%m/%d}/h14_{timestep.start:%Y%m%d}_0000.grib.bz2",
+            "url_alt" : "/hsaf_archive/h14/{timestep.start:%Y/%m/%d}/h14_{timestep.start:%Y%m%d}_0000.grib.bz2",
+            "url" : "/products/h14/h14_cur_mon_grib/h14_{timestep.start:%Y%m%d}_0000.grib.bz2",
4545
"nodata" : -9999,
4646
"format" : 'bz2'
4747
},
@@ -89,15 +89,6 @@ def __init__(self, product: str) -> None:
8989
self.credentials = get_credentials(env_variables=self.credential_env_vars, url = 'ftp://' + url_host, encode = False)
9090
username, password = self.credentials.split(':')
9191
super().__init__(url_host, protocol = 'ftp', user=username, password=password)
92-
93-
def set_product(self, product: str) -> None:
94-
self.product = product
95-
if product not in self.available_products:
96-
raise ValueError(f'Product {product} not available. Choose one of {self.available_products.keys()}')
97-
self.ts_per_year = self.available_products[product]["ts_per_year"]
98-
self.url_blank = self.available_products[product]["url"]
99-
self.nodata = self.available_products[product]["nodata"]
100-
self.format = self.available_products[product]["format"]
10192

10293
def set_variables(self, variables: list) -> None:
10394
if self.custom_variables:
@@ -178,8 +169,19 @@ def _get_data_ts(self,
178169

179170
# Download the data
180171
retries = self.retries
+
+        urls = [self.url.format(timestep=timestep)]
+        if hasattr(self, 'url_alt'): urls.append(self.url_alt.format(timestep=timestep))
+        # find the correct url
+        for url in urls:
+            if self.check_data(url, timestep=timestep):
+                break
+        else:
+            print(f"No data available for {timestep}")
+            return
+
181183
while True:
-            success = self.download(self.url_blank, tmp_file, timestep = timestep, auth = self.credentials, missing_action = 'ignore', min_size = 50000)
+            success = self.download(url, tmp_file, timestep = timestep, auth = self.credentials, missing_action = 'ignore', min_size = 50000)
183185
if success:
184186
break
185187
elif retries <= 0:

src/door/data_sources/lsa_saf/lsasaf_downloader.py

Lines changed: 35 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@ class LSASAFDownloader(URLDownloader):
1818

1919
single_temp_folder = False
2020

+    retries = 3
+    retry_delay = 10 # seconds
+
2124
default_options = {
2225
"ts_per_year": 365,
2326
"variables" : None, # all variables
@@ -115,24 +118,41 @@ def _get_data_ts(self,
115118
tmp_path: str) -> Generator[tuple[xr.DataArray, dict], None, None]:
116119

117120

+        credentials = self.get_credentials()
118122
tmp_file_nc = f'temp_{self.product}{timestep.start:%Y%m%d}.nc'
119123

120124
# check if the file is not already downloaded in the tmp_path
121125
tmp_destination = os.path.join(tmp_path, tmp_file_nc)
122126
this_filename = self.filename.format(time = timestep.start)
-        self.download(tmp_destination, min_size = 2000, missing_action = 'warning',
-                      time = timestep.start, product_name = self.product_name, satellite = self.satellite, filename = this_filename)
-
-        # open the file
-        raw_data = xr.open_dataset(tmp_destination, engine = 'netcdf4')
-        for var, varopts in self.variables.items():
-            vardata = raw_data[var].isel(time = 0, drop = True) # remove the time dimension if present
-
-            # crop to the bounding box
-            vardata = crop_to_bb(vardata, space_bounds)
-
-            # set the metadata
-            vardata = vardata.rio.write_crs('EPSG:4326')
-            vardata = vardata.rio.set_spatial_dims(x_dim = 'lon', y_dim = 'lat')
-
-            yield vardata, {'variable' : var}
+        success = False
+        while not success and self.retries > 0:
+
+            success = self.download(tmp_destination, min_size = 2000, missing_action = 'warning', auth = tuple(credentials.split(':')),
+                                    time = timestep.start, product_name = self.product_name, satellite = self.satellite, filename = this_filename)
+
+            if not success:
+                self.retries -= 1
+                if self.retries > 0:
+                    print(f'Retrying download in {self.retry_delay} seconds... ({self.retries} retries left)')
+                    import time
+                    time.sleep(self.retry_delay)
+                    continue
+                else:
+                    print('Max retries reached. Giving up.')
+                    break
+
+        # open the file
+        raw_data = xr.open_dataset(tmp_destination, engine = 'netcdf4')
+        raw_data.close()
+        for var, varopts in self.variables.items():
+            vardata = raw_data[var].isel(time = 0, drop = True) # remove the time dimension if present
+
+            # crop to the bounding box
+            vardata = crop_to_bb(vardata, space_bounds)
+
+            # set the metadata
+            vardata = vardata.rio.write_crs('EPSG:4326')
+            vardata = vardata.rio.set_spatial_dims(x_dim = 'lon', y_dim = 'lat')
+
+            yield vardata, {'variable' : var}

0 commit comments

Comments
 (0)