Skip to content

Commit 7ac2c11

Browse files
authored
Merge pull request #141 from c-hydro/dev
Dev
2 parents 505ecce + 3f8d130 commit 7ac2c11

File tree

2 files changed

+37
-43
lines changed

2 files changed

+37
-43
lines changed

src/door/data_sources/clms/clms_downloader.py

Lines changed: 35 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,8 @@ class CLMSDownloader(URLDownloader):
2828

2929
available_products = {
3030
'swi': {
31-
'versions': ["3.1.1", "3.2.1"],
32-
'url': clms_url + 'geotiff/soil_water_index/swi_12.5km_v3_{ts_str}daily/{timestep.start:%Y}/{timestep.start:%Y%m%d}/c_gls_SWI{ts_str}-SWI{var}_{timestep.start:%Y%m%d}1200_GLOBE_ASCAT_V{version}.tiff',
31+
'versions': ["3.1.1", "3.2.1", "4.0.1"],
32+
'url': clms_url + 'netcdf/soil_water_index/swi_12.5km_v{version[0]}_{ts_str}daily/{timestep.start:%Y}/{timestep.start:%Y%m%d}/c_gls_SWI{ts_str}_{timestep.start:%Y%m%d}1200_GLOBE_ASCAT_V{version}.nc',
3333
'nodata': 255,
3434
'scale_factor': 0.5
3535
}
@@ -47,6 +47,7 @@ def set_product(self, product: str) -> None:
4747
self.nodata = self.available_products[self.product]["nodata"]
4848
self.scale_factor = self.available_products[self.product]["scale_factor"]
4949
self.versions = self.available_products[self.product]["versions"]
50+
self.versions.reverse()
5051
self.url_blank = self.available_products[self.product]["url"]
5152

5253
def set_variables(self, variables: list) -> None:
@@ -66,7 +67,7 @@ def get_last_published_ts(self, **kwargs) -> ts.TimeRange:
6667
Get the last published date for the dataset.
6768
"""
6869

69-
ts_per_year = self.ts_per_year
70+
ts_per_year = self.ts_per_year if hasattr(self, 'ts_per_year') else 365
7071

7172
# Set ts_str based on the ts_per_year
7273
if ts_per_year == 36:
@@ -104,70 +105,62 @@ def get_last_published_date(self, **kwargs) -> dt.datetime:
104105
return self.get_last_published_ts(**kwargs).end
105106

106107
def _get_data_ts(self,
107-
time_range: TimeStep,
108+
time_step: TimeStep,
108109
space_bounds: BoundingBox,
109110
tmp_path: str,
110111
**kwargs) -> Iterable[tuple[xr.DataArray, dict]]:
111112

112113
"""
113114
Get the data for a specific timestep.
114115
"""
115-
for variable in self.variables:
116-
yield from self._get_data_ts_singlevar(time_range, space_bounds, tmp_path, variable, **kwargs)
117116

118-
def _get_data_ts_singlevar(
119-
self,
120-
time_range: TimeStep,
121-
space_bounds: BoundingBox,
122-
tmp_path: str,
123-
variable: str,
124-
**kwargs) -> Iterable[tuple[xr.DataArray, dict]]:
125-
''' Get the data for a specific timestep and variable. '''
117+
for v in self.versions:
118+
this_url_v = self.url_blank.format(
119+
ts_str=self.ts_str,
120+
timestep=time_step,
121+
version=v
122+
)
123+
124+
# check with a HEAD request whether the file exists for this version (the download happens after the loop)
125+
response = requests.head(this_url_v)
126126

127-
# Get the URL without version
128-
url_blank = self.url_blank.format(
129-
ts_str=self.ts_str,
130-
timestep=time_range,
131-
var=variable,
132-
version="{version}"
133-
)
127+
if response.status_code is requests.codes.ok:
128+
url = this_url_v
129+
break
130+
else:
131+
# If the loop ends without breaking, the data is missing
132+
handle_missing('warning', {'timestep': time_step})
133+
return
134134

135135
# Download the file
136-
ts_end = time_range.end
137-
tmp_filename_raw = f'temp_{self.product}{variable}_{ts_end:%Y%m%d}.tif'
136+
ts_end = time_step.end
137+
tmp_filename_raw = f'temp_{self.product}_{ts_end:%Y%m%d}.nc'
138138
tmp_destination = os.path.join(tmp_path, tmp_filename_raw)
139+
download_http(url, tmp_destination)
140+
141+
# open it
142+
data = xr.open_dataset(tmp_destination)
139143

140-
# try to download the file in both versions
141-
success = False
142-
for version in self.versions:
143-
url_v = url_blank.format(version=version)
144-
145-
response = requests.head(url_v)
146-
147-
if response.status_code is requests.codes.ok:
148-
url = url_v
149-
success = True
150-
break
151-
152-
if success:
153-
download_http(url, tmp_destination)
144+
# extract the variables from the file, crop them and yield them
145+
for variable in self.variables:
146+
147+
this_data = data[f'{self.product.upper()}_{variable}']
154148

155149
# Crop the data
156-
cropped = crop_to_bb(tmp_destination, space_bounds)
150+
cropped = crop_to_bb(this_data, space_bounds)
157151

158152
# Change the nodata value to np.nan and return the data
159153
cropped = cropped.where(~np.isclose(cropped, self.nodata, equal_nan=True), np.nan)
160154
cropped.rio.no_data = np.nan
161155

156+
# ensure the crs is set
157+
cropped = cropped.rio.set_crs("EPSG:4326")
158+
162159
# Apply the scale factor
163160
cropped *= self.scale_factor
164161

165162
yield cropped, {'variable': variable}
166163

167-
else:
168-
# If the loop ends without breaking, the data is missing
169-
handle_missing('warning', {'timestep': time_range, 'variable': variable})
170-
171164
def get_data(self,
172165
time_range: ts.TimeRange|Sequence[dt.datetime],
173166
space_bounds: Optional[BoundingBox] = None,

workflow_examples/option_files/clms_example.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@
1717
"destination" : "{DATASETS.destination}",
1818
"bounds" : [8, 38.5, 10, 41.5],
1919
"options" : {
20-
"variables": ["020"]
20+
"variables": ["020"],
21+
"ts_per_year" : 36
2122
}
2223
}
2324
}

0 commit comments

Comments
 (0)