Skip to content

Commit e3b287d

Browse files
authored
Merge pull request #111 from xcube-dev/konstntokas-xxx-add_drought_index_ensemble
Drought index ensemble added
2 parents 9bc2631 + 29e6c47 commit e3b287d

File tree

8 files changed

+3075
-554
lines changed

8 files changed

+3075
-554
lines changed

CHANGES.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
- Adjust time range in ERA5 demo notebook. (#106)
44
- Add data access for [monthly drought indices from 1940 to present derived from ERA5 reanalysis](https://cds.climate.copernicus.eu/datasets/derived-drought-historical-monthly?tab=overview). (#109)
5+
- Add data access for [monthly drought indices from 1940 to present derived from ERA5 ensemble members](https://cds.climate.copernicus.eu/datasets/derived-drought-historical-monthly?tab=overview). (#110)
56
- Support chunking of returned datasets. (#110)
67

78
## Changes in 1.1.0

examples/notebooks/Ex4-CDS-store-drought-indices.ipynb

Lines changed: 2946 additions & 524 deletions
Large diffs are not rendered by default.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"_dataset_name": "derived-drought-historical-monthly", "variable": ["standardised_precipitation_index", "test_for_normality_spi"], "accumulation_period": ["1", "3"], "area": [0.875, -0.875, -0.875, 0.875], "version": "1_0", "product_type": ["ensemble_members"], "dataset_type": "consolidated_dataset", "year": ["2015", "2016"], "month": ["01", "02", "10", "11", "12"]}
1.04 MB
Binary file not shown.

test/mock_results/test_drought_indices_open_data/request.json renamed to test/mock_results/test_drought_indices_open_data_reanalysis/request.json

File renamed without changes.

test/mock_results/test_drought_indices_open_data/result renamed to test/mock_results/test_drought_indices_open_data_reanalysis/result

File renamed without changes.

test/test_drought_indices.py

Lines changed: 51 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,11 @@ class CDSDroughtIndicesDatasetHandlerTest(unittest.TestCase):
4141
def setUp(self) -> None:
4242
self.drought_idx_handler = DroughtIndicesDatasetHandler()
4343
self.data_id_reanalysis = "derived-drought-historical-monthly:reanalysis"
44+
self.data_id_ensemble = "derived-drought-historical-monthly:ensemble_members"
4445

4546
def test_get_supported_data_ids(self):
4647
ids = self.drought_idx_handler.get_supported_data_ids()
47-
self.assertCountEqual([self.data_id_reanalysis], ids)
48+
self.assertCountEqual([self.data_id_reanalysis, self.data_id_ensemble], ids)
4849

4950
def test_get_human_readable_data_id(self):
5051
self.assertEqual(
@@ -67,7 +68,7 @@ def test_get_open_data_params_schema(self):
6768
schema.required,
6869
)
6970

70-
def test_describe_data(self):
71+
def test_describe_data_reanalysis(self):
7172
descriptor = self.drought_idx_handler.describe_data(self.data_id_reanalysis)
7273
self.assertEqual(self.data_id_reanalysis, descriptor.data_id)
7374
self.assertEqual("EPSG:4326", descriptor.crs)
@@ -76,8 +77,22 @@ def test_describe_data(self):
7677
self.assertEqual("1940-01-01", descriptor.time_range[0])
7778
self.assertEqual("2025-12-31", descriptor.time_range[1])
7879
self.assertEqual("1M", descriptor.time_period)
80+
self.assertEqual(("time", "lat", "lon"), descriptor.data_vars["spi1"].dims)
7981

80-
def test_open_data(self):
82+
def test_describe_data_ensemble(self):
83+
descriptor = self.drought_idx_handler.describe_data(self.data_id_ensemble)
84+
self.assertEqual(self.data_id_ensemble, descriptor.data_id)
85+
self.assertEqual("EPSG:4326", descriptor.crs)
86+
self.assertEqual((-180.0, -90.0, 180.0, 90.0), descriptor.bbox)
87+
self.assertEqual(0.25, descriptor.spatial_res)
88+
self.assertEqual("1940-01-01", descriptor.time_range[0])
89+
self.assertEqual("2025-12-31", descriptor.time_range[1])
90+
self.assertEqual("1M", descriptor.time_period)
91+
self.assertEqual(
92+
("time", "number", "lat", "lon"), descriptor.data_vars["spi1"].dims
93+
)
94+
95+
def test_open_data_reanalysis(self):
8196
opener = CDSDataOpener(
8297
client_class=get_cds_client(),
8398
endpoint_url=_CDS_API_URL,
@@ -105,6 +120,39 @@ def test_open_data(self):
105120
dataset.data_vars,
106121
)
107122

123+
def test_open_data_ensemble(self):
124+
opener = CDSDataOpener(
125+
client_class=get_cds_client(),
126+
endpoint_url=_CDS_API_URL,
127+
cds_api_key=_CDS_API_KEY,
128+
)
129+
dataset = opener.open_data(
130+
self.data_id_ensemble,
131+
variable_names=[
132+
"standardised_precipitation_index",
133+
"test_for_normality_spi",
134+
],
135+
accumulation_periods=[1, 3],
136+
bbox=[-1, -1, 1, 1],
137+
time_range=["2015-10-15", "2016-02-02"],
138+
)
139+
self.assertIsNotNone(dataset)
140+
# Monthly data is timestamped at the first of the month, so we expect
141+
# four time co-ordinates (November to February inclusive).
142+
self.assertEqual(
143+
[4, 10, 7, 7],
144+
[
145+
dataset.sizes["time"],
146+
dataset.sizes["number"],
147+
dataset.sizes["lat"],
148+
dataset.sizes["lon"],
149+
],
150+
)
151+
self.assertCountEqual(
152+
["spi1", "spi3", "spi1_significance", "spi3_significance"],
153+
dataset.data_vars,
154+
)
155+
108156
def test_get_filepath_pattern_raises(self):
109157
with self.assertRaises(ValueError) as cm:
110158
self.drought_idx_handler._get_filepath_pattern("not_a_valid_key", 12)

xcube_cds/datasets/drought_indices_era5.py

Lines changed: 76 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import os
2525
import pathlib
2626

27+
import numpy as np
2728
import pandas as pd
2829
import xarray as xr
2930
from xcube.core.store import DatasetDescriptor, VariableDescriptor
@@ -45,6 +46,10 @@ def __init__(self):
4546
"Monthly drought indices from 1940–present derived "
4647
"from ERA5 reanalysis (main run)"
4748
),
49+
"derived-drought-historical-monthly:ensemble_members": (
50+
"Monthly drought indices from 1940–present derived "
51+
"from ERA5 ensemble (10 members)"
52+
),
4853
}
4954
self._variable_names = [
5055
"standardised_precipitation_index",
@@ -114,13 +119,18 @@ def describe_data(self, data_id: str) -> DatasetDescriptor:
114119
var_name, accum_period
115120
)
116121

122+
if data_id.endswith("reanalysis"):
123+
dims = ("time", "lat", "lon")
124+
else:
125+
dims = ("time", "number", "lat", "lon")
126+
117127
variable_descriptors = []
118128
for var_name, attrs in mapping_varname_attrs.items():
119129
variable_descriptors.append(
120130
VariableDescriptor(
121131
name=var_name,
122132
dtype="float64",
123-
dims=("time", "lat", "lon"),
133+
dims=dims,
124134
attrs=attrs,
125135
)
126136
)
@@ -177,33 +187,72 @@ def read_file(
177187
zip_ref.extractall(path_temp)
178188
file_paths = glob.glob(f"{path_temp}/*")
179189
dss = []
180-
for var_name in open_params["variable_names"]:
181-
for accum_period in open_params["accumulation_periods"]:
182-
pattern = self._get_filepath_pattern(var_name, accum_period)
183-
file_sel = [path for path in file_paths if pattern in path]
184-
file_sel = sorted(file_sel)
185-
ds = xr.open_mfdataset(
186-
file_sel,
187-
engine="netcdf4",
188-
chunks="auto",
189-
combine_attrs="drop_conflicts",
190-
)
191-
if "standardised_precipitation" in var_name:
192-
ds = ds.sel(
193-
time=slice(
194-
open_params["time_range"][0], open_params["time_range"][1]
195-
)
190+
191+
if cds_api_params["product_type"] == ["reanalysis"]:
192+
for var_name in open_params["variable_names"]:
193+
for accum_period in open_params["accumulation_periods"]:
194+
pattern = self._get_filepath_pattern(var_name, accum_period)
195+
file_sel = [path for path in file_paths if pattern in path]
196+
file_sel = sorted(file_sel)
197+
ds = xr.open_mfdataset(
198+
file_sel,
199+
engine="netcdf4",
200+
chunks="auto",
201+
combine_attrs="drop_conflicts",
196202
)
197-
else:
198-
ds = self._resample_quality_ds(ds, open_params["time_range"])
199-
assert len(ds.data_vars) == 1
200-
ds_varname = self._get_varname(var_name, accum_period)
201-
ds = ds.rename({list(ds.data_vars.keys())[0]: ds_varname})
202-
dss.append(ds)
203-
ds_final = xr.merge(dss, join="outer", combine_attrs="drop_conflicts")
204-
ds_final = ds_final.sel(
205-
time=slice(open_params["time_range"][0], open_params["time_range"][1])
206-
)
203+
if "standardised_precipitation" in var_name:
204+
ds = ds.sel(
205+
time=slice(
206+
open_params["time_range"][0],
207+
open_params["time_range"][1],
208+
)
209+
)
210+
else:
211+
ds = self._resample_quality_ds(ds, open_params["time_range"])
212+
assert len(ds.data_vars) == 1
213+
ds_varname = self._get_varname(var_name, accum_period)
214+
ds = ds.rename({list(ds.data_vars.keys())[0]: ds_varname})
215+
dss.append(ds)
216+
ds_final = xr.merge(dss, join="outer", combine_attrs="drop_conflicts")
217+
ds_final = ds_final.sel(
218+
time=slice(open_params["time_range"][0], open_params["time_range"][1])
219+
)
220+
else:
221+
for var_name in open_params["variable_names"]:
222+
for accum_period in open_params["accumulation_periods"]:
223+
pattern = self._get_filepath_pattern(var_name, accum_period)
224+
file_sel = [path for path in file_paths if pattern in path]
225+
file_sel = sorted(file_sel)
226+
dss_inner = []
227+
for path in file_sel:
228+
ds = xr.open_dataset(path, engine="netcdf4", chunks="auto")
229+
time_axis = ds.time
230+
# The data from the backend uses the confusing name `time` for the
231+
# ensemble member index. We rename it to `number` to be consistent
232+
# with other ERA5 datasets, and to free up the name `time` for the actual
233+
# time.
234+
ds = ds.rename({"time": "number"})
235+
ds = ds.assign_coords(number=np.arange(10))
236+
ds = ds.expand_dims(time=[time_axis[0].values])
237+
dss_inner.append(ds)
238+
ds = xr.concat(dss_inner, "time", combine_attrs="drop_conflicts")
239+
if "standardised_precipitation" in var_name:
240+
ds = ds.sel(
241+
time=slice(
242+
open_params["time_range"][0],
243+
open_params["time_range"][1],
244+
)
245+
)
246+
else:
247+
ds = self._resample_quality_ds(ds, open_params["time_range"])
248+
assert len(ds.data_vars) == 1
249+
ds_varname = self._get_varname(var_name, accum_period)
250+
ds = ds.rename({list(ds.data_vars.keys())[0]: ds_varname})
251+
dss.append(ds)
252+
ds_final = xr.merge(dss, join="outer", combine_attrs="drop_conflicts")
253+
ds_final = ds_final.sel(
254+
time=slice(open_params["time_range"][0], open_params["time_range"][1])
255+
)
207256
return ds_final
208257

209258
@staticmethod

0 commit comments

Comments
 (0)