Skip to content

Commit 60b4f8d

Browse files
Pass chunks={} to Xarray dataset loader for Zarr stores (#2794)
Co-authored-by: Manuel Schlund <32543114+schlunma@users.noreply.github.com>
1 parent cfb9b1e commit 60b4f8d

File tree

1 file changed

+29
-14
lines changed

1 file changed

+29
-14
lines changed

esmvalcore/preprocessor/_io.py

Lines changed: 29 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,26 @@ def _load_zarr(
163163
ignore_warnings: list[dict[str, Any]] | None = None,
164164
backend_kwargs: dict[str, Any] | None = None,
165165
) -> CubeList:
166+
# note on ``chunks`` kwarg to ``xr.open_dataset()``
167+
# docs.xarray.dev/en/stable/generated/xarray.open_dataset.html
168+
# this is very important because with ``chunks=None`` (default)
169+
# data will be realized as NumPy arrays and transferred into memory;
170+
# ``chunks={}`` loads the data with dask using the engine preferred
171+
# chunk size, generally identical to the format's chunk size. If not
172+
# available, a single chunk is used for all arrays; testing shows this is the
173+
# "best guess" compromise for typical CMIP-like chunked data.
174+
# see https://github.com/pydata/xarray/issues/10612 and
175+
# https://github.com/pp-mo/ncdata/issues/139
176+
177+
time_coder = xr.coders.CFDatetimeCoder(use_cftime=True)
178+
open_kwargs = {
179+
"consolidated": False,
180+
"decode_times": time_coder,
181+
"engine": "zarr",
182+
"chunks": {},
183+
"backend_kwargs": backend_kwargs,
184+
}
185+
166186
# case 1: Zarr store is on remote object store
167187
# file's URI scheme will always be either http or https
168188
if urlparse(str(file)).scheme in ["http", "https"]:
@@ -185,22 +205,17 @@ def _load_zarr(
185205
)
186206
raise ValueError(msg)
187207

188-
time_coder = xr.coders.CFDatetimeCoder(use_cftime=True)
189-
zarr_xr = xr.open_dataset(
190-
file,
191-
consolidated=True,
192-
decode_times=time_coder,
193-
engine="zarr",
194-
backend_kwargs=backend_kwargs,
195-
)
208+
open_kwargs["consolidated"] = True
209+
zarr_xr = xr.open_dataset(file, **open_kwargs)
196210
# case 2: Zarr store is local to the file system
197211
else:
198-
zarr_xr = xr.open_dataset(
199-
file,
200-
consolidated=False,
201-
engine="zarr",
202-
backend_kwargs=backend_kwargs,
203-
)
212+
zarr_xr = xr.open_dataset(file, **open_kwargs)
213+
214+
# avoid possible
215+
# ValueError: Object has inconsistent chunks along dimension time.
216+
# This can be fixed by calling unify_chunks().
217+
# when trying to access the dataset's ``chunks`` attribute
218+
zarr_xr = zarr_xr.unify_chunks()
204219

205220
return dataset_to_iris(zarr_xr, ignore_warnings=ignore_warnings)
206221

0 commit comments

Comments
 (0)