Skip to content
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
81 commits
Select commit Hold shift + click to select a range
e347f40
add basic zarr support
valeriupredoi Jul 24, 2025
5c32b55
add basic test
valeriupredoi Jul 24, 2025
682f46d
add sample zarr store
valeriupredoi Jul 24, 2025
81c254c
add sample zarr store
valeriupredoi Jul 24, 2025
6a02757
turn on gha
valeriupredoi Jul 24, 2025
84412ab
add zarr as dependency
valeriupredoi Jul 24, 2025
1f8e127
add zarr as dependency
valeriupredoi Jul 24, 2025
5b97169
account for remote zarrs
valeriupredoi Jul 24, 2025
8bcc15f
add test case for remote zarr
valeriupredoi Jul 24, 2025
c0b049c
functional remote Zarr and cleanup
valeriupredoi Jul 24, 2025
e5f8c4e
add utility and test for remote zarr
valeriupredoi Jul 24, 2025
9265b0d
add intake-esm as dependency
valeriupredoi Jul 24, 2025
4be6152
add aiohttp as dependency
valeriupredoi Jul 24, 2025
28f647f
fixture
valeriupredoi Jul 24, 2025
6da4183
remove unwanted (for now) fixture altogether
valeriupredoi Jul 24, 2025
fb7712a
remove unneeded import
valeriupredoi Jul 24, 2025
95a92c9
add storeage options
valeriupredoi Jul 25, 2025
872be18
semi-working version for publick bucket for esmvaltool
valeriupredoi Jul 25, 2025
971cf34
correct bucket with correct permissions and working test
valeriupredoi Jul 28, 2025
0eeeb50
add yet another test
valeriupredoi Jul 28, 2025
f5d13c8
adjust test member docstring
valeriupredoi Jul 28, 2025
fa8b90a
make io more robust
valeriupredoi Jul 28, 2025
cccdb39
change api
valeriupredoi Jul 28, 2025
1618076
test changed api
valeriupredoi Jul 28, 2025
e2ed41c
add basic test for zarr file
valeriupredoi Jul 28, 2025
fe7326e
add test for file with issues
valeriupredoi Jul 28, 2025
39df34e
reduce pytest runners to 2
valeriupredoi Jul 28, 2025
2b44ac9
run only test load
valeriupredoi Jul 28, 2025
caff216
skip a test
valeriupredoi Jul 28, 2025
d48418c
change skip message
valeriupredoi Jul 29, 2025
0909770
restore circle ci configuration
valeriupredoi Jul 29, 2025
e87b12b
skip the other test that uses the healpix dataset
valeriupredoi Jul 29, 2025
37d8a31
removed problematic skipped tests
valeriupredoi Jul 29, 2025
0d446af
add dedicated Zarr IO test module
valeriupredoi Jul 29, 2025
37fcfff
add xr to ncdata test
valeriupredoi Jul 29, 2025
94d8677
add pytest marker
valeriupredoi Jul 29, 2025
7ac7b45
run zarr test single proc
valeriupredoi Jul 29, 2025
caa3657
mark test
valeriupredoi Jul 29, 2025
b4c6b6f
remove pytest marker
valeriupredoi Jul 29, 2025
48db5f3
restore circleci configuration
valeriupredoi Jul 29, 2025
0afcec7
unmark test but dont use cf_time flag
valeriupredoi Jul 29, 2025
0c4a16f
set consolidated to False
valeriupredoi Jul 29, 2025
72d79c2
found hang cause
valeriupredoi Jul 29, 2025
8e54f1e
add Ncdata issue pointer
valeriupredoi Jul 29, 2025
1572fff
replace deprecated use cftime
valeriupredoi Jul 29, 2025
b1fe4b8
add zar3 test and fixed deprecated call with cftime
valeriupredoi Jul 29, 2025
f5c5979
add test non existing file
valeriupredoi Jul 30, 2025
8cddb55
add CMIP6 Zarr store and metadata test for it
valeriupredoi Jul 30, 2025
0d71de7
add test resources
valeriupredoi Jul 30, 2025
2ab8fc0
add purely diagnostic test
valeriupredoi Jul 30, 2025
b01b578
feed the PEP typing moster an actual type
valeriupredoi Jul 30, 2025
ea9377a
cleanup tests
valeriupredoi Jul 30, 2025
72d87bc
cleanup implement
valeriupredoi Jul 30, 2025
76b32b4
dict typing
valeriupredoi Jul 30, 2025
90c8963
Merge branch 'main' into zarr_support
valeriupredoi Jul 30, 2025
7af2ec4
Update esmvalcore/preprocessor/_io.py
valeriupredoi Jul 31, 2025
c151b57
Update esmvalcore/preprocessor/_io.py
valeriupredoi Jul 31, 2025
ab78052
Update esmvalcore/preprocessor/_io.py
valeriupredoi Jul 31, 2025
b5c3301
add mention about backend dict
valeriupredoi Jul 31, 2025
d514b67
add inline text
valeriupredoi Jul 31, 2025
2852381
removed all Zarr tests and moved to test_zarr.py
valeriupredoi Jul 31, 2025
49fb643
moved all tests from test_load here and removed tests that dont test …
valeriupredoi Jul 31, 2025
6a554d8
add mention about s3 bucket
valeriupredoi Jul 31, 2025
683b6e8
spruce up zarr tests and add an extra test for local files
valeriupredoi Jul 31, 2025
f2923e6
add dummy zar plaintext file
valeriupredoi Jul 31, 2025
8c49e20
dont match to exception string
valeriupredoi Jul 31, 2025
8b6f221
add info on further testing
valeriupredoi Jul 31, 2025
63411cb
unrun GHA
valeriupredoi Jul 31, 2025
84a33f2
add str path test
valeriupredoi Jul 31, 2025
8909b7d
Update esmvalcore/preprocessor/_io.py
valeriupredoi Jul 31, 2025
eff8956
Update esmvalcore/preprocessor/_io.py
valeriupredoi Jul 31, 2025
a2e31ab
Update esmvalcore/preprocessor/_io.py
valeriupredoi Jul 31, 2025
a387558
Update esmvalcore/preprocessor/_io.py
valeriupredoi Jul 31, 2025
37266da
Update esmvalcore/preprocessor/_io.py
valeriupredoi Jul 31, 2025
e13a19e
Update esmvalcore/preprocessor/_io.py
valeriupredoi Jul 31, 2025
cef79ce
Update esmvalcore/preprocessor/_io.py
valeriupredoi Jul 31, 2025
63b817f
fix pytest msg regex
valeriupredoi Jul 31, 2025
71ebe4e
better handling of exceptions
valeriupredoi Jul 31, 2025
171ea74
Update tests/integration/preprocessor/_io/test_zarr.py
valeriupredoi Jul 31, 2025
464c9f3
Update tests/integration/preprocessor/_io/test_zarr.py
valeriupredoi Jul 31, 2025
66f9811
Update tests/integration/preprocessor/_io/test_zarr.py
valeriupredoi Jul 31, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/run-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ on:
push:
branches:
- main
- zarr_support
# run the test only if the PR is to main
# turn it on if required
#pull_request:
Expand Down
3 changes: 3 additions & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ channels:
- nodefaults

dependencies:
- aiohttp
- cartopy
- cf-units
- cftime
Expand All @@ -18,6 +19,7 @@ dependencies:
- fire
- geopy
- humanfriendly
- intake-esm
- iris >=3.12.2 # https://github.com/SciTools/iris/issues/6417
- iris-esmf-regrid >=0.11.0
- iris-grib >=0.20.0 # github.com/ESMValGroup/ESMValCore/issues/2535
Expand Down Expand Up @@ -46,6 +48,7 @@ dependencies:
- shapely >=2.0.0
- xarray
- yamale
- zarr >3
# Python packages needed for building docs
- autodocsumm >=0.2.2
- ipython <9.0 # github.com/ESMValGroup/ESMValCore/issues/2680
Expand Down
22 changes: 21 additions & 1 deletion esmvalcore/preprocessor/_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from itertools import groupby
from pathlib import Path
from typing import TYPE_CHECKING, Any
from urllib.parse import urlparse

import iris
import ncdata
Expand Down Expand Up @@ -83,6 +84,7 @@
file:
File to be loaded. If ``file`` is already a loaded dataset, return it
as a :class:`~iris.cube.CubeList`.
File as ``Path`` object could be a Zarr store.
ignore_warnings:
Keyword arguments passed to :func:`warnings.filterwarnings` used to
ignore warnings issued by :func:`iris.load_raw`. Each list element
Expand All @@ -102,7 +104,11 @@
"""
if isinstance(file, (str, Path)):
cubes = _load_from_file(file, ignore_warnings=ignore_warnings)
if "zarr" not in str(file):
cubes = _load_from_file(file, ignore_warnings=ignore_warnings)
else:
zarr_xr = _load_zarr(file)

Check warning on line 110 in esmvalcore/preprocessor/_io.py

View check run for this annotation

Codacy Production / Codacy Static Code Analysis

esmvalcore/preprocessor/_io.py#L110

Call to untyped function "_load_zarr" in typed context. (no-untyped-call)
cubes = dataset_to_iris(zarr_xr, ignore_warnings=ignore_warnings)
elif isinstance(file, Cube):
cubes = CubeList([file])
elif isinstance(file, CubeList):
Expand Down Expand Up @@ -134,6 +140,20 @@
return cubes


def _load_zarr(file: str | Path) -> xr.Dataset:
    """Open a Zarr store as an :class:`xarray.Dataset`.

    Parameters
    ----------
    file:
        Location of the Zarr store. A string that parses as a URL with
        both a scheme and a network location (for example
        ``https://host/bucket/store.zarr``) is opened as a remote store;
        anything else (a :class:`~pathlib.Path` or a plain path string)
        is opened as a local store.

    Returns
    -------
    xarray.Dataset
        The dataset opened from the Zarr store.
    """
    # ``urlparse`` always returns a non-empty (hence truthy) result tuple,
    # so its components have to be inspected explicitly to recognise a URL.
    # Requiring a network location as well keeps Windows drive paths such
    # as ``C:\data\store.zarr`` (scheme ``c``, empty netloc) local.
    if isinstance(file, str):
        parsed = urlparse(file)
        if parsed.scheme and parsed.netloc:
            # Remote store: opened with consolidated metadata and
            # cftime-based time decoding.
            return xr.open_dataset(
                file,
                consolidated=True,
                use_cftime=True,
                engine="zarr",
            )

    # Local store: opened without relying on consolidated metadata.
    return xr.open_zarr(file, consolidated=False)

Check notice on line 154 in esmvalcore/preprocessor/_io.py

View check run for this annotation

Codacy Production / Codacy Static Code Analysis

esmvalcore/preprocessor/_io.py#L154

Possibly using variable 'zarr_xr' before assignment (possibly-used-before-assignment)


def _load_from_file(
file: str | Path,
ignore_warnings: list[dict[str, Any]] | None = None,
Expand Down
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ dynamic = [
"version",
]
dependencies = [
"aiohttp",
"cartopy",
"cf-units",
"dask[array,distributed]>=2025", # Core/issues/2503
Expand All @@ -44,6 +45,7 @@ dependencies = [
"fire",
"geopy",
"humanfriendly",
"intake-esm",
"iris-grib>=0.20.0", # github.com/ESMValGroup/ESMValCore/issues/2535
"isodate>=0.7.0",
"jinja2",
Expand All @@ -68,6 +70,7 @@ dependencies = [
"stratify>=0.3",
"xarray",
"yamale",
"zarr>3",
]
description = "A community tool for pre-processing data from Earth system models in CMIP and running analysis scripts"
license = {text = "Apache License, Version 2.0"}
Expand Down
38 changes: 38 additions & 0 deletions tests/integration/preprocessor/_io/test_load.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from importlib.resources import files as importlib_files
from pathlib import Path

import cf_units
import iris
import ncdata
import numpy as np
Expand Down Expand Up @@ -120,6 +121,43 @@ def test_load_ncdata():
assert not cube.coords()


def test_load_zarr_local():
    """Check that the local sample Zarr store loads as one expected cube."""
    store = Path(importlib_files("tests")).joinpath(
        "sample_data",
        "zarr-sample-data",
        "example_field_0.zarr2",
    )

    loaded = load(store)

    # The sample store is expected to yield exactly one cube.
    assert len(loaded) == 1
    humidity = loaded[0]

    # Variable metadata.
    assert humidity.var_name == "q"
    assert humidity.standard_name == "specific_humidity"
    assert humidity.long_name is None
    assert humidity.units == cf_units.Unit("1")

    # Horizontal coordinates must be attached to the cube.
    standard_names = {coord.standard_name for coord in humidity.coords()}
    assert "longitude" in standard_names
    assert "latitude" in standard_names


def test_load_zarr_remote():
    """Test loading a Zarr store from a https Object Store."""
    zarr_path = (
        "https://hackathon-o.s3-ext.jc.rl.ac.uk/sim-data/dev/v5/"
        "glm.n2560_RAL3p3/um.PT1H.hp_z2.zarr"
    )
    cubes = load(zarr_path)

    air_temperature_cubes = [
        cube for cube in cubes if cube.standard_name == "air_temperature"
    ]
    # Without this guard the test would pass vacuously when nothing was
    # loaded or when no air temperature cube is present.
    assert air_temperature_cubes

    for cube in air_temperature_cubes:
        coord_names = [coord.standard_name for coord in cube.coords()]
        assert "time" in coord_names


def test_load_invalid_type_fail():
"""Test loading an invalid type."""
with pytest.raises(TypeError):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"Conventions": "CF-1.12"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"zarr_format": 2
}
171 changes: 171 additions & 0 deletions tests/sample_data/zarr-sample-data/example_field_0.zarr2/.zmetadata
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
{
"metadata": {
".zattrs": {
"Conventions": "CF-1.12"
},
".zgroup": {
"zarr_format": 2
},
"lat/.zarray": {
"chunks": [
5
],
"compressor": {
"blocksize": 0,
"clevel": 5,
"cname": "lz4",
"id": "blosc",
"shuffle": 1
},
"dtype": "<f8",
"fill_value": "NaN",
"filters": null,
"order": "C",
"shape": [
5
],
"zarr_format": 2
},
"lat/.zattrs": {
"_ARRAY_DIMENSIONS": [
"lat"
],
"bounds": "lat_bnds",
"standard_name": "latitude",
"units": "degrees_north"
},
"lat_bnds/.zarray": {
"chunks": [
3,
2
],
"compressor": {
"blocksize": 0,
"clevel": 5,
"cname": "lz4",
"id": "blosc",
"shuffle": 1
},
"dtype": "<f8",
"fill_value": "NaN",
"filters": null,
"order": "C",
"shape": [
5,
2
],
"zarr_format": 2
},
"lat_bnds/.zattrs": {
"_ARRAY_DIMENSIONS": [
"lat",
"bounds2"
]
},
"lon/.zarray": {
"chunks": [
8
],
"compressor": {
"blocksize": 0,
"clevel": 5,
"cname": "lz4",
"id": "blosc",
"shuffle": 1
},
"dtype": "<f8",
"fill_value": "NaN",
"filters": null,
"order": "C",
"shape": [
8
],
"zarr_format": 2
},
"lon/.zattrs": {
"_ARRAY_DIMENSIONS": [
"lon"
],
"bounds": "lon_bnds",
"standard_name": "longitude",
"units": "degrees_east"
},
"lon_bnds/.zarray": {
"chunks": [
4,
2
],
"compressor": {
"blocksize": 0,
"clevel": 5,
"cname": "lz4",
"id": "blosc",
"shuffle": 1
},
"dtype": "<f8",
"fill_value": "NaN",
"filters": null,
"order": "C",
"shape": [
8,
2
],
"zarr_format": 2
},
"lon_bnds/.zattrs": {
"_ARRAY_DIMENSIONS": [
"lon",
"bounds2"
]
},
"q/.zarray": {
"chunks": [
3,
4
],
"compressor": {
"blocksize": 0,
"clevel": 5,
"cname": "lz4",
"id": "blosc",
"shuffle": 1
},
"dtype": "<f8",
"fill_value": "NaN",
"filters": null,
"order": "C",
"shape": [
5,
8
],
"zarr_format": 2
},
"q/.zattrs": {
"_ARRAY_DIMENSIONS": [
"lat",
"lon"
],
"cell_methods": "area: mean",
"coordinates": "time",
"project": "research",
"standard_name": "specific_humidity",
"units": "1"
},
"time/.zarray": {
"chunks": [],
"compressor": null,
"dtype": "<f8",
"fill_value": "NaN",
"filters": null,
"order": "C",
"shape": [],
"zarr_format": 2
},
"time/.zattrs": {
"_ARRAY_DIMENSIONS": [],
"standard_name": "time",
"units": "days since 2018-12-01"
}
},
"zarr_consolidated_format": 1
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"chunks": [
5
],
"compressor": {
"blocksize": 0,
"clevel": 5,
"cname": "lz4",
"id": "blosc",
"shuffle": 1
},
"dtype": "<f8",
"fill_value": "NaN",
"filters": null,
"order": "C",
"shape": [
5
],
"zarr_format": 2
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"_ARRAY_DIMENSIONS": [
"lat"
],
"bounds": "lat_bnds",
"standard_name": "latitude",
"units": "degrees_north"
}
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"chunks": [
3,
2
],
"compressor": {
"blocksize": 0,
"clevel": 5,
"cname": "lz4",
"id": "blosc",
"shuffle": 1
},
"dtype": "<f8",
"fill_value": "NaN",
"filters": null,
"order": "C",
"shape": [
5,
2
],
"zarr_format": 2
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"_ARRAY_DIMENSIONS": [
"lat",
"bounds2"
]
}
Binary file not shown.
Binary file not shown.
Loading
Loading