9 changes: 6 additions & 3 deletions Changelog.rst
@@ -1,18 +1,21 @@
version NEXTVERSION
-------------------
Version NEXTVERSION
-------------------

**2025-??-??**

* Read Zarr datasets with `cf.read`
(https://github.com/NCAS-CMS/cf-python/issues/863)
* Update CF aggregation keywords
(https://github.com/NCAS-CMS/cf-python/issues/868)
* New keyword parameter to `cf.DimensionCoordinate.create_bounds`:
``inplace`` (https://github.com/NCAS-CMS/cf-python/issues/855)
* Set new minimum version of `dask`: ``2025.5.1``
(https://github.com/NCAS-CMS/cf-python/issues/866)
* Changed dependency: ``cfdm>=1.12.2.0, <1.12.3.0``
* Changed dependency: ``dask>=2025.5.1``

----

version 3.17.0
--------------

2 changes: 1 addition & 1 deletion README.md
@@ -84,7 +84,7 @@ The `cf` package uses
[Dask](https://ncas-cms.github.io/cf-python/performance.html) for all
of its array manipulation and can:

* read field constructs from netCDF, CDL, PP and UM datasets with a
* read field constructs from netCDF, CDL, Zarr, PP and UM datasets with a
choice of netCDF backends, and in local, http, and s3 locations,
* create new field constructs in memory,
* write and append field and domain constructs to netCDF datasets on disk,
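The Changelog and README entries above advertise Zarr reading through `cf.read`. A minimal sketch of the new path, assuming automatic format detection and a hypothetical local store `example.zarr`:

```python
import cf

# Read field constructs from a Zarr store just as from a netCDF or
# CDL dataset ("example.zarr" is a hypothetical local store path)
fields = cf.read("example.zarr")
for f in fields:
    print(f)
```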
3 changes: 2 additions & 1 deletion cf/__init__.py
@@ -11,7 +11,7 @@
The `cf` package uses `dask` for all of its array manipulation and
can:

* read field constructs from netCDF, CDL, PP and UM datasets,
* read field constructs from netCDF, CDL, Zarr, PP and UM datasets,

* read field constructs and domain constructs from netCDF, CDL, PP and
UM datasets with a choice of netCDF backends,
@@ -284,6 +284,7 @@
RaggedIndexedContiguousArray,
SubsampledArray,
UMArray,
ZarrArray,
)

from .aggregate import aggregate, climatology_cells
22 changes: 13 additions & 9 deletions cf/cfimplementation.py
@@ -38,6 +38,7 @@
RaggedIndexedArray,
RaggedIndexedContiguousArray,
SubsampledArray,
ZarrArray,
)
from .functions import CF

@@ -49,8 +50,8 @@ class CFImplementation(cfdm.CFDMImplementation):

"""

def nc_set_hdf5_chunksizes(self, data, sizes, override=False):
"""Set the data HDF5 chunksizes.
def nc_set_dataset_chunksizes(self, data, sizes, override=False):
"""Set the data dataset chunksizes.

.. versionadded:: 3.16.2

@@ -60,21 +61,21 @@ def nc_set_hdf5_chunksizes(self, data, sizes, override=False):
The data.

sizes: sequence of `int`
The new HDF5 chunk sizes.
The new dataset chunk sizes.

override: `bool`, optional
If True then set the HDF5 chunks sizes even if some
If True then set the dataset chunk sizes even if some
have already been specified. If False, the default,
then only set the HDF5 chunks sizes if some none have
already been specified.
then only set the dataset chunk sizes if none
have already been specified.

:Returns:

`None`

"""
if override or not data.nc_hdf5_chunksizes():
data.nc_set_hdf5_chunksizes(sizes)
if override or not data.nc_dataset_chunksizes():
data.nc_set_dataset_chunksizes(sizes)

def set_construct(self, parent, construct, axes=None, copy=True, **kwargs):
"""Insert a construct into a field or domain.
@@ -151,6 +152,7 @@ def set_construct(self, parent, construct, axes=None, copy=True, **kwargs):
RaggedIndexedContiguousArray=RaggedIndexedContiguousArray,
SubsampledArray=SubsampledArray,
TiePointIndex=TiePointIndex,
ZarrArray=ZarrArray,
)


@@ -205,7 +207,9 @@ def implementation():
'RaggedIndexedArray': cf.data.array.raggedindexedarray.RaggedIndexedArray,
'RaggedIndexedContiguousArray': cf.data.array.raggedindexedcontiguousarray.RaggedIndexedContiguousArray,
'SubsampledArray': cf.data.array.subsampledarray.SubsampledArray,
'TiePointIndex': cf.tiepointindex.TiePointIndex}
'TiePointIndex': cf.tiepointindex.TiePointIndex,
'ZarrArray': cf.data.array.zarrarray.ZarrArray,
}

"""
return _implementation.copy()
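The renamed `nc_set_dataset_chunksizes` helper above only applies the sizes when none have been recorded yet, unless `override=True`. A sketch of the equivalent calls on a `Data` object, assuming the renamed cfdm accessors mirror their `hdf5`-named predecessors:

```python
import cf

d = cf.Data([[1, 2, 3], [4, 5, 6]])

# Mirrors the helper's override=False branch: only set the sizes
# when no strategy has been recorded yet
if not d.nc_dataset_chunksizes():
    d.nc_set_dataset_chunksizes([1, 3])

print(d.nc_dataset_chunksizes())  # (1, 3)

# override=True is equivalent to setting unconditionally
d.nc_set_dataset_chunksizes([2, 3])
```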
1 change: 1 addition & 0 deletions cf/data/array/__init__.py
@@ -12,3 +12,4 @@
from .raggedindexedcontiguousarray import RaggedIndexedContiguousArray
from .subsampledarray import SubsampledArray
from .umarray import UMArray
from .zarrarray import ZarrArray
15 changes: 15 additions & 0 deletions cf/data/array/zarrarray.py
@@ -0,0 +1,15 @@
import cfdm

from ...mixin_container import Container

# Uncomment when we can use active storage on Zarr datasets:
# from .mixin import ActiveStorageMixin


class ZarrArray(
# Uncomment when we can use active storage on Zarr datasets:
# ActiveStorageMixin,
Container,
cfdm.ZarrArray,
):
"""A Zarr array accessed with `zarr`."""
12 changes: 6 additions & 6 deletions cf/data/data.py
@@ -5532,7 +5532,7 @@ def outerproduct(self, a, inplace=False, i=False):
d = _inplace_enabled_define_and_cleanup(self)

shape = d.shape
chunksizes0 = d.nc_hdf5_chunksizes()
chunksizes0 = d.nc_dataset_chunksizes()

# Cast 'a' as a Data object so that it definitely has sensible
# Units. We don't mind if the units of 'a' are incompatible
@@ -5563,19 +5563,19 @@
for a_axis in a._cyclic:
d.cyclic(ndim + a._axes.index(a_axis))

# Update the HDF5 chunking strategy
chunksizes1 = a.nc_hdf5_chunksizes()
# Update the dataset chunking strategy
chunksizes1 = a.nc_dataset_chunksizes()
if chunksizes0 or chunksizes1:
if isinstance(chunksizes0, tuple):
if isinstance(chunksizes1, tuple):
chunksizes = chunksizes0 + chunksizes1
else:
chunksizes = chunksizes0 + a.shape

d.nc_set_hdf5_chunksizes(chunksizes)
d.nc_set_dataset_chunksizes(chunksizes)
elif isinstance(chunksizes1, tuple):
chunksizes = shape + chunksizes1
d.nc_set_hdf5_chunksizes(chunksizes)
d.nc_set_dataset_chunksizes(chunksizes)

d._update_deterministic(a)
return d
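The updated block concatenates the two operands' dataset chunking strategies, substituting an operand's full shape when it has no explicit strategy. A sketch of the intended outcome (the printed values follow the diff's logic rather than a verified run):

```python
import cf

d = cf.Data([[1.0, 2.0], [3.0, 4.0]], "m")
d.nc_set_dataset_chunksizes((1, 2))

a = cf.Data([0.5, 1.5, 2.5], "s")  # no explicit chunking strategy

e = d.outerproduct(a)
print(e.shape)                    # (2, 2, 3)
# 'a' has no strategy, so its full shape (3,) is appended to d's sizes
print(e.nc_dataset_chunksizes())  # (1, 2, 3)
```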
@@ -6259,7 +6259,7 @@ def reshape(self, *shape, merge_chunks=True, limit=None, inplace=False):

# Clear cyclic axes, as we can't help but lose them in this
# operation
d._cyclic = _empty_set
del d._cyclic

return d

18 changes: 10 additions & 8 deletions cf/data/mixin/deprecations.py
@@ -437,15 +437,17 @@ def dumps(self):
def HDF_chunks(self, *chunks):
"""Get or set HDF chunk sizes.

Deprecated at version 3.14.0 and is no longer available. Use
the methods `nc_clear_dataset_chunksizes`,
`nc_dataset_chunksizes`, and `nc_set_dataset_chunksizes`
instead.

The HDF chunk sizes may be used by external code that allows
`Data` objects to be written to netCDF files.

Deprecated at version 3.14.0 and is no longer available. Use
the methods `nc_clear_hdf5_chunksizes`, `nc_hdf5_chunksizes`,
and `nc_set_hdf5_chunksizes` instead.

.. seealso:: `nc_clear_hdf5_chunksizes`, `nc_hdf5_chunksizes`,
`nc_set_hdf5_chunksizes`
.. seealso:: `nc_clear_dataset_chunksizes`,
`nc_dataset_chunksizes`,
`nc_set_dataset_chunksizes`

:Parameters:

@@ -506,8 +508,8 @@ def HDF_chunks(self, *chunks):
_DEPRECATION_ERROR_METHOD(
self,
"HDF_chunks",
message="Use the methods 'nc_clear_hdf5_chunksizes', "
"'nc_hdf5_chunksizes', and 'nc_set_hdf5_chunksizes' "
message="Use the methods 'nc_clear_dataset_chunksizes', "
"'nc_dataset_chunksizes', and 'nc_set_dataset_chunksizes' "
"instead.",
version="3.14.0",
removed_at="5.0.0",
4 changes: 2 additions & 2 deletions cf/data/utils.py
@@ -435,8 +435,8 @@ def collapse(
d._axes = [a for i, a in enumerate(d._axes) if i not in axis]

if d.size != original_size:
# Remove the out-dated HDF5 chunking strategy
d.nc_clear_hdf5_chunksizes()
# Remove the out-dated dataset chunking strategy
d.nc_clear_dataset_chunksizes()

return d, weights

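The `collapse` change drops a chunking strategy that no longer matches the data's shape. A sketch, assuming reductions such as `Data.sum` route through this helper and that an unset strategy reads back as `None`:

```python
import cf

d = cf.Data([[1.0, 2.0], [3.0, 4.0]])
d.nc_set_dataset_chunksizes((2, 2))

# The collapse shrinks the data, so the (2, 2) strategy is stale
# and gets cleared
e = d.sum(axes=0)
print(e.nc_dataset_chunksizes())  # None (expected)
```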
15 changes: 8 additions & 7 deletions cf/field.py
@@ -13991,19 +13991,20 @@ def field(
) # pragma: no cover

def HDF_chunks(self, *chunksizes):
"""Deprecated at version 3.0.0.
"""Get or set HDF chunk sizes.

Use methods 'Data.nc_hdf5_chunksizes',
'Data.nc_set_hdf5_chunksizes', 'Data.nc_clear_hdf5_chunksizes'
instead.
Deprecated at version 3.0.0 and is no longer available. Use
methods `Data.nc_dataset_chunksizes`,
`Data.nc_set_dataset_chunksizes`,
`Data.nc_clear_dataset_chunksizes` instead.

"""
_DEPRECATION_ERROR_METHOD(
self,
"HDF_chunks",
"Use methods 'Data.nc_hdf5_chunksizes', "
"'Data.nc_set_hdf5_chunksizes', "
"'Data.nc_clear_hdf5_chunksizes' instead.",
"Use methods 'Data.nc_dataset_chunksizes', "
"'Data.nc_set_dataset_chunksizes', "
"'Data.nc_clear_dataset_chunksizes' instead.",
version="3.0.0",
removed_at="4.0.0",
) # pragma: no cover
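For `Field` users, the updated deprecation message points at the `Data`-level accessors, reached through the field's `data` attribute. A sketch of the replacement pattern, using `cf.example_field` (not part of this diff) and assuming `'contiguous'` remains an accepted strategy value:

```python
import cf

f = cf.example_field(0)

# Instead of the removed f.HDF_chunks(...), operate on the data:
f.data.nc_set_dataset_chunksizes("contiguous")
print(f.data.nc_dataset_chunksizes())  # 'contiguous'
f.data.nc_clear_dataset_chunksizes()
```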