Skip to content

Commit 354f3e5

Browse files
committed
Dataless netcdf load+save; plus tests.
1 parent 7536010 commit 354f3e5

File tree

3 files changed

+121
-8
lines changed

3 files changed

+121
-8
lines changed

lib/iris/fileformats/netcdf/loader.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -392,8 +392,17 @@ def _load_cube_inner(engine, cf, cf_var, filename):
392392
from iris.cube import Cube
393393

394394
"""Create the cube associated with the CF-netCDF data variable."""
395-
data = _get_cf_var_data(cf_var)
396-
cube = Cube(data)
395+
from iris.fileformats.netcdf.saver import Saver
396+
397+
if hasattr(cf_var, Saver._DATALESS_ATTRNAME):
398+
# This data-variable represents a dataless cube.
399+
# The variable array content was never written (to take up no space).
400+
data = None
401+
shape = cf_var.shape
402+
else:
403+
data = _get_cf_var_data(cf_var)
404+
shape = None
405+
cube = Cube(data=data, shape=shape)
397406

398407
# Reset the actions engine.
399408
engine.reset()

lib/iris/fileformats/netcdf/saver.py

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2275,6 +2275,10 @@ def _create_cf_grid_mapping(self, cube, cf_var_cube):
22752275
if grid_mapping:
22762276
_setncattr(cf_var_cube, "grid_mapping", grid_mapping)
22772277

2278+
_DATALESS_ATTRNAME = "iris_dataless_cube"
2279+
_DATALESS_DTYPE = np.dtype("u1")
2280+
_DATALESS_FILLVALUE = 127
2281+
22782282
def _create_cf_data_variable(
22792283
self,
22802284
cube,
@@ -2315,9 +2319,19 @@ def _create_cf_data_variable(
23152319
# TODO: when iris.FUTURE.save_split_attrs is removed, the 'local_keys' arg can
23162320
# be removed.
23172321
# Get the values in a form which is valid for the file format.
2318-
data = self._ensure_valid_dtype(cube.core_data(), "cube", cube)
2322+
is_dataless = cube.is_dataless()
2323+
if is_dataless:
2324+
data = None
2325+
else:
2326+
data = self._ensure_valid_dtype(cube.core_data(), "cube", cube)
23192327

2320-
if packing:
2328+
if is_dataless:
2329+
# The variable must have *some* dtype, and it must be maskable
2330+
dtype = self._DATALESS_DTYPE
2331+
fill_value = self._DATALESS_FILLVALUE
2332+
elif not packing:
2333+
dtype = data.dtype.newbyteorder("=")
2334+
else:
23212335
if isinstance(packing, dict):
23222336
if "dtype" not in packing:
23232337
msg = "The dtype attribute is required for packing."
@@ -2355,8 +2369,6 @@ def _create_cf_data_variable(
23552369
add_offset = (cmax + cmin) / 2
23562370
else:
23572371
add_offset = cmin + 2 ** (n - 1) * scale_factor
2358-
else:
2359-
dtype = data.dtype.newbyteorder("=")
23602372

23612373
def set_packing_ncattrs(cfvar):
23622374
"""Set netCDF packing attributes.
@@ -2380,8 +2392,9 @@ def set_packing_ncattrs(cfvar):
23802392
cf_name, dtype, dimension_names, fill_value=fill_value, **kwargs
23812393
)
23822394

2383-
set_packing_ncattrs(cf_var)
2384-
self._lazy_stream_data(data=data, cf_var=cf_var)
2395+
if not is_dataless:
2396+
set_packing_ncattrs(cf_var)
2397+
self._lazy_stream_data(data=data, cf_var=cf_var)
23852398

23862399
if cube.standard_name:
23872400
_setncattr(cf_var, "standard_name", cube.standard_name)
@@ -2446,6 +2459,10 @@ def set_packing_ncattrs(cfvar):
24462459

24472460
_setncattr(cf_var, attr_name, value)
24482461

2462+
# Add the 'dataless' marker if needed
2463+
if is_dataless:
2464+
_setncattr(cf_var, self._DATALESS_ATTRNAME, "true")
2465+
24492466
# Create the CF-netCDF data variable cell method attribute.
24502467
cell_methods = self._create_cf_cell_methods(cube, dimension_names)
24512468

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
# Copyright Iris contributors
2+
#
3+
# This file is part of Iris and is released under the BSD license.
4+
# See LICENSE in the root of the repository for full licensing details.
5+
"""Integration tests for save+load of dataless cubes."""
6+
7+
import numpy as np
8+
import pytest
9+
10+
import iris
11+
from iris.coords import DimCoord
12+
from iris.cube import Cube
13+
from iris.fileformats.netcdf._thread_safe_nc import netCDF4 as nc
14+
from iris.fileformats.netcdf.saver import Saver
15+
16+
17+
class TestDataless:
18+
@pytest.fixture(autouse=True)
19+
def setup(self, tmp_path_factory):
20+
ny, nx = 3, 4
21+
self.testcube = Cube(
22+
shape=(ny, nx),
23+
long_name="testdata",
24+
dim_coords_and_dims=[
25+
(DimCoord(np.arange(ny), long_name="y"), 0),
26+
(DimCoord(np.arange(nx), long_name="x"), 1),
27+
],
28+
)
29+
self.testdir = tmp_path_factory.mktemp("dataless")
30+
self.test_path = self.testdir / "test.nc"
31+
32+
@staticmethod
33+
def _strip_saveload_additions(reloaded_cube):
34+
reloaded_cube.attributes.pop("Conventions", None)
35+
reloaded_cube.var_name = None
36+
for co in reloaded_cube.coords():
37+
co.var_name = None
38+
39+
def test_dataless_save(self):
40+
# Check that we can save a dataless cube, and what that looks like in the file.
41+
iris.save(self.testcube, self.test_path)
42+
assert Saver._DATALESS_ATTRNAME not in self.testcube.attributes
43+
# Check the content as seen in the file
44+
ncds = nc.Dataset(self.test_path)
45+
var = ncds.variables["testdata"]
46+
assert Saver._DATALESS_ATTRNAME in var.ncattrs()
47+
assert var.dtype == Saver._DATALESS_DTYPE
48+
assert "_FillValue" in var.ncattrs()
49+
assert var._FillValue == Saver._DATALESS_FILLVALUE
50+
assert np.all(np.ma.getmaskarray(var[:]) == True) # noqa: E712
51+
52+
def test_dataless_load(self):
53+
# Check that we can load a saved dataless cube, and it matches the original.
54+
iris.save(self.testcube, self.test_path)
55+
56+
# NB Load with load_raw, since we haven't finished supporting dataless merge.
57+
(result_cube,) = iris.load_raw(self.test_path)
58+
assert result_cube.is_dataless()
59+
assert "iris_dataless_cube" not in result_cube.attributes
60+
61+
# strip off extra things added by netcdf save+load
62+
self._strip_saveload_additions(result_cube)
63+
64+
# Result now == original
65+
assert result_cube == self.testcube
66+
67+
def test_mixture_saveload(self):
68+
# Check that a mixture of dataless and "normal" cubes can be saved + loaded back
69+
dataless = self.testcube
70+
ny = dataless.shape[0]
71+
dataful = Cube(
72+
np.ones((ny, 3)),
73+
long_name="other",
74+
dim_coords_and_dims=[(dataless.coord("y"), 0)],
75+
)
76+
iris.save([dataless, dataful], self.test_path)
77+
# NB Load with load_raw, since we haven't finished supporting dataless merge.
78+
cubes = iris.load_raw(self.test_path)
79+
assert len(cubes) == 2
80+
read_dataless = cubes.extract_cube("testdata")
81+
read_dataful = cubes.extract_cube("other")
82+
assert read_dataless.is_dataless()
83+
assert not read_dataful.is_dataless()
84+
for cube in (read_dataless, read_dataful):
85+
self._strip_saveload_additions(cube)
86+
assert read_dataless == dataless
87+
assert read_dataful == dataful

0 commit comments

Comments
 (0)