From 354f3e5db41b77d3d7cdd87eeb86765e635a4da8 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Tue, 7 Oct 2025 14:03:23 +0100 Subject: [PATCH 1/4] Dataless netcdf load+save; plus tests. --- lib/iris/fileformats/netcdf/loader.py | 13 ++- lib/iris/fileformats/netcdf/saver.py | 29 +++++-- .../tests/integration/netcdf/test_dataless.py | 87 +++++++++++++++++++ 3 files changed, 121 insertions(+), 8 deletions(-) create mode 100644 lib/iris/tests/integration/netcdf/test_dataless.py diff --git a/lib/iris/fileformats/netcdf/loader.py b/lib/iris/fileformats/netcdf/loader.py index 216df67590..e8d283beb8 100644 --- a/lib/iris/fileformats/netcdf/loader.py +++ b/lib/iris/fileformats/netcdf/loader.py @@ -392,8 +392,17 @@ def _load_cube_inner(engine, cf, cf_var, filename): from iris.cube import Cube """Create the cube associated with the CF-netCDF data variable.""" - data = _get_cf_var_data(cf_var) - cube = Cube(data) + from iris.fileformats.netcdf.saver import Saver + + if hasattr(cf_var, Saver._DATALESS_ATTRNAME): + # This data-variable represents a dataless cube. + # The variable array content was never written (to take up no space). + data = None + shape = cf_var.shape + else: + data = _get_cf_var_data(cf_var) + shape = None + cube = Cube(data=data, shape=shape) # Reset the actions engine. engine.reset() diff --git a/lib/iris/fileformats/netcdf/saver.py b/lib/iris/fileformats/netcdf/saver.py index 67f011f724..5177749c07 100644 --- a/lib/iris/fileformats/netcdf/saver.py +++ b/lib/iris/fileformats/netcdf/saver.py @@ -2275,6 +2275,10 @@ def _create_cf_grid_mapping(self, cube, cf_var_cube): if grid_mapping: _setncattr(cf_var_cube, "grid_mapping", grid_mapping) + _DATALESS_ATTRNAME = "iris_dataless_cube" + _DATALESS_DTYPE = np.dtype("u1") + _DATALESS_FILLVALUE = 127 + def _create_cf_data_variable( self, cube, @@ -2315,9 +2319,19 @@ def _create_cf_data_variable( # TODO: when iris.FUTURE.save_split_attrs is removed, the 'local_keys' arg can # be removed. 
# Get the values in a form which is valid for the file format. - data = self._ensure_valid_dtype(cube.core_data(), "cube", cube) + is_dataless = cube.is_dataless() + if is_dataless: + data = None + else: + data = self._ensure_valid_dtype(cube.core_data(), "cube", cube) - if packing: + if is_dataless: + # The variable must have *some* dtype, and it must be maskable + dtype = self._DATALESS_DTYPE + fill_value = self._DATALESS_FILLVALUE + elif not packing: + dtype = data.dtype.newbyteorder("=") + else: if isinstance(packing, dict): if "dtype" not in packing: msg = "The dtype attribute is required for packing." @@ -2355,8 +2369,6 @@ def _create_cf_data_variable( add_offset = (cmax + cmin) / 2 else: add_offset = cmin + 2 ** (n - 1) * scale_factor - else: - dtype = data.dtype.newbyteorder("=") def set_packing_ncattrs(cfvar): """Set netCDF packing attributes. @@ -2380,8 +2392,9 @@ def set_packing_ncattrs(cfvar): cf_name, dtype, dimension_names, fill_value=fill_value, **kwargs ) - set_packing_ncattrs(cf_var) - self._lazy_stream_data(data=data, cf_var=cf_var) + if not is_dataless: + set_packing_ncattrs(cf_var) + self._lazy_stream_data(data=data, cf_var=cf_var) if cube.standard_name: _setncattr(cf_var, "standard_name", cube.standard_name) @@ -2446,6 +2459,10 @@ def set_packing_ncattrs(cfvar): _setncattr(cf_var, attr_name, value) + # Add the 'dataless' marker if needed + if is_dataless: + _setncattr(cf_var, self._DATALESS_ATTRNAME, "true") + # Create the CF-netCDF data variable cell method attribute. cell_methods = self._create_cf_cell_methods(cube, dimension_names) diff --git a/lib/iris/tests/integration/netcdf/test_dataless.py b/lib/iris/tests/integration/netcdf/test_dataless.py new file mode 100644 index 0000000000..77edb5325e --- /dev/null +++ b/lib/iris/tests/integration/netcdf/test_dataless.py @@ -0,0 +1,87 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. 
+# See LICENSE in the root of the repository for full licensing details. +"""Integration tests for save+load of dataless cubes.""" + +import numpy as np +import pytest + +import iris +from iris.coords import DimCoord +from iris.cube import Cube +from iris.fileformats.netcdf._thread_safe_nc import netCDF4 as nc +from iris.fileformats.netcdf.saver import Saver + + +class TestDataless: + @pytest.fixture(autouse=True) + def setup(self, tmp_path_factory): + ny, nx = 3, 4 + self.testcube = Cube( + shape=(ny, nx), + long_name="testdata", + dim_coords_and_dims=[ + (DimCoord(np.arange(ny), long_name="y"), 0), + (DimCoord(np.arange(nx), long_name="x"), 1), + ], + ) + self.testdir = tmp_path_factory.mktemp("dataless") + self.test_path = self.testdir / "test.nc" + + @staticmethod + def _strip_saveload_additions(reloaded_cube): + reloaded_cube.attributes.pop("Conventions", None) + reloaded_cube.var_name = None + for co in reloaded_cube.coords(): + co.var_name = None + + def test_dataless_save(self): + # Check that we can save a dataless cube, and what that looks like in the file. + iris.save(self.testcube, self.test_path) + assert Saver._DATALESS_ATTRNAME not in self.testcube.attributes + # Check the content as seen in the file + ncds = nc.Dataset(self.test_path) + var = ncds.variables["testdata"] + assert Saver._DATALESS_ATTRNAME in var.ncattrs() + assert var.dtype == Saver._DATALESS_DTYPE + assert "_FillValue" in var.ncattrs() + assert var._FillValue == Saver._DATALESS_FILLVALUE + assert np.all(np.ma.getmaskarray(var[:]) == True) # noqa: E712 + + def test_dataless_load(self): + # Check that we can load a saved dataless cube, and it matches the original. + iris.save(self.testcube, self.test_path) + + # NB Load with load_raw, since we haven't finished supporting dataless merge. 
+ (result_cube,) = iris.load_raw(self.test_path) + assert result_cube.is_dataless() + assert "iris_dataless_cube" not in result_cube.attributes + + # strip off extra things added by netcdf save+load + self._strip_saveload_additions(result_cube) + + # Result now == original + assert result_cube == self.testcube + + def test_mixture_saveload(self): + # Check that a mixture of dataless and "normal" cubes can be saved + loaded back + dataless = self.testcube + ny = dataless.shape[0] + dataful = Cube( + np.ones((ny, 3)), + long_name="other", + dim_coords_and_dims=[(dataless.coord("y"), 0)], + ) + iris.save([dataless, dataful], self.test_path) + # NB Load with load_raw, since we haven't finished supporting dataless merge. + cubes = iris.load_raw(self.test_path) + assert len(cubes) == 2 + read_dataless = cubes.extract_cube("testdata") + read_dataful = cubes.extract_cube("other") + assert read_dataless.is_dataless() + assert not read_dataful.is_dataless() + for cube in (read_dataless, read_dataful): + self._strip_saveload_additions(cube) + assert read_dataless == dataless + assert read_dataful == dataful From 00b0960f855a55cb84c90c3e7580799d4621b7d8 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Wed, 8 Oct 2025 17:59:10 +0100 Subject: [PATCH 2/4] Use thread-safe DatasetWrapper to satisfy coding-standards check for netCDF4 usage. 
--- lib/iris/tests/integration/netcdf/test_dataless.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/iris/tests/integration/netcdf/test_dataless.py b/lib/iris/tests/integration/netcdf/test_dataless.py index 77edb5325e..d09aaf3be3 100644 --- a/lib/iris/tests/integration/netcdf/test_dataless.py +++ b/lib/iris/tests/integration/netcdf/test_dataless.py @@ -10,7 +10,7 @@ import iris from iris.coords import DimCoord from iris.cube import Cube -from iris.fileformats.netcdf._thread_safe_nc import netCDF4 as nc +from iris.fileformats.netcdf._thread_safe_nc import DatasetWrapper from iris.fileformats.netcdf.saver import Saver @@ -41,7 +41,7 @@ def test_dataless_save(self): iris.save(self.testcube, self.test_path) assert Saver._DATALESS_ATTRNAME not in self.testcube.attributes # Check the content as seen in the file - ncds = nc.Dataset(self.test_path) + ncds = DatasetWrapper(self.test_path) var = ncds.variables["testdata"] assert Saver._DATALESS_ATTRNAME in var.ncattrs() assert var.dtype == Saver._DATALESS_DTYPE From ce6c7f840fbcc8f9bd3ab48d2111dca8ab349986 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Thu, 9 Oct 2025 16:29:27 +0100 Subject: [PATCH 3/4] Check that saved dataless cubes consume little file space. Further. --- .../tests/integration/netcdf/test_dataless.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/lib/iris/tests/integration/netcdf/test_dataless.py b/lib/iris/tests/integration/netcdf/test_dataless.py index d09aaf3be3..442777ce18 100644 --- a/lib/iris/tests/integration/netcdf/test_dataless.py +++ b/lib/iris/tests/integration/netcdf/test_dataless.py @@ -85,3 +85,18 @@ def test_mixture_saveload(self): self._strip_saveload_additions(cube) assert read_dataless == dataless assert read_dataful == dataful + + def test_nodata_size(self): + # Check that a file saved with a large dataless cube does *not* occupy a large + # amount of diskspace. 
+ ny, nx = 10000, 10000 + data_dims = (ny, nx) + dataless_cube = Cube(shape=data_dims) + + iris.save(dataless_cube, self.test_path) + + data_size_bytes = ny * nx # bytes, since dtype is "u1" (approx 100Mb) + filesize_bytes = self.test_path.stat().st_size + # Check that the file size < 1/10 variable array size + # The 0.1 is a bit arbitrary, but it makes the point! + assert filesize_bytes < 0.1 * data_size_bytes From 6a625b0cd7cf63a9ba9c2ecf2984eeddf755501d Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Thu, 9 Oct 2025 16:29:49 +0100 Subject: [PATCH 4/4] Add documentation links. --- docs/src/further_topics/netcdf_io.rst | 11 +++++++++++ docs/src/whatsnew/latest.rst | 3 +++ 2 files changed, 14 insertions(+) diff --git a/docs/src/further_topics/netcdf_io.rst b/docs/src/further_topics/netcdf_io.rst index 47d85ffeab..581e6759a8 100644 --- a/docs/src/further_topics/netcdf_io.rst +++ b/docs/src/further_topics/netcdf_io.rst @@ -189,6 +189,17 @@ Deferred Saving TBC +Dataless Cubes +-------------- +It is now possible to have "dataless" cubes, where ``cube.data is None``. +When these are saved to a NetCDF file interface, this results in a netcdf file variable +with all-unwritten data (meaning that it takes up no storage space). + +In order to load such variables back correctly, we also add an extra +``iris_dataless_cube = "true"`` attribute: this tells the loader to skip array creation +when loading back in, so that the read-back cube is also dataless. + + Guessing Coordinate Axes ------------------------ diff --git a/docs/src/whatsnew/latest.rst b/docs/src/whatsnew/latest.rst index 36d757e2f5..db2c70db9b 100644 --- a/docs/src/whatsnew/latest.rst +++ b/docs/src/whatsnew/latest.rst @@ -40,6 +40,9 @@ This document explains the changes made to Iris for this release :func:`~iris.fileformats.netcdf.saver.save_mesh` also supports ``zlib`` compression. (:issue:`6565`, :pull:`6728`) +#. 
`@pp-mo`_ made it possible to save 'dataless' cubes to a netcdf file, and load them + back again. (:issue:`6727`, :pull:`6739`) + 🐛 Bugs Fixed =============