11 changes: 11 additions & 0 deletions docs/src/further_topics/netcdf_io.rst
@@ -189,6 +189,17 @@ Deferred Saving
TBC


Dataless Cubes
--------------
It is now possible to have "dataless" cubes, where ``cube.data is None``.
When such a cube is saved to a NetCDF file, the result is a file variable whose
data is entirely unwritten (meaning that it takes up no storage space).

So that such variables can be loaded back correctly, the saver also adds an extra
``iris_dataless_cube = "true"`` attribute: this tells the loader to skip array
creation on read, so that the read-back cube is also dataless.
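
A minimal sketch of the round trip, based on the integration tests in this
change (``"dataless.nc"`` is just an illustrative path):

.. code-block:: python

    import iris
    from iris.cube import Cube

    # A dataless cube is created from a shape alone: its data is None.
    cube = Cube(shape=(3, 4), long_name="testdata")
    assert cube.is_dataless()

    # Saving writes a variable with no data content, plus the
    # "iris_dataless_cube" marker attribute.
    iris.save(cube, "dataless.nc")

    # On loading, the marker tells the loader to skip array creation.
    # (load_raw is used, as dataless merge is not yet fully supported.)
    (result,) = iris.load_raw("dataless.nc")
    assert result.is_dataless()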


Guessing Coordinate Axes
------------------------

3 changes: 3 additions & 0 deletions docs/src/whatsnew/latest.rst
@@ -40,6 +40,9 @@ This document explains the changes made to Iris for this release
:func:`~iris.fileformats.netcdf.saver.save_mesh` also supports ``zlib``
compression. (:issue:`6565`, :pull:`6728`)

#. `@pp-mo`_ made it possible to save 'dataless' cubes to a netcdf file, and load them
back again. (:issue:`6727`, :pull:`6739`)

#. `@ukmo-ccbunney`_ added a new :class:`~iris.util.CMLSettings` class to control
the formatting of Cube CML output via a context manager.
(:issue:`6244`, :pull:`6743`)
13 changes: 11 additions & 2 deletions lib/iris/fileformats/netcdf/loader.py
@@ -392,8 +392,17 @@ def _load_cube_inner(engine, cf, cf_var, filename):
from iris.cube import Cube

"""Create the cube associated with the CF-netCDF data variable."""
data = _get_cf_var_data(cf_var)
cube = Cube(data)
from iris.fileformats.netcdf.saver import Saver

if hasattr(cf_var, Saver._DATALESS_ATTRNAME):
# This data-variable represents a dataless cube.
# The variable array content was never written (to take up no space).
data = None
shape = cf_var.shape
else:
data = _get_cf_var_data(cf_var)
shape = None
cube = Cube(data=data, shape=shape)

# Reset the actions engine.
engine.reset()
29 changes: 23 additions & 6 deletions lib/iris/fileformats/netcdf/saver.py
@@ -2275,6 +2275,10 @@ def _create_cf_grid_mapping(self, cube, cf_var_cube):
if grid_mapping:
_setncattr(cf_var_cube, "grid_mapping", grid_mapping)

_DATALESS_ATTRNAME = "iris_dataless_cube"
_DATALESS_DTYPE = np.dtype("u1")
_DATALESS_FILLVALUE = 127
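# NB (rationale, as inferred from the tests): a 1-byte dtype minimises the
# placeholder variable's nominal size, and an explicit fill value ensures the
# unwritten content reads back as fully masked.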

def _create_cf_data_variable(
self,
cube,
@@ -2315,9 +2319,19 @@ def _create_cf_data_variable(
# TODO: when iris.FUTURE.save_split_attrs is removed, the 'local_keys' arg can
# be removed.
# Get the values in a form which is valid for the file format.
data = self._ensure_valid_dtype(cube.core_data(), "cube", cube)
is_dataless = cube.is_dataless()
if is_dataless:
data = None
else:
data = self._ensure_valid_dtype(cube.core_data(), "cube", cube)

if packing:
if is_dataless:
# The variable must have *some* dtype, and it must be maskable
dtype = self._DATALESS_DTYPE
fill_value = self._DATALESS_FILLVALUE
elif not packing:
dtype = data.dtype.newbyteorder("=")
else:
if isinstance(packing, dict):
if "dtype" not in packing:
msg = "The dtype attribute is required for packing."
@@ -2355,8 +2369,6 @@ def set_packing_ncattrs(cfvar):
add_offset = (cmax + cmin) / 2
else:
add_offset = cmin + 2 ** (n - 1) * scale_factor
else:
dtype = data.dtype.newbyteorder("=")

def set_packing_ncattrs(cfvar):
"""Set netCDF packing attributes.
@@ -2380,8 +2392,9 @@ def set_packing_ncattrs(cfvar):
cf_name, dtype, dimension_names, fill_value=fill_value, **kwargs
)

set_packing_ncattrs(cf_var)
self._lazy_stream_data(data=data, cf_var=cf_var)
if not is_dataless:
set_packing_ncattrs(cf_var)
self._lazy_stream_data(data=data, cf_var=cf_var)

if cube.standard_name:
_setncattr(cf_var, "standard_name", cube.standard_name)
@@ -2446,6 +2459,10 @@ def set_packing_ncattrs(cfvar):

_setncattr(cf_var, attr_name, value)

# Add the 'dataless' marker if needed
if is_dataless:
_setncattr(cf_var, self._DATALESS_ATTRNAME, "true")

# Create the CF-netCDF data variable cell method attribute.
cell_methods = self._create_cf_cell_methods(cube, dimension_names)

102 changes: 102 additions & 0 deletions lib/iris/tests/integration/netcdf/test_dataless.py
@@ -0,0 +1,102 @@
# Copyright Iris contributors
#
# This file is part of Iris and is released under the BSD license.
# See LICENSE in the root of the repository for full licensing details.
"""Integration tests for save+load of datales cubes."""

import numpy as np
import pytest

import iris
from iris.coords import DimCoord
from iris.cube import Cube
from iris.fileformats.netcdf._thread_safe_nc import DatasetWrapper
from iris.fileformats.netcdf.saver import Saver


class TestDataless:
@pytest.fixture(autouse=True)
def setup(self, tmp_path_factory):
ny, nx = 3, 4
self.testcube = Cube(
shape=(ny, nx),
long_name="testdata",
dim_coords_and_dims=[
(DimCoord(np.arange(ny), long_name="y"), 0),
(DimCoord(np.arange(nx), long_name="x"), 1),
],
)
self.testdir = tmp_path_factory.mktemp("dataless")
self.test_path = self.testdir / "test.nc"

@staticmethod
def _strip_saveload_additions(reloaded_cube):
reloaded_cube.attributes.pop("Conventions", None)
reloaded_cube.var_name = None
for co in reloaded_cube.coords():
co.var_name = None

def test_dataless_save(self):
# Check that we can save a dataless cube, and what that looks like in the file.
iris.save(self.testcube, self.test_path)
assert Saver._DATALESS_ATTRNAME not in self.testcube.attributes
# Check the content as seen in the file
ncds = DatasetWrapper(self.test_path)
var = ncds.variables["testdata"]
assert Saver._DATALESS_ATTRNAME in var.ncattrs()
assert var.dtype == Saver._DATALESS_DTYPE
assert "_FillValue" in var.ncattrs()
assert var._FillValue == Saver._DATALESS_FILLVALUE
assert np.all(np.ma.getmaskarray(var[:]) == True) # noqa: E712

def test_dataless_load(self):
# Check that we can load a saved dataless cube, and it matches the original.
iris.save(self.testcube, self.test_path)

# NB Load with load_raw, since we haven't finished supporting dataless merge.
(result_cube,) = iris.load_raw(self.test_path)
assert result_cube.is_dataless()
assert "iris_dataless_cube" not in result_cube.attributes

# strip off extra things added by netcdf save+load
self._strip_saveload_additions(result_cube)

# Result now == original
assert result_cube == self.testcube

def test_mixture_saveload(self):
# Check that a mixture of dataless and "normal" cubes can be saved + loaded back
dataless = self.testcube
ny = dataless.shape[0]
dataful = Cube(
np.ones((ny, 3)),
long_name="other",
dim_coords_and_dims=[(dataless.coord("y"), 0)],
)
iris.save([dataless, dataful], self.test_path)
# NB Load with load_raw, since we haven't finished supporting dataless merge.
cubes = iris.load_raw(self.test_path)
assert len(cubes) == 2
read_dataless = cubes.extract_cube("testdata")
read_dataful = cubes.extract_cube("other")
assert read_dataless.is_dataless()
assert not read_dataful.is_dataless()
for cube in (read_dataless, read_dataful):
self._strip_saveload_additions(cube)
assert read_dataless == dataless
assert read_dataful == dataful

def test_nodata_size(self):
# Check that a file saved with a large dataless cube does *not* occupy a large
# amount of diskspace.
ny, nx = 10000, 10000
data_dims = (ny, nx)
dataless_cube = Cube(shape=data_dims)

iris.save(dataless_cube, self.test_path)

data_size_bytes = ny * nx  # bytes, since dtype is "u1" (approx 100 MB)
filesize_bytes = self.test_path.stat().st_size
# Check that the file size < 1/10 variable array size
# The 0.1 is a bit arbitrary, but it makes the point!
assert filesize_bytes < 0.1 * data_size_bytes