Skip to content

Commit 98193c0

Browse files
authored
Support NetCDF zlib compression serialization of UGRID meshes (#6728)
* support mesh saving with compression * add test coverage * add whatsnew entry * add review comments * review actions * review action * backout note
1 parent 6eee652 commit 98193c0

File tree

3 files changed

+168
-22
lines changed

3 files changed

+168
-22
lines changed

docs/src/whatsnew/latest.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,12 @@ This document explains the changes made to Iris for this release
3434
horizontal grid.
3535
(:issue:`5770`, :pull:`6581`)
3636

37+
#. `@bjlittle`_ extended ``zlib`` compression of :class:`~iris.cube.Cube` data
38+
payload when saving to NetCDF to also include any attached `CF-UGRID`_
39+
:class:`~iris.mesh.components.MeshXY`. Additionally,
40+
:func:`~iris.fileformats.netcdf.saver.save_mesh` now supports ``zlib``
41+
compression. (:issue:`6565`, :pull:`6728`)
42+
3743

3844
🐛 Bugs Fixed
3945
=============

lib/iris/fileformats/netcdf/saver.py

Lines changed: 52 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -587,11 +587,6 @@ def write(
587587
all_dimensions = mesh_dimensions + nonmesh_dimensions
588588
self._create_cf_dimensions(cube, all_dimensions, unlimited_dimensions)
589589

590-
# Create the mesh components, if there is a mesh.
591-
# We do this before creating the data-var, so that mesh vars precede
592-
# data-vars in the file.
593-
cf_mesh_name = self._add_mesh(cube)
594-
595590
# Group the generic compression keyword arguments together for
596591
# convenience, as they will be applied to other cube metadata
597592
# as well as the cube data payload.
@@ -602,6 +597,11 @@ def write(
602597
"zlib": zlib,
603598
}
604599

600+
# Create the mesh components, if there is a mesh.
601+
# We do this before creating the data-var, so that mesh vars precede
602+
# data-vars in the file.
603+
cf_mesh_name = self._add_mesh(cube, compression_kwargs=compression_kwargs)
604+
605605
# Create the associated cube CF-netCDF data variable.
606606
cf_var_cube = self._create_cf_data_variable(
607607
cube,
@@ -772,7 +772,7 @@ def _create_cf_dimensions(self, cube, dimension_names, unlimited_dimensions=None
772772
size = self._existing_dim[dim_name]
773773
self._dataset.createDimension(dim_name, size)
774774

775-
def _add_mesh(self, cube_or_mesh):
775+
def _add_mesh(self, cube_or_mesh, /, *, compression_kwargs=None):
776776
"""Add the cube's mesh, and all related variables to the dataset.
777777
778778
Add the cube's mesh, and all related variables to the dataset.
@@ -787,6 +787,8 @@ def _add_mesh(self, cube_or_mesh):
787787
----------
788788
cube_or_mesh : :class:`iris.cube.Cube` or :class:`iris.mesh.MeshXY`
789789
The Cube or Mesh being saved to the netCDF file.
790+
compression_kwargs : dict, optional
791+
NetCDF data compression keyword arguments.
790792
791793
Returns
792794
-------
@@ -801,10 +803,8 @@ def _add_mesh(self, cube_or_mesh):
801803
from iris.cube import Cube
802804

803805
if isinstance(cube_or_mesh, Cube):
804-
cube = cube_or_mesh
805-
mesh = cube.mesh
806+
mesh = cube_or_mesh.mesh
806807
else:
807-
cube = None # The underlying routines must support this !
808808
mesh = cube_or_mesh
809809

810810
if mesh:
@@ -830,10 +830,11 @@ def _add_mesh(self, cube_or_mesh):
830830
if coord is None:
831831
continue # an awkward thing that mesh.coords does
832832
coord_name = self._create_generic_cf_array_var(
833-
cube_or_mesh,
833+
mesh,
834834
[],
835835
coord,
836836
element_dims=(mesh_dims[location],),
837+
compression_kwargs=compression_kwargs,
837838
)
838839
# Only created once per file, but need to fetch the
839840
# name later in _add_inner_related_vars().
@@ -878,11 +879,12 @@ def _add_mesh(self, cube_or_mesh):
878879
else:
879880
fill_value = None
880881
cf_conn_name = self._create_generic_cf_array_var(
881-
cube_or_mesh,
882+
mesh,
882883
[],
883884
conn,
884885
element_dims=conn_dims,
885886
fill_value=fill_value,
887+
compression_kwargs=compression_kwargs,
886888
)
887889
# Add essential attributes to the Connectivity variable.
888890
cf_conn_var = self._dataset.variables[cf_conn_name]
@@ -1757,7 +1759,7 @@ def _create_generic_cf_array_var(
17571759
An Iris :class:`iris.coords._DimensionalMetadata`, belonging to the
17581760
cube. Provides data, units and standard/long/var names.
17591761
Not used if 'element_dims' is not None.
1760-
element_dims : list of str, optionsl
1762+
element_dims : list of str, optional
17611763
If set, contains the variable dimension (names),
17621764
otherwise these are taken from `element.cube_dims[cube]`.
17631765
For Mesh components (element coordinates and connectivities), this
@@ -1793,7 +1795,7 @@ def _create_generic_cf_array_var(
17931795
while cf_name in self._dataset.variables:
17941796
cf_name = self._increment_name(cf_name)
17951797

1796-
if element_dims is None:
1798+
if cube and element_dims is None:
17971799
# Get the list of file-dimensions (names), to create the variable.
17981800
element_dims = [
17991801
cube_dim_names[dim] for dim in element.cube_dims(cube)
@@ -1804,7 +1806,8 @@ def _create_generic_cf_array_var(
18041806
# (e.g. =points if a coord, =data if an ancillary, etc)
18051807
data = element._core_values()
18061808

1807-
if cube is None or cube.shape != data.shape:
1809+
# Drop compression when the data shape differs from the cube shape; this contract is *not* applicable to a mesh.
1810+
if cube and cube.shape != data.shape:
18081811
compression_kwargs = {}
18091812

18101813
if np.issubdtype(data.dtype, np.str_):
@@ -2993,7 +2996,17 @@ def is_valid_packspec(p):
29932996
return result
29942997

29952998

2996-
def save_mesh(mesh, filename, netcdf_format="NETCDF4"):
2999+
def save_mesh(
3000+
mesh,
3001+
filename,
3002+
/,
3003+
*,
3004+
complevel=4,
3005+
fletcher32=False,
3006+
netcdf_format="NETCDF4",
3007+
shuffle=True,
3008+
zlib=False,
3009+
):
29973010
"""Save mesh(es) to a netCDF file.
29983011
29993012
Parameters
@@ -3002,16 +3015,37 @@ def save_mesh(mesh, filename, netcdf_format="NETCDF4"):
30023015
Mesh(es) to save.
30033016
filename : str
30043017
Name of the netCDF file to create.
3018+
complevel : int, default=4
3019+
An integer between 1 and 9 describing the level of compression
3020+
desired. Ignored if ``zlib=False``.
3021+
fletcher32 : bool, default=False
3022+
If ``True``, the Fletcher32 HDF5 checksum algorithm is activated to
3023+
detect errors.
30053024
netcdf_format : str, default="NETCDF4"
3006-
Underlying netCDF file format, one of 'NETCDF4', 'NETCDF4_CLASSIC',
3007-
'NETCDF3_CLASSIC' or 'NETCDF3_64BIT'. Default is 'NETCDF4' format.
3025+
Underlying netCDF file format, one of ``NETCDF4``, ``NETCDF4_CLASSIC``,
3026+
``NETCDF3_CLASSIC`` or ``NETCDF3_64BIT``. Default is ``NETCDF4`` format.
3027+
shuffle : bool, default=True
3028+
If ``True``, the HDF5 shuffle filter will be applied before
3029+
compressing the data. This significantly improves compression.
3030+
Ignored if ``zlib=False``.
3031+
zlib : bool, default=False
3032+
If ``True``, the data will be compressed in the netCDF file using
3033+
gzip compression.
30083034
30093035
"""
30103036
if isinstance(mesh, typing.Iterable):
30113037
meshes = mesh
30123038
else:
30133039
meshes = [mesh]
30143040

3041+
# Group the generic compression keyword arguments together.
3042+
compression_kwargs = {
3043+
"complevel": complevel,
3044+
"fletcher32": fletcher32,
3045+
"shuffle": shuffle,
3046+
"zlib": zlib,
3047+
}
3048+
30153049
# Initialise Manager for saving
30163050
with Saver(filename, netcdf_format) as sman:
30173051
# Iterate through the list.
@@ -3023,7 +3057,7 @@ def save_mesh(mesh, filename, netcdf_format="NETCDF4"):
30233057
sman._create_cf_dimensions(cube=None, dimension_names=mesh_dimensions)
30243058

30253059
# Create the mesh components.
3026-
sman._add_mesh(mesh)
3060+
sman._add_mesh(mesh, compression_kwargs=compression_kwargs)
30273061

30283062
# Add a conventions attribute.
30293063
# TODO: add 'UGRID' to conventions, when this is agreed with CF ?

lib/iris/tests/unit/fileformats/netcdf/saver/test_Saver__ugrid.py

Lines changed: 110 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,35 @@ def vars_meshdim(vars, location, mesh_name=None):
329329
return single_location_dim
330330

331331

332+
def filter_compression_calls(patch, compression_kwargs, mismatch=False):
333+
"""Pre-process the mock patch calls for compression kwargs.
334+
335+
Parameters
336+
----------
337+
patch : mock
338+
The mock patch instance to be inspected.
339+
compression_kwargs : dict
340+
The compression kwargs to match as parameters in calls.
341+
mismatch : bool, default=False
342+
Whether to check for calls that do not match the compression kwargs.
343+
344+
Returns
345+
-------
346+
set
347+
The netCDF variables serialized with compression kwargs (or not).
348+
349+
"""
350+
result = set()
351+
for call in patch.call_args_list:
352+
kwargs = call.kwargs
353+
if all(kwargs.get(k) == v for k, v in compression_kwargs.items()):
354+
if not mismatch:
355+
result.add(call.args[0])
356+
elif mismatch:
357+
result.add(call.args[0])
358+
return result
359+
360+
332361
class TestSaveUgrid__cube(tests.IrisTest):
333362
"""Test for saving cubes which have meshes."""
334363

@@ -340,7 +369,7 @@ def setUpClass(cls):
340369
def tearDownClass(cls):
341370
shutil.rmtree(cls.temp_dir)
342371

343-
def check_save_cubes(self, cube_or_cubes):
372+
def check_save_cubes(self, cube_or_cubes, compression_kwargs=None):
344373
"""Write cubes to a new file in the common temporary directory.
345374
346375
Use a name unique to this testcase, to avoid any clashes.
@@ -352,11 +381,52 @@ def check_save_cubes(self, cube_or_cubes):
352381
# in the common temporary directory.
353382
tempfile_path = self.temp_dir / Path(tempfile_path).name
354383

384+
if compression_kwargs is None:
385+
compression_kwargs = {}
386+
355387
# Save data to the file.
356-
save(cube_or_cubes, tempfile_path)
388+
save(cube_or_cubes, tempfile_path, **compression_kwargs)
357389

358390
return tempfile_path
359391

392+
def test_compression(self):
393+
"""Test NetCDF serialization of a cube with attached mesh using compression.
394+
395+
NetCDF data compression keyword arguments include "complevel",
396+
"fletcher32", "shuffle" and "zlib". Note that "complevel" and "shuffle"
397+
are only applicable when "zlib=True".
398+
399+
"""
400+
# Note that the patch location is "_thread_safe_nc" when it is imported
401+
# into the iris.fileformats.netcdf.saver. Also we want to check that the
402+
# compression kwargs are passed into the NetCDF4 createVariable method.
403+
patch = self.patch(
404+
"iris.fileformats.netcdf.saver._thread_safe_nc.DatasetWrapper.createVariable",
405+
)
406+
# Also need to patch the NetCDF4 variables to compensate for the previous patch
407+
# on createVariable, which doesn't actually create the variable.
408+
self.patch(
409+
"iris.fileformats.netcdf.saver._thread_safe_nc.DatasetWrapper.variables"
410+
)
411+
cube = make_cube(var_name=(var_name := "a"))
412+
compression_kwargs = {
413+
"complevel": 9,
414+
"fletcher32": True,
415+
"shuffle": True,
416+
"zlib": True,
417+
}
418+
419+
_ = self.check_save_cubes(cube, compression_kwargs=compression_kwargs)
420+
421+
# The following mesh components and cube should be compressed on serialization.
422+
result = filter_compression_calls(patch, compression_kwargs)
423+
expected = {"node_x", "node_y", "face_x", "face_y", "mesh2d_faces", var_name}
424+
assert result == expected
425+
# The primary mesh variable (no payload) is never compressed.
426+
result = filter_compression_calls(patch, compression_kwargs, mismatch=True)
427+
expected = {"Mesh2d"}
428+
assert result == expected
429+
360430
def test_basic_mesh(self):
361431
# Save a small mesh example and check aspects of the resulting file.
362432
cube = make_cube() # A simple face-mapped data example.
@@ -686,7 +756,7 @@ def setUpClass(cls):
686756
def tearDownClass(cls):
687757
shutil.rmtree(cls.temp_dir)
688758

689-
def check_save_mesh(self, mesh):
759+
def check_save_mesh(self, mesh, compression_kwargs=None):
690760
"""Write a mesh to a new file in the common temporary directory.
691761
692762
Use a name unique to this testcase, to avoid any clashes.
@@ -698,11 +768,47 @@ def check_save_mesh(self, mesh):
698768
# in the common temporary directory.
699769
tempfile_path = self.temp_dir / Path(tempfile_path).name
700770

771+
if compression_kwargs is None:
772+
compression_kwargs = {}
773+
701774
# Save data to the file.
702-
save_mesh(mesh, tempfile_path)
775+
save_mesh(mesh, tempfile_path, **compression_kwargs)
703776

704777
return tempfile_path
705778

779+
def test_compression(self):
780+
"""Test NetCDF serialization of a mesh using compression.
781+
782+
NetCDF data compression keyword arguments include "complevel",
783+
"fletcher32", "shuffle" and "zlib". Note that "complevel" and "shuffle"
784+
are only applicable when "zlib=True".
785+
786+
"""
787+
patch = self.patch(
788+
"iris.fileformats.netcdf.saver._thread_safe_nc.DatasetWrapper.createVariable",
789+
)
790+
self.patch(
791+
"iris.fileformats.netcdf.saver._thread_safe_nc.DatasetWrapper.variables"
792+
)
793+
mesh = make_mesh()
794+
compression_kwargs = {
795+
"complevel": 9,
796+
"fletcher32": True,
797+
"shuffle": True,
798+
"zlib": True,
799+
}
800+
801+
_ = self.check_save_mesh(mesh, compression_kwargs=compression_kwargs)
802+
803+
# The following mesh components should be compressed on serialization.
804+
result = filter_compression_calls(patch, compression_kwargs)
805+
expected = {"node_x", "node_y", "face_x", "face_y", "mesh2d_faces"}
806+
assert result == expected
807+
# The primary mesh variable (no payload) is never compressed.
808+
result = filter_compression_calls(patch, compression_kwargs, mismatch=True)
809+
expected = {"Mesh2d"}
810+
assert result == expected
811+
706812
def test_connectivity_dim_order(self):
707813
"""Test a mesh with some connectivities in the 'other' order.
708814

0 commit comments

Comments
 (0)