Skip to content

Commit c2bfca5

Browse files
authored
Merge branch 'main' into merge_dataless
2 parents 4f9da22 + 9c06cca commit c2bfca5

File tree

10 files changed

+695
-250
lines changed

10 files changed

+695
-250
lines changed

docs/src/further_topics/netcdf_io.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,17 @@ Deferred Saving
189189
TBC
190190

191191

192+
Dataless Cubes
193+
--------------
194+
It is now possible to have "dataless" cubes, where ``cube.data is None``.
195+
When these are saved to a NetCDF file, this results in a netcdf file variable
196+
with all-unwritten data (meaning that it takes up no storage space).
197+
198+
In order to load such variables back correctly, we also add an extra
199+
``iris_dataless_cube = "true"`` attribute: this tells the loader to skip array creation
200+
when loading back in, so that the read-back cube is also dataless.
201+
202+
192203
Guessing Coordinate Axes
193204
------------------------
194205

docs/src/whatsnew/latest.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,19 @@ This document explains the changes made to Iris for this release
4545
Also added a new documentation section on dataless cubes.
4646
(:issue:`5770`, :pull:`6581`)
4747

48+
#. `@pp-mo`_ made it possible to save 'dataless' cubes to a netcdf file, and load them
49+
back again. (:issue:`6727`, :pull:`6739`)
50+
4851
#. `@ukmo-ccbunney`_ added a new :class:`~iris.util.CMLSettings` class to control
4952
the formatting of Cube CML output via a context manager.
5053
(:issue:`6244`, :pull:`6743`)
5154

55+
#. `@ESadek-MO`_ added functionality to allow :func:`~iris.cube.Cube.extract`,
56+
:func:`~iris.cube.Cube.collapsed`, :func:`~iris.cube.Cube.aggregated_by`,
57+
:func:`~iris.cube.Cube.convert_units`, :func:`~iris.cube.Cube.subset` and
58+
:func:`~iris.cube.Cube.slices` to work with dataless cubes.
59+
(:issue:`6725`, :pull:`6724`)
60+
5261

5362
🐛 Bugs Fixed
5463
=============

lib/iris/cube.py

Lines changed: 129 additions & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -1475,25 +1475,25 @@ def convert_units(self, unit: str | Unit) -> None:
14751475
This operation preserves lazy data.
14761476
14771477
"""
1478+
dataless = self.is_dataless()
14781479
# If the cube has units convert the data.
1479-
if self.is_dataless():
1480-
raise iris.exceptions.DatalessError("convert_units")
14811480
if self.units.is_unknown():
14821481
raise iris.exceptions.UnitConversionError(
14831482
"Cannot convert from unknown units. "
14841483
'The "cube.units" attribute may be set directly.'
14851484
)
1486-
if self.has_lazy_data():
1487-
# Make fixed copies of old + new units for a delayed conversion.
1488-
old_unit = Unit(self.units)
1489-
new_unit = unit
1485+
if not dataless:
1486+
if self.has_lazy_data():
1487+
# Make fixed copies of old + new units for a delayed conversion.
1488+
old_unit = Unit(self.units)
1489+
new_unit = unit
14901490

1491-
pointwise_convert = partial(old_unit.convert, other=new_unit)
1491+
pointwise_convert = partial(old_unit.convert, other=new_unit)
14921492

1493-
new_data = _lazy.lazy_elementwise(self.lazy_data(), pointwise_convert)
1494-
else:
1495-
new_data = self.units.convert(self.data, unit)
1496-
self.data = new_data
1493+
new_data = _lazy.lazy_elementwise(self.lazy_data(), pointwise_convert)
1494+
else:
1495+
new_data = self.units.convert(self.data, unit)
1496+
self.data = new_data
14971497
for key in "actual_range", "valid_max", "valid_min", "valid_range":
14981498
if key in self.attributes.locals:
14991499
self.attributes.locals[key] = self.units.convert(
@@ -3050,9 +3050,12 @@ def new_ancillary_variable_dims(av_):
30503050

30513051
# Fetch the data as a generic array-like object.
30523052
cube_data = self._data_manager.core_data()
3053+
dataless = self.is_dataless()
30533054

30543055
# Index with the keys, using orthogonal slicing.
3055-
dimension_mapping, data = iris.util._slice_data_with_keys(cube_data, keys)
3056+
dimension_mapping, data = iris.util._slice_data_with_keys(
3057+
cube_data, keys, shape=self.shape
3058+
)
30563059

30573060
# We don't want a view of the data, so take a copy of it.
30583061
data = deepcopy(data)
@@ -3064,14 +3067,11 @@ def new_ancillary_variable_dims(av_):
30643067
if isinstance(data, ma.core.MaskedConstant) and data.dtype != cube_data.dtype:
30653068
data = ma.array(data.data, mask=data.mask, dtype=cube_data.dtype)
30663069

3067-
# Make the new cube slice
3068-
cube = self.__class__(data)
3069-
cube.metadata = deepcopy(self.metadata)
3070-
30713070
# Record a mapping from old coordinate IDs to new coordinates,
30723071
# for subsequent use in creating updated aux_factories.
30733072
coord_mapping = {}
30743073

3074+
aux_coords = []
30753075
# Slice the coords
30763076
for coord in self.aux_coords:
30773077
coord_keys = tuple([full_slice[dim] for dim in self.coord_dims(coord)])
@@ -3081,28 +3081,52 @@ def new_ancillary_variable_dims(av_):
30813081
# TODO make this except more specific to catch monotonic error
30823082
# Attempt to slice it by converting to AuxCoord first
30833083
new_coord = iris.coords.AuxCoord.from_coord(coord)[coord_keys]
3084-
cube.add_aux_coord(new_coord, new_coord_dims(coord))
3084+
aux_coords.append((new_coord, new_coord_dims(coord)))
30853085
coord_mapping[id(coord)] = new_coord
30863086

3087-
for coord in self.dim_coords:
3088-
coord_keys = tuple([full_slice[dim] for dim in self.coord_dims(coord)])
3089-
new_dims = new_coord_dims(coord)
3090-
# Try/Catch to handle slicing that makes the points/bounds
3091-
# non-monotonic
3087+
dim_coords = []
3088+
shape = ()
3089+
3090+
for dim in range(self.ndim):
3091+
coord_keys = full_slice[dim]
30923092
try:
3093-
new_coord = coord[coord_keys]
3094-
if not new_dims:
3095-
# If the associated dimension has been sliced so the coord
3096-
# is a scalar move the coord to the aux_coords container
3097-
cube.add_aux_coord(new_coord, new_dims)
3098-
else:
3099-
cube.add_dim_coord(new_coord, new_dims)
3100-
except ValueError:
3101-
# TODO make this except more specific to catch monotonic error
3102-
# Attempt to slice it by converting to AuxCoord first
3103-
new_coord = iris.coords.AuxCoord.from_coord(coord)[coord_keys]
3104-
cube.add_aux_coord(new_coord, new_dims)
3105-
coord_mapping[id(coord)] = new_coord
3093+
coord = self.coord(dimensions=dim, dim_coords=True)
3094+
new_dims = new_coord_dims(coord)
3095+
# Try/Catch to handle slicing that makes the points/bounds
3096+
# non-monotonic
3097+
try:
3098+
new_coord = coord[coord_keys]
3099+
if not new_dims:
3100+
# If the associated dimension has been sliced so the coord
3101+
# is a scalar move the coord to the aux_coords container
3102+
aux_coords.append((new_coord, new_dims))
3103+
else:
3104+
dim_coords.append((new_coord, new_dims))
3105+
shape += new_coord.core_points().shape
3106+
except ValueError:
3107+
# TODO make this except more specific to catch monotonic error
3108+
# Attempt to slice it by converting to AuxCoord first
3109+
new_coord = iris.coords.AuxCoord.from_coord(coord)[coord_keys]
3110+
aux_coords.append((new_coord, new_dims))
3111+
coord_mapping[id(coord)] = new_coord
3112+
except iris.exceptions.CoordinateNotFoundError:
3113+
points = np.zeros(self.shape[dim])[coord_keys]
3114+
if points.shape != ():
3115+
dim_shape = points.shape
3116+
shape += dim_shape
3117+
3118+
# Make the new cube slice
3119+
if dataless:
3120+
cube = self.__class__(shape=shape)
3121+
else:
3122+
cube = self.__class__(data)
3123+
cube.metadata = deepcopy(self.metadata)
3124+
3125+
for coord, dim in dim_coords:
3126+
cube.add_dim_coord(coord, dim)
3127+
3128+
for coord, dims in aux_coords:
3129+
cube.add_aux_coord(coord, dims)
31063130

31073131
for factory in self.aux_factories:
31083132
cube.add_aux_factory(factory.updated(coord_mapping))
@@ -3131,8 +3155,6 @@ def subset(self, coord: AuxCoord | DimCoord) -> Cube | None:
31313155
whole cube is returned. As such, the operation is not strict.
31323156
31333157
"""
3134-
if self.is_dataless():
3135-
raise iris.exceptions.DatalessError("subset")
31363158
if not isinstance(coord, iris.coords.Coord):
31373159
raise ValueError("coord_to_extract must be a valid Coord.")
31383160

@@ -3780,9 +3802,6 @@ def slices(
37803802
dimension index.
37813803
37823804
""" # noqa: D214, D406, D407, D410, D411
3783-
if self.is_dataless():
3784-
raise iris.exceptions.DatalessError("slices")
3785-
37863805
if not isinstance(ordered, bool):
37873806
raise TypeError("'ordered' argument to slices must be boolean.")
37883807

@@ -3870,9 +3889,14 @@ def transpose(self, new_order: list[int] | None = None) -> None:
38703889

38713890
# Transpose the data payload.
38723891
dm = self._data_manager
3873-
if not self.is_dataless():
3892+
if self.is_dataless():
3893+
data = None
3894+
shape = dm.shape
3895+
else:
38743896
data = dm.core_data().transpose(new_order)
3875-
self._data_manager = DataManager(data)
3897+
shape = None
3898+
3899+
self._data_manager = DataManager(data=data, shape=shape)
38763900

38773901
dim_mapping = {src: dest for dest, src in enumerate(new_order)}
38783902

@@ -4403,8 +4427,6 @@ def collapsed(
44034427
cube.collapsed(['latitude', 'longitude'],
44044428
iris.analysis.VARIANCE)
44054429
"""
4406-
if self.is_dataless():
4407-
raise iris.exceptions.DatalessError("collapsed")
44084430
# Update weights kwargs (if necessary) to handle different types of
44094431
# weights
44104432
weights_info = None
@@ -4507,7 +4529,7 @@ def collapsed(
45074529

45084530
# If we weren't able to complete a lazy aggregation, compute it
45094531
# directly now.
4510-
if data_result is None:
4532+
if data_result is None and not self.is_dataless():
45114533
# Perform the (non-lazy) aggregation over the cube data
45124534
# First reshape the data so that the dimensions being aggregated
45134535
# over are grouped 'at the end' (i.e. axis=-1).
@@ -4625,8 +4647,6 @@ def aggregated_by(
46254647
STASH m01s00i024
46264648
46274649
"""
4628-
if self.is_dataless():
4629-
raise iris.exceptions.DatalessError("aggregated_by")
46304650
# Update weights kwargs (if necessary) to handle different types of
46314651
# weights
46324652
weights_info = None
@@ -4729,59 +4749,64 @@ def aggregated_by(
47294749
orig_id = id(self.coord(coord))
47304750
coord_mapping[orig_id] = coord
47314751

4732-
# Determine the group-by cube data shape.
4733-
data_shape = list(self.shape + aggregator.aggregate_shape(**kwargs))
4734-
data_shape[dimension_to_groupby] = len(groupby)
4735-
4736-
# Choose appropriate data and functions for data aggregation.
4737-
if aggregator.lazy_func is not None and self.has_lazy_data():
4738-
input_data = self.lazy_data()
4739-
agg_method = aggregator.lazy_aggregate
4740-
else:
4741-
input_data = self.data
4742-
agg_method = aggregator.aggregate
4743-
4744-
# Create data and weights slices.
4745-
front_slice = (slice(None),) * dimension_to_groupby
4746-
back_slice = (slice(None),) * (len(data_shape) - dimension_to_groupby - 1)
4747-
4748-
groupby_subarrs = (
4749-
iris.util._slice_data_with_keys(
4750-
input_data, front_slice + (groupby_slice,) + back_slice
4751-
)[1]
4752-
for groupby_slice in groupby.group()
4753-
)
4754-
4755-
if weights is not None:
4756-
groupby_subweights = (
4757-
weights[front_slice + (groupby_slice,) + back_slice]
4752+
if not self.is_dataless():
4753+
# Determine the group-by cube data shape.
4754+
data_shape = list(self.shape + aggregator.aggregate_shape(**kwargs))
4755+
data_shape[dimension_to_groupby] = len(groupby)
4756+
4757+
# Choose appropriate data and functions for data aggregation.
4758+
if aggregator.lazy_func is not None and self.has_lazy_data():
4759+
input_data = self.lazy_data()
4760+
agg_method = aggregator.lazy_aggregate
4761+
else:
4762+
input_data = self.data
4763+
agg_method = aggregator.aggregate
4764+
4765+
# Create data and weights slices.
4766+
front_slice = (slice(None),) * dimension_to_groupby
4767+
back_slice = (slice(None),) * (len(data_shape) - dimension_to_groupby - 1)
4768+
4769+
groupby_subarrs = (
4770+
iris.util._slice_data_with_keys(
4771+
input_data,
4772+
front_slice + (groupby_slice,) + back_slice,
4773+
shape=(self.shape),
4774+
)[1]
47584775
for groupby_slice in groupby.group()
47594776
)
4760-
else:
4761-
groupby_subweights = (None for _ in range(len(groupby)))
47624777

4763-
# Aggregate data slices.
4764-
agg = iris.analysis.create_weighted_aggregator_fn(
4765-
agg_method, axis=dimension_to_groupby, **kwargs
4766-
)
4767-
result = tuple(map(agg, groupby_subarrs, groupby_subweights))
4768-
4769-
# If weights are returned, "result" is a list of tuples (each tuple
4770-
# contains two elements; the first is the aggregated data, the
4771-
# second is the aggregated weights). Convert these to two lists
4772-
# (one for the aggregated data and one for the aggregated weights)
4773-
# before combining the different slices.
4774-
if return_weights:
4775-
data_result, weights_result = list(zip(*result))
4776-
aggregateby_weights = _lazy.stack(weights_result, axis=dimension_to_groupby)
4777-
else:
4778-
data_result = result
4779-
aggregateby_weights = None
4778+
if weights is not None:
4779+
groupby_subweights = (
4780+
weights[front_slice + (groupby_slice,) + back_slice]
4781+
for groupby_slice in groupby.group()
4782+
)
4783+
else:
4784+
groupby_subweights = (None for _ in range(len(groupby)))
47804785

4781-
aggregateby_data = _lazy.stack(data_result, axis=dimension_to_groupby)
4782-
# Ensure plain ndarray is output if plain ndarray was input.
4783-
if ma.isMaskedArray(aggregateby_data) and not ma.isMaskedArray(input_data):
4784-
aggregateby_data = ma.getdata(aggregateby_data)
4786+
# Aggregate data slices.
4787+
agg = iris.analysis.create_weighted_aggregator_fn(
4788+
agg_method, axis=dimension_to_groupby, **kwargs
4789+
)
4790+
result = tuple(map(agg, groupby_subarrs, groupby_subweights))
4791+
4792+
# If weights are returned, "result" is a list of tuples (each tuple
4793+
# contains two elements; the first is the aggregated data, the
4794+
# second is the aggregated weights). Convert these to two lists
4795+
# (one for the aggregated data and one for the aggregated weights)
4796+
# before combining the different slices.
4797+
if return_weights:
4798+
data_result, weights_result = list(zip(*result))
4799+
aggregateby_weights = _lazy.stack(
4800+
weights_result, axis=dimension_to_groupby
4801+
)
4802+
else:
4803+
data_result = result
4804+
aggregateby_weights = None
4805+
4806+
aggregateby_data = _lazy.stack(data_result, axis=dimension_to_groupby)
4807+
# Ensure plain ndarray is output if plain ndarray was input.
4808+
if ma.isMaskedArray(aggregateby_data) and not ma.isMaskedArray(input_data):
4809+
aggregateby_data = ma.getdata(aggregateby_data)
47854810

47864811
# Add the aggregation meta data to the aggregate-by cube.
47874812
aggregator.update_metadata(
@@ -4823,13 +4848,14 @@ def aggregated_by(
48234848
aggregateby_cube.add_aux_factory(factory.updated(coord_mapping))
48244849

48254850
# Attach the aggregate-by data into the aggregate-by cube.
4826-
if aggregateby_weights is None:
4827-
data_result = aggregateby_data
4828-
else:
4829-
data_result = (aggregateby_data, aggregateby_weights)
4830-
aggregateby_cube = aggregator.post_process(
4831-
aggregateby_cube, data_result, coordinates, **kwargs
4832-
)
4851+
if not self.is_dataless():
4852+
if aggregateby_weights is None:
4853+
data_result = aggregateby_data
4854+
else:
4855+
data_result = (aggregateby_data, aggregateby_weights)
4856+
aggregateby_cube = aggregator.post_process(
4857+
aggregateby_cube, data_result, coordinates, **kwargs
4858+
)
48334859

48344860
return aggregateby_cube
48354861

lib/iris/fileformats/netcdf/loader.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -392,8 +392,17 @@ def _load_cube_inner(engine, cf, cf_var, filename):
392392
from iris.cube import Cube
393393

394394
"""Create the cube associated with the CF-netCDF data variable."""
395-
data = _get_cf_var_data(cf_var)
396-
cube = Cube(data)
395+
from iris.fileformats.netcdf.saver import Saver
396+
397+
if hasattr(cf_var, Saver._DATALESS_ATTRNAME):
398+
# This data-variable represents a dataless cube.
399+
# The variable array content was never written (to take up no space).
400+
data = None
401+
shape = cf_var.shape
402+
else:
403+
data = _get_cf_var_data(cf_var)
404+
shape = None
405+
cube = Cube(data=data, shape=shape)
397406

398407
# Reset the actions engine.
399408
engine.reset()

0 commit comments

Comments
 (0)