Skip to content

Commit 222acb5

Browse files
committed
Dataless merge: allow merging dataless cubes, both with each other and with dataful ones.
1 parent 4715ad5 commit 222acb5

File tree

2 files changed

+70
-42
lines changed

2 files changed

+70
-42
lines changed

lib/iris/_merge.py

Lines changed: 69 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from collections import OrderedDict, namedtuple
1313
from copy import deepcopy
1414

15+
import dask.array as da
1516
import numpy as np
1617

1718
from iris._lazy_data import (
@@ -320,7 +321,6 @@ class _CubeSignature(
320321
"data_type",
321322
"cell_measures_and_dims",
322323
"ancillary_variables_and_dims",
323-
"is_dataless",
324324
],
325325
)
326326
):
@@ -431,10 +431,13 @@ def match(self, other, error_on_mismatch):
431431
if self.data_shape != other.data_shape:
432432
msg = "cube.shape differs: {} != {}"
433433
msgs.append(msg.format(self.data_shape, other.data_shape))
434-
if self.is_dataless != other.is_dataless:
435-
msg = "cube.is_dataless differs: {} != {}"
436-
msgs.append(msg.format(self.is_dataless, other.is_dataless))
437-
if self.data_type != other.data_type:
434+
if (
435+
self.data_type is not None
436+
and other.data_type is not None
437+
and self.data_type != other.data_type
438+
):
439+
# N.B. allow "None" to match any other dtype: this means that dataless
440+
# cubes can merge with 'dataful' ones.
438441
msg = "cube data dtype differs: {} != {}"
439442
msgs.append(msg.format(self.data_type, other.data_type))
440443
# Both cell_measures_and_dims and ancillary_variables_and_dims are
@@ -1113,9 +1116,6 @@ def __init__(self, cube):
11131116
source-cube.
11141117
11151118
"""
1116-
# if cube.is_dataless():
1117-
# raise iris.exceptions.DatalessError("merge")
1118-
11191119
# Default hint ordering for candidate dimension coordinates.
11201120
self._hints = [
11211121
"time",
@@ -1239,41 +1239,68 @@ def merge(self, unique=True):
12391239

12401240
# Generate group-depth merged cubes from the source-cubes.
12411241
for level in range(group_depth):
1242-
if self._cube_signature.is_dataless:
1243-
merged_shape = self._cube_signature.data_shape
1244-
# ?WRONG? merged_shape = self._stack_shape
1245-
# ?WRONG? merged_shape = (len(nd_indexes),) + shape
1246-
merged_data = None
1247-
all_have_data = False
1248-
else:
1249-
# Stack up all the data from all of the relevant source
1250-
# cubes in a single dask "stacked" array.
1251-
# If it turns out that all the source cubes already had
1252-
# their data loaded then at the end we convert the stack back
1253-
# into a plain numpy array.
1254-
stack = np.empty(self._stack_shape, "object")
1255-
all_have_data = True
1256-
for nd_index in nd_indexes:
1257-
# Get the data of the current existing or last known
1258-
# good source-cube
1259-
group = group_by_nd_index[nd_index]
1260-
offset = min(level, len(group) - 1)
1261-
data = self._skeletons[group[offset]].data
1262-
# Ensure the data is represented as a dask array and
1263-
# slot that array into the stack.
1242+
# Stack up all the data from all of the relevant source
1243+
# cubes in a single dask "stacked" array.
1244+
# If it turns out that all the source cubes already had
1245+
# their data loaded then at the end we convert the stack back
1246+
# into a plain numpy array.
1247+
stack = np.empty(self._stack_shape, "object")
1248+
all_have_real_data = True
1249+
some_are_dataless = False
1250+
part_shape: tuple = None
1251+
part_dtype: np.dtype = None
1252+
for nd_index in nd_indexes:
1253+
# Get the data of the current existing or last known
1254+
# good source-cube
1255+
group = group_by_nd_index[nd_index]
1256+
offset = min(level, len(group) - 1)
1257+
data = self._skeletons[group[offset]].data
1258+
# Ensure the data is represented as a dask array and
1259+
# slot that array into the stack.
1260+
if data is None:
1261+
some_are_dataless = True
1262+
else:
1263+
# We have (at least one) array content : Record the shape+dtype
1264+
if part_shape is None:
1265+
part_shape = data.shape
1266+
part_dtype = data.dtype
1267+
else:
1268+
# We expect that the "parts" should **all be the same**
1269+
assert data.shape == part_shape
1270+
assert data.dtype == part_dtype
1271+
1272+
# ensure lazy (we make the result real, later, if all were real)
12641273
if is_lazy_data(data):
1265-
all_have_data = False
1274+
all_have_real_data = False
12661275
else:
12671276
data = as_lazy_data(data)
1268-
stack[nd_index] = data
1277+
stack[nd_index] = data
12691278

1279+
if part_shape is None:
1280+
# NO parts had data : the result will also be dataless
1281+
merged_data = None
1282+
merged_shape = self._shape
1283+
else:
1284+
# At least some inputs had data : the result will have a data array.
1285+
if some_are_dataless:
1286+
# Some parts were dataless: fill these with a lazy all-missing array.
1287+
missing_part = da.ma.masked_array(
1288+
data=da.zeros(part_shape, dtype=np.dtype("u1")),
1289+
mask=da.ones(part_shape, dtype=bool),
1290+
dtype=part_dtype,
1291+
)
1292+
for inds in np.ndindex(stack.shape):
1293+
if stack[inds] is None:
1294+
stack[inds] = missing_part
1295+
1296+
# Make a single lazy merged result array
12701297
merged_data = multidim_lazy_stack(stack)
12711298
merged_shape = None
1299+
if all_have_real_data:
1300+
# All inputs were concrete, so turn the result back into a
1301+
# normal array.
1302+
merged_data = as_concrete_data(merged_data)
12721303

1273-
if all_have_data:
1274-
# All inputs were concrete, so turn the result back into a
1275-
# normal array.
1276-
merged_data = as_concrete_data(merged_data)
12771304
merged_cube = self._get_cube(merged_data, shape=merged_shape)
12781305
merged_cubes.append(merged_cube)
12791306

@@ -1305,8 +1332,6 @@ def register(self, cube, error_on_mismatch=False):
13051332
this :class:`ProtoCube`.
13061333
13071334
"""
1308-
# if cube.is_dataless():
1309-
# raise iris.exceptions.DatalessError("merge")
13101335
cube_signature = self._cube_signature
13111336
other = self._build_signature(cube)
13121337
match = cube_signature.match(other, error_on_mismatch)
@@ -1565,6 +1590,12 @@ def _get_cube(self, data, shape=None):
15651590
Return a fully constructed cube for the given data, containing
15661591
all its coordinates and metadata.
15671592
1593+
Parameters
1594+
----------
1595+
data : array_like
1596+
Cube data content. If None, `shape` must be set and the result is dataless.
1597+
shape : tuple, optional
1598+
Cube data shape, only used if data is None.
15681599
"""
15691600
signature = self._cube_signature
15701601
dim_coords_and_dims = [
@@ -1726,7 +1757,6 @@ def _build_signature(self, cube):
17261757
cube.dtype,
17271758
cube._cell_measures_and_dims,
17281759
cube._ancillary_variables_and_dims,
1729-
cube.is_dataless(),
17301760
)
17311761

17321762
def _add_cube(self, cube, coord_payload):

lib/iris/tests/integration/merge/test_dataless.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,7 @@ def test_dataless_merge(self):
4545
self._testcube(z=2, dataless=True),
4646
]
4747
)
48-
cubes = cubes.merge()
49-
assert len(cubes) == 2
50-
(cube, cube2) = cubes
48+
cube = cubes.merge_cube()
5149
assert cube.is_dataless()
5250
assert np.all(cube.coord("z").points == [1, 2])
5351

0 commit comments

Comments
 (0)