@@ -12,6 +12,7 @@
 from collections import OrderedDict, namedtuple
 from copy import deepcopy
 
+import dask.array as da
 import numpy as np
 
 from iris._lazy_data import (
@@ -320,7 +321,6 @@ class _CubeSignature(
             "data_type",
             "cell_measures_and_dims",
             "ancillary_variables_and_dims",
-            "is_dataless",
         ],
     )
 ):
@@ -431,10 +431,13 @@ def match(self, other, error_on_mismatch):
         if self.data_shape != other.data_shape:
             msg = "cube.shape differs: {} != {}"
             msgs.append(msg.format(self.data_shape, other.data_shape))
-        if self.is_dataless != other.is_dataless:
-            msg = "cube.is_dataless differs: {} != {}"
-            msgs.append(msg.format(self.is_dataless, other.is_dataless))
-        if self.data_type != other.data_type:
+        if (
+            self.data_type is not None
+            and other.data_type is not None
+            and self.data_type != other.data_type
+        ):
+            # N.B. allow "None" to match any other dtype: this means that dataless
+            # cubes can merge with 'dataful' ones.
             msg = "cube data dtype differs: {} != {}"
             msgs.append(msg.format(self.data_type, other.data_type))
         # Both cell_measures_and_dims and ancillary_variables_and_dims are
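The relaxed check above treats a `None` dtype (the signature of a dataless cube) as a wildcard: it matches any concrete dtype, while two concrete dtypes must still agree exactly. A minimal standalone sketch of that rule (illustrative only, not iris code):

import numpy as np

def dtypes_match(this, that):
    # None acts as a wildcard, so dataless cubes can merge with "dataful" ones.
    return this is None or that is None or this == that

assert dtypes_match(None, np.dtype("float32"))            # dataless vs. dataful
assert dtypes_match(np.dtype("int16"), np.dtype("int16"))
assert not dtypes_match(np.dtype("int16"), np.dtype("float32"))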
@@ -1113,9 +1116,6 @@ def __init__(self, cube):
         source-cube.
 
         """
-        # if cube.is_dataless():
-        #     raise iris.exceptions.DatalessError("merge")
-
         # Default hint ordering for candidate dimension coordinates.
         self._hints = [
             "time",
@@ -1239,41 +1239,68 @@ def merge(self, unique=True):
 
         # Generate group-depth merged cubes from the source-cubes.
         for level in range(group_depth):
-            if self._cube_signature.is_dataless:
-                merged_shape = self._cube_signature.data_shape
-                # ?WRONG? merged_shape = self._stack_shape
-                # ?WRONG? merged_shape = (len(nd_indexes),) + shape
-                merged_data = None
-                all_have_data = False
-            else:
-                # Stack up all the data from all of the relevant source
-                # cubes in a single dask "stacked" array.
-                # If it turns out that all the source cubes already had
-                # their data loaded then at the end we convert the stack back
-                # into a plain numpy array.
-                stack = np.empty(self._stack_shape, "object")
-                all_have_data = True
-                for nd_index in nd_indexes:
-                    # Get the data of the current existing or last known
-                    # good source-cube
-                    group = group_by_nd_index[nd_index]
-                    offset = min(level, len(group) - 1)
-                    data = self._skeletons[group[offset]].data
-                    # Ensure the data is represented as a dask array and
-                    # slot that array into the stack.
+            # Stack up all the data from all of the relevant source
+            # cubes in a single dask "stacked" array.
+            # If it turns out that all the source cubes already had
+            # their data loaded then at the end we convert the stack back
+            # into a plain numpy array.
+            stack = np.empty(self._stack_shape, "object")
+            all_have_real_data = True
+            some_are_dataless = False
+            part_shape: tuple = None
+            part_dtype: np.dtype = None
+            for nd_index in nd_indexes:
+                # Get the data of the current existing or last known
+                # good source-cube
+                group = group_by_nd_index[nd_index]
+                offset = min(level, len(group) - 1)
+                data = self._skeletons[group[offset]].data
+                # Ensure the data is represented as a dask array and
+                # slot that array into the stack.
+                if data is None:
+                    some_are_dataless = True
+                else:
+                    # We have (at least one) array content : Record the shape+dtype
+                    if part_shape is None:
+                        part_shape = data.shape
+                        part_dtype = data.dtype
+                    else:
+                        # We expect that the "parts" should **all be the same**
+                        assert data.shape == part_shape
+                        assert data.dtype == part_dtype
+
+                    # ensure lazy (we make the result real, later, if all were real)
                     if is_lazy_data(data):
-                        all_have_data = False
+                        all_have_real_data = False
                     else:
                         data = as_lazy_data(data)
-                    stack[nd_index] = data
+                stack[nd_index] = data
 
+            if part_shape is None:
+                # NO parts had data : the result will also be dataless
+                merged_data = None
+                merged_shape = self._shape
+            else:
+                # At least some inputs had data : the result will have a data array.
+                if some_are_dataless:
+                    # Some parts were dataless: fill these with a lazy all-missing array.
+                    missing_part = da.ma.masked_array(
+                        data=da.zeros(part_shape, dtype=np.dtype("u1")),
+                        mask=da.ones(part_shape, dtype=bool),
+                        dtype=part_dtype,
+                    )
+                    for inds in np.ndindex(stack.shape):
+                        if stack[inds] is None:
+                            stack[inds] = missing_part
+
+                # Make a single lazy merged result array
                 merged_data = multidim_lazy_stack(stack)
                 merged_shape = None
+                if all_have_real_data:
+                    # All inputs were concrete, so turn the result back into a
+                    # normal array.
+                    merged_data = as_concrete_data(merged_data)
 
-            if all_have_data:
-                # All inputs were concrete, so turn the result back into a
-                # normal array.
-                merged_data = as_concrete_data(merged_data)
             merged_cube = self._get_cube(merged_data, shape=merged_shape)
             merged_cubes.append(merged_cube)
 
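The key trick above: each dataless slot is filled with a lazy, fully-masked placeholder of the agreed part shape and dtype, so nothing is ever computed for the missing parts unless the merged data is realised. A standalone sketch of the same construction, using plain `da.stack` as a stand-in for iris's `multidim_lazy_stack` and assuming a single stacking dimension:

import dask.array as da
import numpy as np

part_shape, part_dtype = (2, 3), np.dtype("float32")

# Lazy all-missing stand-in for one dataless source cube: cheap uint8 zeros
# for the payload, every element masked, viewed as the agreed dtype.
missing_part = da.ma.masked_array(
    data=da.zeros(part_shape, dtype=np.dtype("u1")),
    mask=da.ones(part_shape, dtype=bool),
    dtype=part_dtype,
)

# One real part plus one dataless placeholder, stacked along a new
# leading dimension.
real_part = da.arange(6, dtype=part_dtype).reshape(part_shape)
merged = da.stack([real_part, missing_part])

result = merged.compute()
assert np.ma.isMaskedArray(result) and result.shape == (2,) + part_shape
assert not np.ma.getmaskarray(result[0]).any()  # real slice survives intact
assert np.ma.getmaskarray(result[1]).all()      # dataless slice is all-masked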
@@ -1305,8 +1332,6 @@ def register(self, cube, error_on_mismatch=False):
             this :class:`ProtoCube`.
 
         """
-        # if cube.is_dataless():
-        #     raise iris.exceptions.DatalessError("merge")
         cube_signature = self._cube_signature
         other = self._build_signature(cube)
         match = cube_signature.match(other, error_on_mismatch)
@@ -1565,6 +1590,12 @@ def _get_cube(self, data, shape=None):
         Return a fully constructed cube for the given data, containing
         all its coordinates and metadata.
 
+        Parameters
+        ----------
+        data : array_like
+            Cube data content.  If None, `shape` must be set and the result is dataless.
+        shape : tuple, optional
+            Cube data shape, only used if `data` is None.
         """
         signature = self._cube_signature
         dim_coords_and_dims = [
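Per the new docstring, `_get_cube` now has two calling modes, roughly as follows (illustrative pseudocalls, with `proto_cube`, `merged_data` and `merged_shape` as in `merge` above):

# Dataful result: the shape comes from the data array itself.
cube = proto_cube._get_cube(merged_data)

# Dataless result: no data array, so the shape must be given explicitly.
cube = proto_cube._get_cube(None, shape=merged_shape)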
@@ -1726,7 +1757,6 @@ def _build_signature(self, cube):
             cube.dtype,
             cube._cell_measures_and_dims,
             cube._ancillary_variables_and_dims,
-            cube.is_dataless(),
         )
 
     def _add_cube(self, cube, coord_payload):