diff --git a/Changelog.rst b/Changelog.rst index 5b3e8d907c..9b1ce2931c 100644 --- a/Changelog.rst +++ b/Changelog.rst @@ -6,6 +6,9 @@ Version NEXTVERSION * New methods to allow changing units in a chain: `cf.Field.to_units`, `cf.Data.to_units` (https://github.com/NCAS-CMS/cf-python/issues/874) +* Allow multiple conditions for the same axis in `cf.Field.subspace` + and `cf.Field.indices` + (https://github.com/NCAS-CMS/cf-python/issues/881) * New dependency: ``distributed>=2025.5.1`` ---- diff --git a/cf/field.py b/cf/field.py index c8b2bc9a48..994eb66fa6 100644 --- a/cf/field.py +++ b/cf/field.py @@ -12754,6 +12754,8 @@ def subspace(self): * Multiple domain axes may be subspaced simultaneously, and it doesn't matter which order they are specified in. + * Multiple criteria may be specified for the same domain axis. + * Subspace criteria may be provided for size 1 domain axes that are not spanned by the field construct's data. diff --git a/cf/mixin/fielddomain.py b/cf/mixin/fielddomain.py index e4522c4b89..56f55b8755 100644 --- a/cf/mixin/fielddomain.py +++ b/cf/mixin/fielddomain.py @@ -366,18 +366,13 @@ def _indices(self, config, data_axes, ancillary_mask, kwargs): zip(*axes_key_construct_value_id) ) - n_items = len(constructs) + n_constructs = len(constructs) n_axes = len(canonical_axes) - if n_items > n_axes: - if n_axes == 1: - a = "axis" - else: - a = "axes" - + if n_axes > 1 and n_constructs > n_axes: raise ValueError( - f"Error: Can't specify {n_items} conditions for " - f"{n_axes} {a}: {points}. Consider applying the " + f"Error: Can't specify {n_constructs} conditions for " + f"{n_axes} axes: {points}. Consider applying the " "conditions separately." ) @@ -396,150 +391,229 @@ def _indices(self, config, data_axes, ancillary_mask, kwargs): # ---------------------------------------------------- # 1-d construct # ---------------------------------------------------- - ind = None - axis = item_axes[0] - item = constructs[0] - value = points[0] - identity = identities[0] + size = domain_axes[axis].get_size() - if debug: - logger.debug( - f" {n_items} 1-d constructs: {constructs!r}\n" - f" item = {item!r}\n" - f" axis = {axis!r}\n" - f" value = {value!r}\n" - f" identity = {identity!r}" - ) # pragma: no cover + ind0 = None + index0 = None - if isinstance(value, (list, slice, tuple, np.ndarray)): - # 1-d CASE 1: Value is already an index, e.g. [0], - # [7,4,2], slice(0,4,2), - # numpy.array([2,4,7]), - # [True,False,True] - if debug: - logger.debug(" 1-d CASE 1:") # pragma: no cover - - index = value - - if envelope or full: - # Set ind - size = domain_axes[axis].get_size() - ind = (np.arange(size)[value],) - # Placeholder which will be overwritten later - index = None - - elif ( - item is not None - and isinstance(value, Query) - and value.operator in ("wi", "wo") - and item.construct_type == "dimension_coordinate" - and self.iscyclic(axis) + # Loop round the conditions for this axis. + # + # When there are multiple conditions, each iteration + # produces a 1-d Boolean array, and the axis selection + # is the logical AND of these arrays. + # + # When there is a single condition, the axis selection + # could be a slice or list of integers. + for item, value, identity in zip( + constructs, points, identities ): - # 1-d CASE 2: Axis is cyclic and subspace - # criterion is a 'within' or 'without' - # Query instance if debug: - logger.debug(" 1-d CASE 2:") # pragma: no cover - - size = item.size - if item.increasing: - anchor = value.value[0] - else: - anchor = value.value[1] - - item = item.persist() - parameters = {} - item = item.anchor(anchor, parameters=parameters) - n = np.roll(np.arange(size), parameters["shift"], 0) - if value.operator == "wi": - n = n[item == value] - if not n.size: - raise ValueError( - f"No indices found from: {identity}={value!r}" - ) - - start = n[0] - stop = n[-1] + 1 - else: - # "wo" operator - n = n[item == wi(*value.value)] - if n.size == size: - raise ValueError( - f"No indices found from: {identity}={value!r}" - ) + logger.debug( + f" axis = {axis!r}\n" + f" item = {item!r}\n" + f" value = {value!r}\n" + f" identity = {identity!r}" + ) # pragma: no cover - if n.size: - start = n[-1] + 1 - stop = start - n.size + ind = None + + if isinstance(value, (list, slice, tuple, np.ndarray)): + # 1-d CASE 1: Value is already an index, + # e.g. [0], [7,4,2], slice(0,4,2), + # numpy.array([2,4,7]), + # [True,False,True] + if debug: + logger.debug(" 1-d CASE 1:") # pragma: no cover + + index = value + + if envelope or full: + # Set ind + ind = np.zeros((size,), bool) + ind[index] = True + # Placeholder to be overwritten later + index = None + + if n_constructs > 1 and index is not None: + # Multiple conditions: Convert 'index' to + # a boolean array + i = np.zeros((size,), bool) + i[index] = True + index = i + + elif ( + item is not None + and isinstance(value, Query) + and value.operator in ("wi", "wo") + and item.construct_type == "dimension_coordinate" + and self.iscyclic(axis) + ): + # 1-d CASE 2: Axis is cyclic and subspace + # criterion is a 'within' or + # 'without' Query instance + if debug: + logger.debug(" 1-d CASE 2:") # pragma: no cover + + size = item.size + if item.increasing: + anchor = value.value[0] else: - start = size - parameters["shift"] - stop = start + size - if stop > size: - stop -= size - - index = slice(start, stop, 1) - - if full: - # Set ind - try: - index = normalize_slice(index, size, cyclic=True) - except IndexError: - # Index is not a cyclic slice - ind = (np.arange(size)[index],) + anchor = value.value[1] + + item = item.persist() + parameters = {} + item = item.anchor(anchor, parameters=parameters) + n = np.roll(np.arange(size), parameters["shift"], 0) + if value.operator == "wi": + n = n[item == value] + if not n.size: + raise ValueError( + "No indices found from: " + f"{identity}={value!r}" + ) + + start = n[0] + stop = n[-1] + 1 else: - # Index is a cyclic slice - ind = ( - np.arange(size)[ + # "wo" operator + n = n[item == wi(*value.value)] + if n.size == size: + raise ValueError( + "No indices found from: " + f"{identity}={value!r}" + ) + + if n.size: + start = n[-1] + 1 + stop = start - n.size + else: + start = size - parameters["shift"] + stop = start + size + if stop > size: + stop -= size + + index = slice(start, stop, 1) + + if envelope or full: + # Set ind + try: + index = normalize_slice( + index, size, cyclic=True + ) + except IndexError: + # Index is not a cyclic slice + ind = np.zeros((size,), bool) + ind[index] = True + # np.arange(size)[index] + else: + # Index is a cyclic slice + if n_constructs > 1: + raise ValueError( + "Error: Can't specify multiple " + "conditions for a single axis when " + f"one of those condtions ({value!r}) " + "is effectively a cyclic slice: " + f"{index}. Consider applying the " + "conditions separately." + ) + + ind = np.zeros((size,), bool) + ind[ np.arange( index.start, index.stop, index.step ) - ], - ) + ] = True - # Placeholder which will be overwritten later - index = None + # Placeholder to be overwritten later + index = None - elif item is not None: - # 1-d CASE 3: All other 1-d cases - if debug: - logger.debug(" 1-d CASE 3:") # pragma: no cover + if n_constructs > 1 and index is not None: + # Multiple conditions: Convert 'index' to + # a boolean array + i = np.zeros((size,), bool) + i[index] = True + index = i - index = item == value + elif item is not None: + # 1-d CASE 3: All other 1-d cases + if debug: + logger.debug(" 1-d CASE 3:") # pragma: no cover - # Performance: Convert the 1-d 'index' to a numpy - # array of bool. - # - # This is because Dask can be *very* slow at - # instantiation time when the 'index' is a Dask - # array, in which case contents of 'index' are - # unknown. - index = np.asanyarray(index) + index = item == value - if envelope or full: - # Set ind + # Performance: Convert the 1-d 'index' to a + # numpy array of bool. + # + # This is because Dask can be *very* slow at + # instantiation time when the 'index' is a + # Dask array, in which case contents of + # 'index' are unknown. index = np.asanyarray(index) - if np.ma.isMA(index): - ind = np.ma.where(index) - else: - ind = np.where(index) - # Placeholder which will be overwritten later - index = None + if envelope or full: + # Set ind + index = np.asanyarray(index) + if np.ma.isMA(index): + ind = np.ma.where(index)[0] + else: + ind = np.where(index)[0] + + # Placeholder to be overwritten later + index = None + + if n_constructs > 1 and ind is not None: + # Multiple conditions: Convert 'ind' to a + # boolean array (note that 'index' is + # already a boolean array) + i = np.zeros((size,), bool) + i[ind] = True + ind = i + else: - # Convert bool to int, to save memory. - size = domain_axes[axis].get_size() - index = normalize_index(index, (size,))[0] - else: - raise ValueError( - "Could not find a unique construct with identity " - f"{identity!r} from which to infer the indices." - ) + raise ValueError( + "Could not find a unique construct with identity " + f"{identity!r} from which to infer the indices." + ) - if debug: - logger.debug( - f" index = {index}\n ind = {ind}" - ) # pragma: no cover + if debug: + logger.debug( + f" index = {index}\n ind = {ind}" + ) # pragma: no cover + + if n_constructs > 1: + # Multiple conditions: Update the 'ind0' and + # 'index0' boolean arrays with the latest + # 'ind' and 'index' + if ind is not None: + # Note that 'index' must be None when + # 'ind' is not None, so no need to update + # 'index0' in this case. + if ind0 is None: + ind0 = ind + else: + ind0 &= ind + else: + if index0 is None: + index0 = index + else: + index0 &= index + + # Finalise 'ind' and 'index' + if n_constructs > 1: + # Multiple conditions + if ind0 is not None: + ind = ind0 + + if index0 is not None: + index = index0 + + if ind is not None: + ind = normalize_index(ind, (size,))[0] + ind = (ind,) + + if index is not None and getattr(index, "dtype", None) == bool: + index = normalize_index(index, (size,))[0] # Put the index into the correct place in the list of # indices. @@ -554,7 +628,7 @@ def _indices(self, config, data_axes, ancillary_mask, kwargs): # ---------------------------------------------------- if debug: logger.debug( - f" {n_items} N-d constructs: {constructs!r}\n" + f" {n_constructs} N-d constructs: {constructs!r}\n" f" {len(points)} points : {points!r}\n" ) # pragma: no cover @@ -626,7 +700,7 @@ def _indices(self, config, data_axes, ancillary_mask, kwargs): # outside of the cell. This could happen if the cells # are not rectilinear (e.g. for curvilinear latitudes # and longitudes arrays). - if n_items == constructs[0].ndim == len(bounds) == 2: + if n_constructs == constructs[0].ndim == len(bounds) == 2: point2 = [] for v, construct in zip(points, transposed_constructs): if isinstance(v, Query) and v.iscontains(): diff --git a/cf/test/test_Field.py b/cf/test/test_Field.py index 3e0f74979f..881ddbb3eb 100644 --- a/cf/test/test_Field.py +++ b/cf/test/test_Field.py @@ -1159,6 +1159,7 @@ def test_Field_indices(self): x = f.dimension_coordinate("X") x[...] = np.arange(0, 360, 40) + x.set_property("long_name", "grid_longitude") x.set_bounds(x.create_bounds()) f.cyclic("X", iscyclic=True, period=360) @@ -1440,6 +1441,53 @@ def test_Field_indices(self): with self.assertRaises(ValueError): f.indices(grid_longitude=cf.wo(-180, 180)) + # Multiple conditions for one axis + axis = f.get_data_axes("grid_longitude")[0] + + indices = f.indices( + **{"grid_longitude": cf.wi(0, 360), axis: [1, 3, 5, 6]} + ) + g = f[indices] + x = g.dimension_coordinate("X").array + self.assertEqual(g.shape, (1, 10, 4)) + self.assertTrue((x == [40, 120, 200, 240]).all()) + + indices = f.indices( + **{"grid_longitude": cf.wi(0, 180), axis: [1, 3, 5, 6]} + ) + g = f[indices] + x = g.dimension_coordinate("X").array + self.assertEqual(g.shape, (1, 10, 2)) + self.assertTrue((x == [40, 120]).all()) + + indices = f.indices( + "envelope", **{"grid_longitude": cf.wi(0, 180), axis: [1, 3, 5, 6]} + ) + g = f[indices] + x = g.dimension_coordinate("X").array + self.assertEqual(g.shape, (1, 10, 3)) + self.assertTrue((x == [40, 80, 120]).all()) + + indices = f.indices( + "full", **{"grid_longitude": cf.wi(0, 180), axis: [1, 3, 5]} + ) + g = f[indices] + x = g.dimension_coordinate("X").array + self.assertEqual(g.shape, (1, 10, 9)) + self.assertTrue((x == [0, 40, 80, 120, 160, 200, 240, 280, 320]).all()) + + indices = f.indices( + **{ + "grid_longitude": cf.wi(50, 350), + axis: [1, 2, 3, 4, 5, 6, 7], + "long_name=grid_longitude": slice(1, None, 2), + } + ) + g = f[indices] + x = g.dimension_coordinate("X").array + self.assertEqual(g.shape, (1, 10, 3)) + self.assertTrue((x == [120, 200, 280]).all()) + # 2-d lon = f.construct("longitude").array lon = np.transpose(lon) @@ -2925,15 +2973,15 @@ def test_Field_auxiliary_to_dimension_to_auxiliary(self): def test_Field_subspace_ugrid(self): f = cf.read(self.ugrid_global)[0] - with self.assertRaises(ValueError): - # Can't specify 2 conditions for 1 axis - g = f.subspace(X=cf.wi(40, 70), Y=cf.wi(-20, 30)) - g = f.subspace(X=cf.wi(40, 70)) g = g.subspace(Y=cf.wi(-20, 30)) self.assertTrue(g.aux("X").data.range() < 30) self.assertTrue(g.aux("Y").data.range() < 50) + g = f.subspace(X=cf.wi(40, 70), Y=cf.wi(-20, 30)) + self.assertTrue(g.aux("X").data.range() < 30) + self.assertTrue(g.aux("Y").data.range() < 50) + def test_Field_pad_missing(self): """Test Field.pad_missing.""" f = cf.example_field(0)