|
| 1 | +from numbers import Number |
| 2 | +from typing import Any, List, Mapping, Union |
| 3 | + |
| 4 | +import dask.array as da |
| 5 | + |
| 6 | +from ncdata import NcData |
| 7 | +from ncdata.utils import ncdata_copy |
| 8 | + |
| 9 | + |
def index_by_dimensions(
    ncdata: NcData, **dim_index_kwargs: Any
) -> NcData:
    """
    Index an NcData over dimensions.

    Parameters
    ----------
    ncdata
        input data.  The indexing is applied to a copy made with ``ncdata_copy``.
    dim_index_kwargs
        specify indexing to apply to dimensions, as ``<dimension-name>=<key>``.
        E.G. ``x=1``, ``time=slice(0, 100)``, ``levels=[1,2,5]``.

    Returns
    -------
    copy of input with dimensions, and all relevant variables, sub-indexed.

    Raises
    ------
    ValueError
        If a named dimension is not present in ``ncdata``, or if a variable
        references an indexed dimension more than once.

    Notes
    -----
    Where a dimension key is a single value, the dimension will be *removed*.
    This mimics how numpy arrays behave, i.e. the difference between a[1] and a[1:2]

    Only the variables in ``ncdata.variables`` are indexed.

    Examples
    --------
    ncdata = index_by_dimensions(ncdata, time=slice(0, 10))  # equivalent to [:10]
    ncdata = index_by_dimensions(ncdata, levels=[1,2,5])
    ncdata = index_by_dimensions(ncdata, time=3, levels=slice(2, 10, 3))

    See Also
    --------
    :class:`Slicer` provides the same function with a slicing syntax
    """
    # Start by copying the input : then modify that in-place
    ncdata = ncdata_copy(ncdata)
    for dim_name, key in dim_index_kwargs.items():
        # Dimension names must occur in the ncdata.
        # N.B. there is no need to check for repeated names here, as Python keyword
        # arguments can never contain the same name twice.
        dimension = ncdata.dimensions.get(dim_name)
        if dimension is None:
            raise ValueError(
                f"Dimension {dim_name!r} is not present in 'ncdata'."
            )

        # Hopefully this replicates how numpy makes this decision?
        remove_dim = isinstance(key, Number)

        # TODO:
        # Key types must be supported:
        #   * int (or other numeric, including numpy scalars ?)
        #   * list of int
        #   * slice object
        #   * 1-D array of numeric
        # Key "special" types we could possibly error or convert, to avoid confusion
        # with numpy behaviours ? :
        #   arrays, tuples, booleans, None, newaxis, ellipsis ...

        # Index the data of all referencing variables
        for var in ncdata.variables.values():
            if dim_name not in var.dimensions:
                continue

            var_dims = list(var.dimensions)
            if var_dims.count(dim_name) > 1:
                # Indexing one dimension on several axes at once would be ambiguous.
                raise ValueError(
                    f"Cannot index dimension {dim_name!r} : it occurs more than "
                    f"once in the dimensions of a variable, {tuple(var_dims)!r}."
                )
            i_slicedim = var_dims.index(dim_name)

            # construct a full set of keys : all-of-everything, except the one dim
            slices = [slice(None)] * len(var_dims)
            slices[i_slicedim] = key

            # index the data
            var.data = var.data[tuple(slices)]

            # also remove the dim from the variable, if it will be removed.
            # N.B. rebuild the dimensions, since an entry can't be deleted by *name*
            # (and not at all, if the dimensions are stored as a tuple).
            if remove_dim:
                del var_dims[i_slicedim]
                var.dimensions = tuple(var_dims)

        # Remove or reduce the dimension itself.
        if remove_dim:
            del ncdata.dimensions[dim_name]
        else:
            # calculate the new dim size, using numpy-like logic : index a (lazy)
            # dummy array of the original length, and see what size results.
            # TODO: there is probably a better way of calculating this ?
            new_size = da.zeros(dimension.size)[key].shape[0]
            dimension.size = new_size

    return ncdata
| 104 | + |
| 105 | + |
class Slicer:
    """
    An object which can index an NcData over its dimensions.

    This wraps the :func:`index_by_dimensions` function for convenience, returning an
    object which supports the Python extended slicing syntax.

    Examples
    --------
    data = Slicer(ncdata, 'time')[:10]
    data = Slicer(ncdata, 'level')[[1, 2, 5]]
    data = Slicer(ncdata, ['level', 'time', 'x', 'y'])[1, :3, 2:10:3, ::-1]
    """

    def __init__(self, ncdata: "NcData", dimensions: Union[str, List[str]]):
        """
        Create an indexer for an NcData, applying to specific dimensions.

        This can then be indexed to produce a derived (sub-indexed) dataset.

        Parameters
        ----------
        ncdata
            input data
        dimensions
            one dimension name, or a list of dimension names, to which successive
            index keys will be applied
        """
        self.ncdata = ncdata
        if isinstance(dimensions, str):
            # A single name : treat as a list of one.
            dimensions = [dimensions]
        self.dim_names = tuple(dimensions)

    def __getitem__(self, keys) -> "NcData":
        """
        Return an indexed portion of self.ncdata.

        Index with 'keys' in the specified dimensions : successive keys apply to
        successive names in ``self.dim_names``.

        Raises
        ------
        ValueError
            If there are more keys than indexing dimension names.
        """
        if not isinstance(keys, tuple):
            # Single key, e.g. 1, slice(None), [2,3,4], array([2,3])
            # N.B. *otherwise* keys is always a tuple
            # A single tuple argument is passed as-is, i.e. interprets as multiple keys
            keys = (keys,)

        n_keys = len(keys)
        if n_keys > len(self.dim_names):
            # N.B. *both* parts must be f-strings, for the names to appear
            msg = (
                f"Too many index keys, {n_keys}, for the specified indexing dimension "
                f"names, {self.dim_names!r}."
            )
            raise ValueError(msg)

        # NB too *few* keys is not a problem, since 'zip' truncates for us.
        dim_kwargs = dict(zip(self.dim_names, keys))

        return index_by_dimensions(self.ncdata, **dim_kwargs)
0 commit comments