Skip to content

Commit 500407e

Browse files
committed
WIP - untested.
1 parent 5df7705 commit 500407e

File tree

2 files changed

+164
-0
lines changed

2 files changed

+164
-0
lines changed

lib/ncdata/utils/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
"""General user utility functions."""
22
from ._compare_nc_datasets import dataset_differences, variable_differences
33
from ._copy import ncdata_copy
4+
from ._dim_indexing import Slicer, index_by_dimensions
45
from ._save_errors import save_errors
56

67
__all__ = [
8+
"Slicer",
79
"dataset_differences",
10+
"index_by_dimensions",
811
"ncdata_copy",
912
"save_errors",
1013
"variable_differences",

lib/ncdata/utils/_dim_indexing.py

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
from numbers import Number
2+
from typing import Any, List, Mapping, Union
3+
4+
import dask.array as da
5+
6+
from ncdata import NcData
7+
from ncdata.utils import ncdata_copy
8+
9+
10+
def index_by_dimensions(ncdata: NcData, **dim_index_kwargs: Any) -> NcData:
    """
    Index an NcData over dimensions.

    Parameters
    ----------
    ncdata
        input data
    dim_index_kwargs
        specify indexing to apply to dimensions, as ``<dimension-name>=<key>``.
        E.G. ``x=1``, ``time=slice(0, 100)``, ``levels=[1,2,5]``.

    Returns
    -------
        copy of input with dimensions, and all relevant variables, sub-indexed.
        The input itself is not modified.

    Raises
    ------
    ValueError
        if a keyword names a dimension which is not in ``ncdata.dimensions``.

    Notes
    -----
    Where a dimension key is a single value, the dimension will be *removed*.
    This mimics how numpy arrays behave, i.e. the difference between a[1] and a[1:2]

    Only the variables directly in ``ncdata.variables`` are visited.

    Examples
    --------
    ncdata = index_by_dimensions(ncdata, time=slice(0, 10))  # equivalent to [:10]
    ncdata = index_by_dimensions(ncdata, levels=[1,2,5])
    ncdata = index_by_dimensions(ncdata, time=3, levels=slice(2, 10, 3))

    See Also
    --------
    :class:`Slicer` provides the same function with a slicing syntax
    """
    # Start by copying the input : then modify that in-place
    ncdata = ncdata_copy(ncdata)

    # N.B. keyword names are unique by construction, so a dimension can never be
    # specified more than once here : no "repeated dimension" check is needed.
    for dim_name, key in dim_index_kwargs.items():
        # Dimension names must occur in the ncdata.
        dimension = ncdata.dimensions.get(dim_name)
        if dimension is None:
            raise ValueError(
                f"Dimension {dim_name!r} is not present in 'ncdata'."
            )

        # Hopefully this replicates how numpy makes this decision?
        # NOTE(review): 'bool' is also a Number, so True/False are treated as
        # scalar keys here -- confirm whether that is wanted.
        remove_dim = isinstance(key, Number)

        # TODO:
        #  Key types must be supported:
        #   * int (or other numeric, including numpy scalars ?)
        #   * list of int
        #   * slice object
        #   * 1-D array of numeric
        #  Key "special" types we could possibly error or convert, to avoid confusion
        #  with numpy behaviours ? :
        #   arrays, tuples, booleans, None, newaxis, ellipsis ...

        # Index the data of all referencing variables
        for var in ncdata.variables.values():
            if dim_name not in var.dimensions:
                continue

            # Find the (single) array axis which maps to this dimension.
            # N.B. the unpacking fails with a ValueError if the variable uses the
            # dimension more than once.
            (i_slicedim,) = [
                i
                for i, name in enumerate(var.dimensions)
                if name == dim_name
            ]

            # Take everything on all other axes, and apply 'key' to the chosen one.
            slices = [slice(None)] * len(var.dimensions)
            slices[i_slicedim] = key
            var.data = var.data[tuple(slices)]

            # A scalar key drops the axis from the data, so also drop the name.
            # The names are a sequence, which can't be indexed (or 'del'-ed) by
            # name : so rebuild it without the removed entry instead.
            if remove_dim:
                var.dimensions = tuple(
                    name for name in var.dimensions if name != dim_name
                )

        # Remove or reduce the dimension itself.
        if remove_dim:
            del ncdata.dimensions[dim_name]
        else:
            # calculate the new dim size, using numpy-like logic
            # TODO: there is probably a better way of calculating this ?
            new_size = da.zeros(dimension.size)[key].shape[0]
            dimension.size = new_size

    return ncdata
104+
105+
106+
class Slicer:
    """
    An object which can index an NcData over its dimensions.

    This wraps the :func:`index_by_dimensions` function for convenience, returning an
    object which supports the Python extended slicing syntax.

    Examples
    --------
    data = Slicer(ncdata, 'time')[:10]
    data = Slicer(ncdata, 'level')[[1, 2, 5]]
    data = Slicer(ncdata, ['level', 'time', 'x', 'y'])[1, :3, 2:10:3, ::-1]
    """

    def __init__(self, ncdata: NcData, dimensions: Union[str, List[str]]):
        """
        Create an indexer for an NcData, applying to specific dimensions.

        This can then be indexed to produce a derived (sub-indexed) dataset.

        Parameters
        ----------
        ncdata
            input data
        dimensions
            one or more dimension names, to which successive index keys will be applied

        Raises
        ------
        ValueError
            if any dimension name is given more than once.
        """
        self.ncdata = ncdata
        if isinstance(dimensions, str):
            # A single name : N.B. must not be iterated as a sequence of characters.
            dimensions = [dimensions]
        dim_names = tuple(dimensions)

        # Check for and fail repeated dimensions: the meaning would be unclear (!)
        # If allowed, a later key would silently replace an earlier one.
        for i_name, name in enumerate(dim_names):
            if name in dim_names[:i_name]:
                msg = (
                    f"Dimensions to index, {dim_names}, "
                    f"includes dimension {name!r} more than once."
                )
                raise ValueError(msg)

        self.dim_names = dim_names

    def __getitem__(self, keys) -> NcData:
        """
        Return an indexed portion of self.ncdata.

        Index with 'keys' in the specified dimensions : the keys are applied, in
        order, to the dimension names given at creation.

        Raises
        ------
        ValueError
            if there are more keys than dimension names.
        """
        if not isinstance(keys, tuple):
            # Single key, e.g. 1, slice(None), [2,3,4], array([2,3])
            # N.B. *otherwise* keys is always a tuple
            # A single tuple argument is passed as-is, i.e. interprets as multiple keys
            keys = (keys,)

        n_keys = len(keys)
        if n_keys > len(self.dim_names):
            msg = (
                f"Too many index keys, {n_keys}, for the specified indexing dimension "
                f"names, {self.dim_names!r}."
            )
            raise ValueError(msg)

        # NB too *few* keys is not a problem, since 'zip' truncates for us.
        dim_kwargs = dict(zip(self.dim_names, keys))

        return index_by_dimensions(self.ncdata, **dim_kwargs)

0 commit comments

Comments
 (0)