Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ Version NEXTVERSION
* New optional backend for netCDF-3 in `cfdm.read` that allows
parallel reading: ``netcdf_file``
(https://github.com/NCAS-CMS/cfdm/issues/375)
* Fix bug in `cfdm.netcdf_indexer` that sometimes caused a failure
with a `np.newaxis` index
(https://github.com/NCAS-CMS/cfdm/issues/395)
* Fix bug in `cfdm.read` that wouldn't read non-Zarr and Zarr datasets
from the same directory
(https://github.com/NCAS-CMS/cfdm/issues/391)
Expand Down
1 change: 1 addition & 0 deletions cfdm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
RTOL,
abspath,
atol,
axis_dropping_index,
chunksize,
configuration,
dirname,
Expand Down
4 changes: 2 additions & 2 deletions cfdm/data/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import operator
from itertools import product, zip_longest
from math import prod
from numbers import Integral
from os.path import commonprefix

import numpy as np
Expand All @@ -19,6 +18,7 @@
_DEPRECATION_ERROR_KWARGS,
_DEPRECATION_ERROR_METHOD,
_numpy_allclose,
axis_dropping_index,
display_data,
is_log_level_info,
parse_indices,
Expand Down Expand Up @@ -783,7 +783,7 @@ def __getitem__(self, indices):
new_axes = [
axis
for axis, x in zip(self._axes, indices)
if not isinstance(x, Integral) and getattr(x, "shape", True)
if not axis_dropping_index(x) and getattr(x, "shape", True)
]
new._axes = new_axes

Expand Down
8 changes: 3 additions & 5 deletions cfdm/data/mixin/indexmixin.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
from numbers import Integral

import numpy as np

from cfdm.functions import indices_shape, parse_indices
from cfdm.functions import axis_dropping_index, indices_shape, parse_indices


class IndexMixin:
Expand Down Expand Up @@ -127,7 +125,7 @@ def __getitem__(self, index):
i = 0
j = 0
for ind0, reference_size in zip(index0, reference_shape[:]):
if isinstance(ind0, Integral):
if axis_dropping_index(ind0):
# The previous call to __getitem__ resulted in a
# dimension being removed (i.e. 'ind0' is
# integer-valued). Therefore 'index1' must have fewer
Expand All @@ -142,7 +140,7 @@ def __getitem__(self, index):

i += 1
if ind0 is newaxis:
if isinstance(ind1, Integral):
if axis_dropping_index(ind1):
# A previously introduced new axis is being
# removed by an integer index
if ind1 not in (0, -1):
Expand Down
83 changes: 48 additions & 35 deletions cfdm/data/netcdfindexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,11 @@

import logging
from math import prod
from numbers import Integral

import numpy as np

from cfdm.functions import axis_dropping_index

logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -230,39 +231,51 @@ def __getitem__(self, index):
# ------------------------------------------------------------
# Index the variable
# ------------------------------------------------------------
try:
data = self._index(index)
except (IndexError, AttributeError):
# Assume we are here because we have one or more
# np.newaxis values in 'index', and the variable doesn't
# support that type of indexing. It is known that
# `netCDF4` and `zarr` raise an IndexError and `h5netcdf`
# raises an AttributeError.

# Subspace the variable with the np.newaxis elements
# removed

# Create the index without any new-axis elements. We'll first
# subsapce the variable without new axes (given that some
# variables don't like them, such as `h5py.Variable`), and
# reinstate them (if any) on the `numpy` array later.
#
# E.g. index : (1, np.newaxis, slice(1, 5))
# => index1: (1, slice(1, 5))
index1 = index
new_axes = False
if index1 is not Ellipsis:
if not isinstance(index, tuple):
index = (index,)

newaxis = np.newaxis
index1 = [i for i in index if i is not newaxis]
data = self._index(tuple(index1))

# Now subspace the result (which we're assuming is
# something that likes np.newaxis indices) with the
# np.newaxis elements reinstated.
index2 = [i if i is newaxis else slice(None) for i in index]
data = self._index(tuple(index2), data=data)

# E.g. index : (1, np.newaxis, slice(1, 5))
# => index1: (1, slice(1, 5))
# and index2: (slice(None), np.newaxis, slice(None))
except ValueError:
# Something went wrong, which is indicative of the
# variable not supporting the appropriate slicing method
# (e.g. `h5netcdf` might have returned "ValueError: Step
# must be >= 1 (got -2)"). Therefore we'll just get the
# entire array as a numpy array, and then try indexing
# that.
index1 = tuple([i for i in index if i is not newaxis])
new_axes = len(index1) < len(index)

try:
# Subspace with any new-axis elements removed
data = self._index(index1)
except Exception:
# Something went wrong. Therefore we'll just get the
# entire array as a numpy array, and try subspacing that.
data = self._index(Ellipsis)
data = self._index(index, data=data)
else:
if new_axes:
# There were new-axis elements in the original index,
# so apply them to the data.
#
# E.g. index : (1, np.newaxis, slice(1, 5))
# => index1: (1, slice(1, 5))
# => index2: (np.newaxis, slice(None))
index2 = []
for i in index:
if axis_dropping_index(i):
continue

if i is not newaxis:
i = slice(None)

index2.append(i)

data = self._index(tuple(index2), data=data)

# Reset a netCDF4 variable's scale and mask behaviour
if netCDF4_scale:
Expand All @@ -271,7 +284,7 @@ def __getitem__(self, index):
if netCDF4_mask:
variable.set_auto_mask(True)

# Convert str, char, and object data to byte strings
# Convert str, char, and object data to byte strings1
if isinstance(data, str):
data = np.array(data, dtype="S")
elif data.dtype.kind in "OSU":
Expand Down Expand Up @@ -474,7 +487,7 @@ def _index(self, index, data=None):
# so that their axes are not dropped yet (they will be dropped
# later).
index0 = [
slice(i, i + 1) if isinstance(i, Integral) else i for i in index
slice(i, i + 1) if axis_dropping_index(i) else i for i in index
]

if data_orthogonal_indexing or len(axes_with_list_indices) <= 1:
Expand Down Expand Up @@ -532,7 +545,7 @@ def _index(self, index, data=None):
data = data[tuple(index2)]

# Apply any integer indices that will drop axes
index3 = [0 if isinstance(i, Integral) else slice(None) for i in index]
index3 = [0 if axis_dropping_index(i) else slice(None) for i in index]
if index3:
data = data[tuple(index3)]

Expand Down Expand Up @@ -1013,7 +1026,7 @@ def index_shape(cls, index, shape):
# List of int
size = len(ind)
else:
# Index is Integral
# Index is axis-dropping
continue

implied_shape.append(size)
Expand Down
3 changes: 0 additions & 3 deletions cfdm/data/pyfivearray.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,6 @@ def _get_array(self, index=None):
# Cache the variable
self._set_component("variable", variable, copy=False)

self.close(dataset0)
del dataset, dataset0
Comment on lines -85 to -86
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did you intend to remove these cleanup calls? If so what's the justification - were they redundant or made so?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah yes - I did mean to move these :) It was a bug to have them higher because once closed, the self._attributes(variable) call a few lines below wouldn't work.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry - a bit confused. The correct close is lower down in the code - these deleted lines crept in, somewhere down the line and needed to be booted out.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK sure - so it's all good then? I can mark this as resolved or go for it if you want to.


# Get the data, applying masking and scaling as required.
array = netcdf_indexer(
variable,
Expand Down
60 changes: 54 additions & 6 deletions cfdm/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2486,6 +2486,53 @@ def indices_shape(indices, full_shape, keepdims=True):
return shape


def axis_dropping_index(index):
"""Whether a `numpy` index is axis-dropping.

An axis-dropping index is typicall integer-like.

.. versionadded:: (cfdm) NEXTVERSION

:Parameters:

index:
The `numpy` index.

:Returns:

`bool`
`True` if the index would drop an axis during `numpy`
slicing, otherwise `False`.

**Examples**

>>> axis_dropping_index(2)
True
>>> axis_dropping_index(np.array(2))
True
>>> axis_dropping_index(np.int64(2))
True
>>> axis_dropping_index([2])
False
>>> axis_dropping_index(np.array([2]))
False
>>> axis_dropping_index(slice(None))
False
>>> axis_dropping_index([True, False])
False

"""
# Standard Python integer and numpy integer scalar
if isinstance(index, (Integral, np.integer)):
return True

# 0-d numpy array
if isinstance(index, np.ndarray) and not index.ndim:
return np.issubdtype(index.dtype, np.integer)

return False


def parse_indices(shape, indices, keepdims=True, newaxis=False):
"""Parse indices for array access and assignment.

Expand Down Expand Up @@ -2581,12 +2628,13 @@ def parse_indices(shape, indices, keepdims=True, newaxis=False):
"New axis indices are not allowed"
)

# Check that any integer indices are in range for the dimension sizes
# before integral indices are converted to slices below, for (one for)
# consistent behaviour between setitem and getitem. Note out-of-range
# slicing works in Python generally (slices are allowed to extend past
# end points with clipping applied) so we allow those.
integral_index = isinstance(index, Integral)
# Check that any integer indices are in range for the
# dimension sizes before integral indices are converted to
# slices below, for (one for) consistent behaviour between
# setitem and getitem. Note out-of-range slicing works in
# Python generally (slices are allowed to extend past end
# points with clipping applied) so we allow those.
integral_index = axis_dropping_index(index)
if integral_index and not -size <= index < size: # could be negative
raise IndexError(
f"Index {index!r} is out of bounds for axis {i} with "
Expand Down
8 changes: 8 additions & 0 deletions cfdm/test/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -780,6 +780,14 @@ def test_dirname(self):
"/data",
)

def test_axis_dropping_axis(self):
"""Test cfdm.axis_dropping_index."""
for i in (2, np.array(2), np.int64(2), np.int32(2)):
self.assertTrue(cfdm.axis_dropping_index(i))

for i in ([2], np.array([2]), slice(None), [True, False]):
self.assertFalse(cfdm.axis_dropping_index(i))


if __name__ == "__main__":
print("Run date:", datetime.datetime.now())
Expand Down
21 changes: 21 additions & 0 deletions cfdm/test/test_netcdf_indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,27 @@ def test_netcdf_indexer_index_shape(self):
)
self.assertEqual(x.index_shape((slice(1, 5, -3), 3), (10, 20)), [0])

def test_netcdf_indexer_newaxis(self):
"""Test netcdf_indexer with np.newaxis indices."""
a = np.arange(12).reshape(3, 4)
v = cfdm.netcdf_indexer(a)
self.assertEqual(v[...].shape, (3, 4))
self.assertEqual(v[:2, :3].shape, (2, 3))
self.assertEqual(v[:2, np.newaxis, :3].shape, (2, 1, 3))
self.assertEqual(v[1, :3].shape, (3,))
self.assertEqual(v[1, np.newaxis, :3].shape, (1, 3))

# Test with netCDF backends
for klass in (cfdm.H5netcdfArray, cfdm.NetCDF4Array, cfdm.PyfiveArray):
k = klass("example_field_0.nc", "time", shape=())
dataset, address = k.open()
variable = dataset[address]
v = cfdm.netcdf_indexer(variable)
a = v[...]
self.assertEqual(a.ndim, 0)
b = v[(np.newaxis,)]
self.assertEqual(b.ndim, 1)


if __name__ == "__main__":
print("Run date:", datetime.datetime.now())
Expand Down