Skip to content

Commit 6c2371b

Browse files
author
Luke Shaw
committed
Cleaning up code
1 parent 75228d3 commit 6c2371b

File tree

3 files changed

+23
-16
lines changed

3 files changed

+23
-16
lines changed

bench/ndarray/fancy_index.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,7 @@ def genarray(r, ndims=2, verbose=True):
3939
blocks = (max(d // 10, 1),) * ndims
4040
urlpath = f'linspace{r}{ndims}D.b2nd'
4141
t = time.time()
42-
arr = blosc2.linspace(0, 1000, num=np.prod(shape), shape=shape, dtype=np.float64,
43-
urlpath=urlpath, mode='w')
42+
arr = blosc2.linspace(0, 1000, num=np.prod(shape), shape=shape, dtype=np.float64, urlpath=urlpath, mode='w')
4443
t = time.time() - t
4544
arrsize = np.prod(arr.shape) * arr.dtype.itemsize / 2 ** 30
4645
if verbose:
@@ -135,10 +134,10 @@ def timer(arr):
135134
err = (mean - times.min(axis=1), times.max(axis=1)-mean)
136135
plt.bar(x + w, mean , width, color=c, label=label, yerr=err, capsize=5, ecolor='k',
137136
error_kw=dict(lw=2, capthick=2, ecolor='k'))
138-
labs+=label
137+
labs += label
139138

140139
filename = f"{labs}{NDIMS}D" + "sparse" if SPARSE else f"{labs}{NDIMS}D"
141-
filename+=blosc2.__version__.replace('.','_')
140+
filename += blosc2.__version__.replace('.','_')
142141

143142
with open(f"{filename}.pkl", 'wb') as f:
144143
pickle.dump({'times':result_tuple, 'sizes':genuine_sizes}, f)

src/blosc2/ndarray.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1478,17 +1478,19 @@ def get_fselection_numpy(self, key: list | np.ndarray) -> np.ndarray:
14781478
14791479
"""
14801480
# TODO: Make this faster and avoid running out of memory - avoid broadcasting keys
1481+
14811482
## Can't do this because ndindex doesn't support all the same indexing cases as Numpy
14821483
# if math.prod(self.shape) * self.dtype.itemsize < blosc2.MAX_FAST_PATH_SIZE:
14831484
# return self[:][key] # load into memory for smallish arrays
14841485
shape = self.shape
14851486
chunks = self.chunks
1486-
# after this, all indices are slices or arrays of integers
1487-
# moreover, all arrays are consecutive (otherwise an error is raised)
1487+
14881488
# TODO: try to optimise and avoid this expand which seems to copy - maybe np.broadcast
14891489
_slice = ndindex.ndindex(key).expand(shape)
14901490
out_shape = _slice.newshape(shape)
14911491
_slice = _slice.raw
1492+
# now all indices are slices or arrays of integers (or booleans)
1493+
# moreover, all arrays are consecutive (otherwise an error is raised)
14921494

14931495
if np.all([isinstance(s, (slice, np.ndarray)) for s in _slice]) and np.all(
14941496
[s.dtype is not bool for s in _slice if isinstance(s, np.ndarray)]
@@ -1500,9 +1502,12 @@ def get_fselection_numpy(self, key: list | np.ndarray) -> np.ndarray:
15001502
begin, end = arridxs[0], arridxs[-1] + 1
15011503
flat_shape = tuple((i.stop - i.start + (i.step - 1)) // i.step for i in _slice[:begin])
15021504
idx_dim = np.prod(_slice[begin].shape)
1505+
1506+
# TODO: find a nicer way to do the copy maybe
15031507
arr = np.empty((idx_dim, end - begin), dtype=_slice[begin].dtype)
15041508
for i, s in enumerate(_slice[begin:end]):
15051509
arr[:, i] = s.reshape(-1) # have to do a copy
1510+
15061511
flat_shape += (idx_dim,)
15071512
flat_shape += tuple((i.stop - i.start + (i.step - 1)) // i.step for i in _slice[end:])
15081513
# out_shape could have new dims if indexing arrays are not all 1D
@@ -1520,7 +1525,7 @@ def get_fselection_numpy(self, key: list | np.ndarray) -> np.ndarray:
15201525
chunked_arr = np.ascontiguousarray(
15211526
chunked_arr
15221527
) # ensure C-order memory to allow structured dtype view
1523-
# use np.unique but avoid sort and copy
1528+
# TODO: check that avoids sort and copy (alternative: maybe do a bincount with structured data types?)
15241529
_, row_ids, idx_inv, chunk_nitems = np.unique(
15251530
chunked_arr.view([("", chunked_arr.dtype)] * chunked_arr.shape[1]),
15261531
return_counts=True,
@@ -1541,6 +1546,7 @@ def get_fselection_numpy(self, key: list | np.ndarray) -> np.ndarray:
15411546
slice_to_chunktuple(s, c) for s, c in zip(prior_tuple, chunks[:begin], strict=True)
15421547
]
15431548
cpost_slices = [slice_to_chunktuple(s, c) for s, c in zip(post_tuple, chunks[end:], strict=True)]
1549+
# TODO: rewrite to allow interleaved slices/array indexes
15441550
for chunk_i, chunk_idx in enumerate(unique_chunks):
15451551
start = 0 if chunk_i == 0 else chunk_nitems_cumsum[chunk_i - 1]
15461552
stop = chunk_nitems_cumsum[chunk_i]
@@ -4576,7 +4582,7 @@ def __setitem__(self, selection, input) -> np.ndarray:
45764582

45774583
def slice_to_chunktuple(s, n):
45784584
"""
4579-
# credit to ndindex for this function #
4585+
Adapted from _slice_iter in ndindex.ChunkSize.as_subchunks.
45804586
Parameters
45814587
----------
45824588
s : slice

tests/ndarray/test_ndarray.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -293,12 +293,14 @@ def test_oindex():
293293

294294

295295
@pytest.mark.parametrize("c", [None, 10])
296-
def test_findex(c):
297-
# Test 1d fast path
296+
def test_fancy_index(c):
297+
# Test 1d
298298
ndim = 1
299299
chunks = (c,) * ndim if c is not None else None
300300
dtype = np.dtype("float")
301-
d = 1 + int(blosc2.MAX_FAST_PATH_SIZE / dtype.itemsize) if c is None else 100 # just over fast path size
301+
d = (
302+
1 + int(blosc2.MAX_FAST_PATH_SIZE / dtype.itemsize) if c is None else 100
303+
) # just over numpy fast path size
302304
shape = (d,) * ndim
303305
arr = blosc2.linspace(0, 100, num=np.prod(shape), shape=shape, dtype=dtype, chunks=chunks)
304306
rng = np.random.default_rng()
@@ -314,7 +316,7 @@ def test_findex(c):
314316
ndim = 3
315317
d = (
316318
1 + int((blosc2.MAX_FAST_PATH_SIZE / 8) ** (1 / ndim)) if c is None else d
317-
) # just over fast path size
319+
) # just over numpy fast path size
318320
shape = (d,) * ndim
319321
chunks = (c,) * ndim if c is not None else None
320322
arr = blosc2.linspace(0, 100, num=np.prod(shape), shape=shape, dtype=dtype, chunks=chunks)
@@ -324,11 +326,11 @@ def test_findex(c):
324326
row = idx
325327
col = rng.permutation(idx)
326328
mask = rng.integers(low=0, high=2, size=(d,)) == 1
327-
#
328-
# ## Test fancy indexing for different use cases
329+
330+
# Test fancy indexing for different use cases
329331
m, M = np.min(idx), np.max(idx)
330332
nparr = arr[:]
331-
# # i)
333+
# i)
332334
b = arr[[m, M // 2, M]]
333335
n = nparr[[m, M // 2, M]]
334336
np.testing.assert_allclose(b, n)
@@ -363,7 +365,7 @@ def test_findex(c):
363365
n2 = nparr[[0, 1], 0, :]
364366
np.testing.assert_allclose(b1, n1)
365367
np.testing.assert_allclose(b2, n2)
366-
# TODO: Support array indices separate by slices
368+
# TODO: Support array indices separated by slices
367369
# b3 = arr[0, :, [0, 1]]
368370
# n3 = nparr[0, :, [0, 1]]
369371
# np.testing.assert_allclose(b3, n3)

0 commit comments

Comments
 (0)