Fix fancy indexing combined with stepped slices; now passes tests

lshaw8317 · lshaw8317 · commit 3507e80b8f3a · 2025-08-20T14:41:28.000+02:00
diff --git a/bench/ndarray/fancy_index.py b/bench/ndarray/fancy_index.py
@@ -37,9 +37,10 @@ def genarray(r, ndims=2, verbose=True):
     shape = (d,) * ndims
     chunks = (d // 4,) * ndims
     blocks = (max(d // 10, 1),) * ndims
+    urlpath = f'linspace{r}{ndims}D.b2nd'
     t = time.time()
     arr = blosc2.linspace(0, 1000, num=np.prod(shape), shape=shape, dtype=np.float64,
-                          urlpath=f'linspace{r}{ndims}D.b2nd', mode='w')
+                        urlpath=urlpath, mode='w')
     t = time.time() - t
     arrsize = np.prod(arr.shape) * arr.dtype.itemsize / 2 ** 30
     if verbose:
@@ -136,16 +137,17 @@ def timer(arr):
         error_kw=dict(lw=2, capthick=2, ecolor='k'))
         labs+=label
 
-filename = f"results{labs}{NDIMS}D" + "sparse" if SPARSE else f"results{labs}{NDIMS}D"
+filename = f"{labs}{NDIMS}D" + "sparse" if SPARSE else f"{labs}{NDIMS}D"
+filename+=blosc2.__version__.replace('.','_')
 
 with open(f"{filename}.pkl", 'wb') as f:
-    pickle.dump(result_tuple, f)
+    pickle.dump({'times':result_tuple, 'sizes':genuine_sizes}, f)
 
 plt.xlabel('Array size (GB)')
 plt.legend()
 plt.xticks(x-width, np.round(genuine_sizes, 2))
 plt.ylabel("Time (s)")
-plt.title(f"Fancy indexing performance comparison, {NDIMS}D" +f"{" sparse" if SPARSE else ""}")
+plt.title(f"Fancy indexing {blosc2.__version__}, {NDIMS}D" +f"{" sparse" if SPARSE else ""}")
 plt.gca().set_yscale('log')
 plt.savefig(f'plots/fancyIdx{filename}.png', format="png")
 plt.show()
diff --git a/bench/ndarray/fancy_index1D.py b/bench/ndarray/fancy_index1D.py
@@ -27,9 +27,9 @@
 
 NUMPY = True
 BLOSC = True
-ZARR = True
-HDF5 = True
-SPARSE = True
+ZARR = False
+HDF5 = False
+SPARSE = False
 
 if HDF5:
     SPARSE = True # HDF5 takes too long for non-sparse indexing
@@ -50,7 +50,7 @@ def genarray(r, verbose=True):
     return arr, arrsize
 
 
-target_sizes = np.float64(np.array([.2, .5, 1, 2, 5, 10]))
+target_sizes = np.float64(np.array([.1, .2, .5, 1, 2]))
 rng = np.random.default_rng()
 blosctimes = []
 nptimes = []
@@ -61,12 +61,12 @@ def genarray(r, verbose=True):
     arr, arrsize = genarray(d)
     genuine_sizes += [arrsize]
     idx = rng.integers(low=0, high=arr.shape[0], size=(1000,)) if SPARSE else rng.integers(low=0, high=arr.shape[0], size=(arr.shape[0]//4,))
-    sorted_idx = np.unique(np.sort(idx))
+    sorted_idx = np.sort(np.unique(idx))
     ## Test fancy indexing for different use cases
     def timer(arr):
         time_list = []
         if not (HDF5 or ZARR):
-             b = arr[[[sorted_idx], [idx]]]
+             b = arr[[[idx[::-1]], [idx]]]
              time_list += [time.time() - t]
              t = time.time()
         t = time.time()
@@ -114,15 +114,16 @@ def timer(arr):
         error_kw=dict(lw=2, capthick=2, ecolor='k'))
         labs+=label
 
-filename = f"results{labs}1Dsparse" if SPARSE else f"results{labs}1D"
+filename = f"{labs}1Dsparse" if SPARSE else f"{labs}1D"
+filename+=blosc2.__version__.replace('.','_')
 with open(filename+".pkl", 'wb') as f:
     pickle.dump({'times':result_tuple, 'sizes':genuine_sizes}, f)
 
 plt.xlabel('Array size (GB)')
 plt.legend()
 plt.xticks(x-width, np.round(genuine_sizes, 2))
 plt.ylabel("Time (s)")
-plt.title(f"Fancy indexing performance comparison, 1D {' sparse' if SPARSE else ''}")
+plt.title(f"Fancy indexing {blosc2.__version__}, 1D {' sparse' if SPARSE else ''}")
 plt.gca().set_yscale('log')
 plt.savefig(f'plots/{filename}.png', format="png")
 plt.show()
diff --git a/src/blosc2/ndarray.py b/src/blosc2/ndarray.py
@@ -1488,7 +1488,6 @@ def get_fselection_numpy(self, key: list | np.ndarray) -> np.ndarray:  # noqa: C
         _slice = ndindex.ndindex(key).expand(shape)
         out_shape = _slice.newshape(shape)
         _slice = _slice.raw
-        shape = np.array(shape)
 
         if np.all([isinstance(s, (slice, np.ndarray)) for s in _slice]) and np.all(
             [s.dtype is not bool for s in _slice if isinstance(s, np.ndarray)]
@@ -1518,20 +1517,24 @@ def get_fselection_numpy(self, key: list | np.ndarray) -> np.ndarray:  # noqa: C
                         end = num
                         f_ = 2
                         flat_shape += (arr.shape[-1],)
-                    flat_shape += ((i.stop - i.start) // i.step,)
+                        # slice length is ceil((stop - start) / step): with stop = start + n*step + k,
+                        # k in [1, step], (stop - start + step - 1) // step = n + 1 elements
+                    flat_shape += ((i.stop - i.start + (i.step - 1)) // i.step,)
             if not isinstance(arr, np.ndarray):  # might have missed last part of loop
                 arr = np.stack(arr)
                 flat_shape += (arr.shape[-1],)
             # out_shape could have new dims if indexing arrays are not all 1D
             # (we have just flattened them so need to handle accordingly)
+            divider = chunks[begin:end]
+            chunked_arr = arr.T // divider
+            unique_chunks, chunk_nitems = np.unique(chunked_arr, axis=0, return_counts=True)
             idx_order = np.lexsort(
-                tuple(a for a in reversed(arr))
+                tuple(a for a in reversed(chunked_arr.T))
             )  # sort by column with priority to first column
             sorted_idxs = arr[:, idx_order]
             out = np.empty(flat_shape, dtype=self.dtype)
+            shape = np.array(shape)
 
-            divider = chunks[begin:end]
-            unique_chunks, chunk_nitems = np.unique(sorted_idxs.T // divider, axis=0, return_counts=True)
             chunk_nitems_cumsum = np.cumsum(chunk_nitems)
             prior_tuple = _slice[:begin]
             post_tuple = _slice[end:] if end is not None else ()
@@ -1547,13 +1550,23 @@ def get_fselection_numpy(self, key: list | np.ndarray) -> np.ndarray:  # noqa: C
                 start = 0 if chunk_i == 0 else chunk_nitems_cumsum[chunk_i - 1]
                 stop = chunk_nitems_cumsum[chunk_i]
                 selection = sorted_idxs[:, start:stop].T
-                out_selection = idx_order[start:stop].T
+                mid_out_selection = idx_order[start:stop].T
+                chunk_begin = chunk_idx * chunks[begin:end]
+                chunk_end = np.minimum((chunk_idx + 1) * chunks[begin:end], shape[begin:end])
                 # loop over chunks coming from slices before and after array indices
                 for cprior_tuple in product(*cprior_slices):
                     prior_selection = selector(cprior_tuple, prior_tuple, chunks[:begin])
                     # selection relative to coordinates of out (necessarily step = 1)
+                    # stop = start + step * n + k => n = (stop - start - 1) // step
+                    # hence, out_stop = out_start + n + 1
+                    # ps.start = pt.start + out_start * step
                     out_prior_selection = tuple(
-                        slice(ps.start - pt.start, ps.stop - pt.start, 1)
+                        slice(
+                            (ps.start - pt.start + pt.step - 1) // pt.step,
+                            (ps.start - pt.start + pt.step - 1) // pt.step
+                            + (ps.stop - ps.start + ps.step - 1) // ps.step,
+                            1,
+                        )
                         for ps, pt in zip(prior_selection, prior_tuple, strict=True)
                     )
                     for cpost_tuple in product(*cpost_slices):
@@ -1562,11 +1575,14 @@ def get_fselection_numpy(self, key: list | np.ndarray) -> np.ndarray:  # noqa: C
                         )
                         # selection relative to coordinates of out (necessarily step = 1)
                         out_post_selection = tuple(
-                            slice(ps.start - pt.start, ps.stop - pt.start, 1)
+                            slice(
+                                (ps.start - pt.start + pt.step - 1) // pt.step,
+                                (ps.start - pt.start + pt.step - 1) // pt.step
+                                + (ps.stop - ps.start + ps.step - 1) // ps.step,
+                                1,
+                            )
                             for ps, pt in zip(post_selection, post_tuple, strict=True)
                         )
-                        chunk_begin = chunk_idx * chunks[begin:end]
-                        chunk_end = np.minimum((chunk_idx + 1) * chunks[begin:end], shape[begin:end])
                         locbegin = np.hstack(
                             (
                                 [s.start for s in prior_selection],
@@ -1581,7 +1597,7 @@ def get_fselection_numpy(self, key: list | np.ndarray) -> np.ndarray:  # noqa: C
                             casting="unsafe",
                             dtype="int64",
                         )
-                        out_selection = out_prior_selection + (out_selection,) + out_post_selection
+                        out_selection = out_prior_selection + (mid_out_selection,) + out_post_selection
                         to_be_loaded = np.empty(locend - locbegin, dtype=self.dtype)
                         # basically load whole chunk, except for slice part at beginning and end
                         super().get_slice_numpy(
@@ -4615,7 +4631,21 @@ def slice_to_chunktuple(s, n):
 
 
 def selector(ctuple, _tuple, chunks):
-    return tuple(
-        slice(builtins.max(s.start, i * csize), builtins.min(csize * (i + 1), s.stop), s.step)
-        for i, s, csize in zip(ctuple, _tuple, chunks, strict=True)
-    )
+    # we assume that at least one element of chunk intersects with the slice
+    # (as a consequence of only looping over intersecting chunks)
+    result = ()
+    for i, s, csize in zip(ctuple, _tuple, chunks, strict=True):
+        # we need to advance to the first slice element that lies within the chunk, which is
+        # not necessarily the first element of the chunk
+        # i * csize = s.start + n*step + k with k in [1, step]: n+1 slice elements precede the chunk
+        np1 = (i * csize - s.start + s.step - 1) // s.step  # gives (n + 1)
+        # np1 <= 0 when the slice starts at/after the chunk start; max() below then keeps s.start
+        result += (
+            slice(
+                builtins.max(s.start, s.start + np1 * s.step),  # start+(n+1)*step gives i*csize if k=step
+                builtins.min(csize * (i + 1), s.stop),
+                s.step,
+            ),
+        )
+
+    return result
diff --git a/tests/ndarray/test_ndarray.py b/tests/ndarray/test_ndarray.py
@@ -292,24 +292,32 @@ def test_oindex():
     np.testing.assert_allclose(arr[:], nparr)
 
 
-def test_findex():
+@pytest.mark.parametrize("c", [None, 10])
+def test_findex(c):
     # Test 1d fast path
     ndim = 1
+    chunks = (c,) * ndim if c is not None else None
     dtype = np.dtype("float")
-    d = 1 + int(blosc2.MAX_FAST_PATH_SIZE / dtype.itemsize)  # just over fast path size
+    d = 1 + int(blosc2.MAX_FAST_PATH_SIZE / dtype.itemsize) if c is None else 100  # just over fast path size
     shape = (d,) * ndim
-    arr = blosc2.linspace(0, 100, num=np.prod(shape), shape=shape, dtype=dtype)
+    arr = blosc2.linspace(0, 100, num=np.prod(shape), shape=shape, dtype=dtype, chunks=chunks)
     rng = np.random.default_rng()
     idx = rng.integers(low=0, high=d, size=(d // 4,))
     nparr = arr[:]
     b = arr[idx]
     n = nparr[idx]
     np.testing.assert_allclose(b, n)
+    b = arr[[[idx[::-1]], [idx]]]
+    n = nparr[[[idx[::-1]], [idx]]]
+    np.testing.assert_allclose(b, n)
 
     ndim = 3
-    d = 1 + int((blosc2.MAX_FAST_PATH_SIZE / 8) ** (1 / ndim))  # just over fast path size
+    d = (
+        1 + int((blosc2.MAX_FAST_PATH_SIZE / 8) ** (1 / ndim)) if c is None else d
+    )  # just over fast path size
     shape = (d,) * ndim
-    arr = blosc2.linspace(0, 100, num=np.prod(shape), shape=shape, dtype=dtype)
+    chunks = (c,) * ndim if c is not None else None
+    arr = blosc2.linspace(0, 100, num=np.prod(shape), shape=shape, dtype=dtype, chunks=chunks)
     rng = np.random.default_rng()
     idx = rng.integers(low=0, high=d, size=(100,))