Skip to content

Commit cd897c9

Browse files
Merge pull request #380 from Blosc/consecutive-slices
Fast path in NDArray.slice for slices aligned with chunks
2 parents cddb5c3 + 4cc709c commit cd897c9

File tree

3 files changed

+230
-0
lines changed

3 files changed

+230
-0
lines changed

bench/ndarray/aligned_chunks.py

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
#######################################################################
2+
# Copyright (c) 2019-present, Blosc Development Team <[email protected]>
3+
# All rights reserved.
4+
#
5+
# This source code is licensed under a BSD-style license (found in the
6+
# LICENSE file in the root directory of this source tree)
7+
#######################################################################
8+
9+
# Benchmark for comparing speeds of NDArray.slice() when using
10+
# different slices containing consecutive and non-consecutive chunks,
11+
# as well as aligned and unaligned.
12+
13+
import math
from time import perf_counter

import numpy as np

import blosc2

# Dimensions and type properties for the arrays
shape = (50, 100, 300)
chunks = (5, 25, 50)
blocks = (1, 5, 10)
dtype = np.dtype(np.int32)

# Set to True to compare every slice against plain indexing of the source
# array. The comparison runs outside the timed region.
VERIFY = False

# Non-consecutive slices
nc_slices = [
    (slice(0, 50), slice(0, 100), slice(0, 300 - 1)),
    (slice(0, 10), slice(0, 100 - 1), slice(0, 300)),
    (slice(0, 5 - 1), slice(0, 25), slice(0, 300)),
    (slice(0, 5), slice(0, 25), slice(0, 50 - 1)),
]
# Consecutive slices
c_slices = [
    (slice(0, 50), slice(0, 100), slice(0, 300)),
    (slice(0, 10), slice(0, 100), slice(0, 300)),
    (slice(0, 5), slice(0, 25), slice(0, 300)),
    (slice(0, 5), slice(0, 25), slice(0, 50)),
]
# Non-aligned slices
na_slices = [
    (slice(10, 50 - 1), slice(25, 100), slice(50, 300)),
    (slice(10, 40), slice(25, 75 - 1), slice(100, 200)),
    (slice(20, 35), slice(50, 75), slice(100, 300 - 1)),
    (slice(20 + 1, 25), slice(25, 50), slice(50, 100)),
]
# Aligned slices
a_slices = [
    (slice(10, 50), slice(25, 100), slice(50, 300)),
    (slice(10, 40), slice(25, 75), slice(100, 200)),
    (slice(20, 35), slice(50, 75), slice(100, 300)),
    (slice(20, 25), slice(25, 50), slice(50, 100)),
]


def time_slices(arr, slices):
    """Time ``arr.slice(s)`` for every ``s`` in *slices*.

    Returns the list of per-slice elapsed times (seconds) and their sum.
    """
    times = []
    for s in slices:
        # perf_counter() is monotonic and high resolution; time() can be too
        # coarse to measure a single fast slice.
        t1 = perf_counter()
        arr2 = arr.slice(s)
        times.append(perf_counter() - t1)
        if VERIFY:
            np.testing.assert_array_equal(arr2[:], arr[s])
    return times, sum(times)


def format_speedups(slow_times, fast_times):
    """Return ``slow / fast`` ratios formatted as ``'1.23x'`` strings."""
    # A zero denominator means the fast case was below timer resolution;
    # report it as infinite instead of raising ZeroDivisionError.
    return [
        f"{(slow / fast if fast > 0 else math.inf):.2f}x"
        for slow, fast in zip(slow_times, fast_times, strict=True)
    ]


def main():
    """Create the test array and report timings for each family of slices."""
    print("Creating array with shape:", shape)
    t0 = perf_counter()
    arr = blosc2.arange(math.prod(shape), dtype=dtype, shape=shape, chunks=chunks, blocks=blocks)
    print(f"Time to create array: {perf_counter() - t0 : .5f}")

    print("Timing non-consecutive slices...")
    nc_times, total = time_slices(arr, nc_slices)
    print(f"Time to get non-consecutive slices: {total : .5f}")

    print("Timing consecutive slices...")
    c_times, total = time_slices(arr, c_slices)
    print(f"Time to get consecutive slices: {total : .5f}")
    print("Speedups for consecutive slices: ", format_speedups(nc_times, c_times))

    print("Timing non-aligned slices...")
    na_times, total = time_slices(arr, na_slices)
    print(f"Time to get non-aligned slices: {total : .5f}")

    print("Timing aligned slices...")
    a_times, total = time_slices(arr, a_slices)
    print(f"Time to get aligned slices: {total : .5f}")
    print("Speedups for aligned slices: ", format_speedups(na_times, a_times))


if __name__ == "__main__":
    main()

src/blosc2/ndarray.py

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import math
1414
from collections import OrderedDict, namedtuple
1515
from functools import reduce
16+
from itertools import product
1617
from typing import TYPE_CHECKING, Any, NamedTuple
1718

1819
from numpy.exceptions import ComplexWarning
@@ -1029,6 +1030,82 @@ def extract_values(arr, indices: np.ndarray[np.int_], max_cache_size: int = 10)
10291030
return extracted_values
10301031

10311032

1033+
def detect_aligned_chunks( # noqa: C901
1034+
key: Sequence[slice], shape: Sequence[int], chunks: Sequence[int], consecutive: bool = False
1035+
) -> list[int]:
1036+
"""
1037+
Detect whether a multidimensional slice is aligned with chunk boundaries.
1038+
1039+
Parameters
1040+
----------
1041+
key : Sequence of slice
1042+
The multidimensional slice to check.
1043+
shape : Sequence of int
1044+
Shape of the NDArray.
1045+
chunks : Sequence of int
1046+
Chunk shape of the NDArray.
1047+
consecutive : bool, default=False
1048+
If True, check if the chunks are consecutive in storage order.
1049+
If False, only check for chunk boundary alignment.
1050+
1051+
Returns
1052+
-------
1053+
list[int]
1054+
List of chunk indices (in C-order) that the slice overlaps with.
1055+
If the slice isn't aligned with chunk boundaries, returns an empty list.
1056+
If consecutive=True and chunks aren't consecutive, returns an empty list.
1057+
"""
1058+
if len(key) != len(shape):
1059+
return []
1060+
1061+
# Check that slice boundaries are exact multiple of chunk boundaries
1062+
for i, s in enumerate(key):
1063+
if s.start is not None and s.start % chunks[i] != 0:
1064+
return []
1065+
if s.stop is not None and s.stop % chunks[i] != 0:
1066+
return []
1067+
1068+
# Parse the slice boundaries
1069+
start_indices = []
1070+
end_indices = []
1071+
n_chunks = []
1072+
1073+
for i, s in enumerate(key):
1074+
start = s.start if s.start is not None else 0
1075+
stop = s.stop if s.stop is not None else shape[i]
1076+
chunk_size = chunks[i]
1077+
start_idx = start // chunk_size
1078+
end_idx = stop // chunk_size
1079+
start_indices.append(start_idx)
1080+
end_indices.append(end_idx)
1081+
n_chunks.append(shape[i] // chunk_size)
1082+
1083+
# Get all chunk combinations in the slice
1084+
indices = [range(start, end) for start, end in zip(start_indices, end_indices, strict=False)]
1085+
result = []
1086+
1087+
for combination in product(*indices):
1088+
flat_index = 0
1089+
multiplier = 1
1090+
for idx, n in zip(reversed(range(len(n_chunks))), reversed(n_chunks), strict=False):
1091+
flat_index += combination[idx] * multiplier
1092+
multiplier *= n
1093+
result.append(flat_index)
1094+
1095+
# Check if chunks are consecutive if requested
1096+
if consecutive and result:
1097+
sorted_result = sorted(result)
1098+
if sorted_result[-1] - sorted_result[0] + 1 != len(sorted_result):
1099+
return []
1100+
1101+
# The array of indices must be consecutive
1102+
for i in range(len(sorted_result) - 1):
1103+
if sorted_result[i + 1] - sorted_result[i] != 1:
1104+
return []
1105+
1106+
return sorted(result)
1107+
1108+
10321109
class NDOuterIterator:
10331110
def __init__(self, ndarray: NDArray | NDField, cache_size=1):
10341111
self.ndarray = ndarray
@@ -1846,6 +1923,30 @@ def slice(self, key: int | slice | Sequence[slice], **kwargs: Any) -> NDArray:
18461923
kwargs = _check_ndarray_kwargs(**kwargs) # sets cparams to defaults
18471924
key, mask = process_key(key, self.shape)
18481925
start, stop, step = get_ndarray_start_stop(self.ndim, key, self.shape)
1926+
1927+
# Fast path for slices made with consecutive chunks
1928+
if step == (1,) * self.ndim:
1929+
aligned_chunks = detect_aligned_chunks(key, self.shape, self.chunks, consecutive=False)
1930+
if aligned_chunks:
1931+
# print("Aligned chunks detected", aligned_chunks)
1932+
# Create a new ndarray for the key slice
1933+
new_shape = [
1934+
sp - st for sp, st in zip([k.stop for k in key], [k.start for k in key], strict=False)
1935+
]
1936+
newarr = blosc2.empty(
1937+
shape=new_shape,
1938+
dtype=self.dtype,
1939+
chunks=self.chunks,
1940+
blocks=self.blocks,
1941+
**kwargs,
1942+
)
1943+
# Get the chunks from the original array and update the new array
1944+
# No need for chunks to decompress and compress again
1945+
for order, nchunk in enumerate(aligned_chunks):
1946+
chunk = self.schunk.get_chunk(nchunk)
1947+
newarr.schunk.update_chunk(order, chunk)
1948+
return newarr
1949+
18491950
key = (start, stop)
18501951
ndslice = super().get_slice(key, mask, **kwargs)
18511952

tests/ndarray/test_slice.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,26 @@
1616
([456], [258], [73], slice(0, 1), np.int32),
1717
([77, 134, 13], [31, 13, 5], [7, 8, 3], (slice(3, 7), slice(50, 100), 7), np.float64),
1818
([12, 13, 14, 15, 16], [5, 5, 5, 5, 5], [2, 2, 2, 2, 2], (slice(1, 3), ..., slice(3, 6)), np.float32),
19+
# Consecutive slices
20+
((10, 100, 300), (5, 25, 50), (1, 5, 10), (slice(0, 10), slice(0, 100), slice(0, 300)), np.int32),
21+
((10, 100, 300), (5, 25, 50), (1, 5, 10), (slice(0, 5), slice(0, 100), slice(0, 300)), np.int32),
22+
((10, 100, 300), (5, 25, 50), (1, 5, 10), (slice(0, 5), slice(0, 25), slice(0, 200)), np.int32),
23+
((10, 100, 300), (5, 25, 50), (1, 5, 10), (slice(0, 5), slice(0, 25), slice(0, 50)), np.int32),
24+
# Aligned slices
25+
((10, 100, 300), (5, 25, 50), (1, 5, 10), (slice(10, 50), slice(25, 100), slice(50, 300)), np.int32),
26+
((10, 100, 300), (5, 25, 50), (1, 5, 10), (slice(10, 40), slice(25, 75), slice(100, 200)), np.int32),
27+
((10, 100, 300), (5, 25, 50), (1, 5, 10), (slice(20, 35), slice(50, 75), slice(100, 300)), np.int32),
28+
((10, 100, 300), (5, 25, 50), (1, 5, 10), (slice(20, 25), slice(25, 50), slice(50, 100)), np.int32),
29+
# Non-consecutive slices
30+
((10, 100, 300), (5, 25, 50), (1, 5, 10), (slice(0, 10), slice(0, 100), slice(0, 300 - 1)), np.int32),
31+
((10, 100, 300), (5, 25, 50), (1, 5, 10), (slice(0, 5), slice(0, 100 - 1), slice(0, 300)), np.int32),
32+
((10, 100, 300), (5, 25, 50), (1, 5, 10), (slice(0, 5 - 1), slice(0, 25), slice(0, 200)), np.int32),
33+
((10, 100, 300), (5, 25, 50), (1, 5, 10), (slice(0, 5), slice(0, 25), slice(0, 50 - 1)), np.int32),
34+
# Non-aligned slices
35+
((10, 100, 300), (5, 25, 50), (1, 5, 10), (slice(10, 50 - 1), slice(25, 100), slice(50, 300)), np.int32),
36+
((10, 100, 300), (5, 25, 50), (1, 5, 10), (slice(10, 40), slice(25, 75 - 1), slice(100, 200)), np.int32),
37+
((10, 100, 300), (5, 25, 50), (1, 5, 10), (slice(20, 35), slice(50, 75), slice(100, 300 - 1)), np.int32),
38+
((10, 100, 300), (5, 25, 50), (1, 5, 10), (slice(20 + 1, 25), slice(25, 50), slice(50, 100)), np.int32),
1939
]
2040

2141

0 commit comments

Comments
 (0)