Skip to content

Commit ee495af

Browse files
committed
Add get_slice_nchunks
1 parent 3a93e79 commit ee495af

File tree

5 files changed

+140
-1
lines changed

5 files changed

+140
-1
lines changed

blosc2/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ class Tuner(Enum):
129129
unpack_array2,
130130
unpack_tensor,
131131
)
132-
from .ndarray import NDArray, asarray, copy, empty, frombuffer, full, uninit, zeros
132+
from .ndarray import NDArray, asarray, copy, empty, frombuffer, full, get_slice_nchunks, uninit, zeros
133133
from .schunk import SChunk, open
134134
from .version import __version__
135135

@@ -227,4 +227,5 @@ class Tuner(Enum):
227227
"nthreads",
228228
"compute_chunks_blocks",
229229
"cpu_info",
230+
"get_slice_nchunks",
230231
]

blosc2/blosc2_ext.pyx

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -416,6 +416,8 @@ cdef extern from "blosc2.h":
416416

417417
int blosc2_register_filter(blosc2_filter *filter)
418418

419+
int blosc2_get_slice_nchunks(blosc2_schunk * schunk, int64_t *start, int64_t *stop, int64_t ** chunks_idx)
420+
419421

420422
cdef extern from "b2nd.h":
421423
ctypedef enum:
@@ -2362,3 +2364,38 @@ def ndarray_from_cframe(cframe, copy=False):
23622364
if not copy:
23632365
ndarray._schunk._avoid_cframe_free(True)
23642366
return ndarray
2367+
2368+
2369+
def array_get_slice_nchunks(array: NDArray, key):
    """
    Get the indexes of the chunks touched by a multidimensional slice.

    Parameters
    ----------
    array: NDArray
        The array whose underlying super-chunk is queried.
    key: tuple
        A ``(start, stop)`` pair. Both elements must be indexable and hold
        at least ``array.ndim`` integer coordinates.

    Returns
    -------
    out: numpy.ndarray
        A 1-dim ``np.int64`` array holding the chunk indexes reported by
        ``blosc2_get_slice_nchunks``.
    """
    cdef int64_t[B2ND_MAX_DIM] start_, stop_
    cdef int64_t *chunks_idx = NULL

    start, stop = key
    for i in range(array.ndim):
        start_[i] = start[i]
        stop_[i] = stop[i]

    rc = blosc2_get_slice_nchunks(array.array.sc, start_, stop_, &chunks_idx)
    # A non-error rc is the number of entries written to chunks_idx
    # (_check_rc is expected to raise on an error code -- confirm).
    _check_rc(rc, "Error while getting the chunk indexes")

    # chunks_idx was allocated by the C library and must be released by us,
    # even if building the NumPy result fails.
    try:
        res = np.empty(rc, dtype=np.int64)
        for i in range(rc):
            res[i] = chunks_idx[i]
    finally:
        free(chunks_idx)
    return res
2383+
2384+
2385+
def schunk_get_slice_nchunks(schunk: SChunk, key):
    """
    Get the indexes of the chunks touched by a slice of a super-chunk.

    Parameters
    ----------
    schunk: SChunk
        The super-chunk to query.
    key: tuple
        A ``(start, stop)`` pair expressed in items (not bytes). Either
        value may be ``None`` or negative; both are normalized with
        ``slice(start, stop, 1).indices(nitems)``.

    Returns
    -------
    out: numpy.ndarray
        A 1-dim ``np.int64`` array holding the chunk indexes reported by
        ``blosc2_get_slice_nchunks``.
    """
    cdef int64_t start_, stop_
    cdef int64_t *chunks_idx = NULL

    start, stop = key
    # Resolve None / negative bounds against the number of items stored.
    nitems = schunk.nbytes // schunk.typesize
    start, stop, _ = slice(start, stop, 1).indices(nitems)
    start_ = start
    stop_ = stop

    rc = blosc2_get_slice_nchunks(schunk.schunk, &start_, &stop_, &chunks_idx)
    # A non-error rc is the number of entries written to chunks_idx
    # (_check_rc is expected to raise on an error code -- confirm).
    _check_rc(rc, "Error while getting the chunk indexes")

    # chunks_idx was allocated by the C library and must be released by us,
    # even if building the NumPy result fails.
    try:
        res = np.empty(rc, dtype=np.int64)
        for i in range(rc):
            res[i] = chunks_idx[i]
    finally:
        free(chunks_idx)
    return res

blosc2/ndarray.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -667,3 +667,27 @@ def _check_ndarray_kwargs(**kwargs):
667667
raise ValueError("You cannot pass chunks in cparams, use `chunks` argument instead")
668668
if "cparams" in kwargs and "blocks" in kwargs["cparams"]:
669669
raise ValueError("You cannot pass chunks in cparams, use `blocks` argument instead")
670+
671+
672+
def get_slice_nchunks(schunk, key):
    """
    Get the unidimensional chunk indexes needed to get a
    slice of a SChunk or a NDArray.

    Parameters
    ----------
    schunk: SChunk or NDArray
        The super-chunk or array to query.
    key: tuple or indexing key
        For a NDArray, any key accepted by ``process_key`` for the array's
        shape (ints, slices, ``Ellipsis`` ...). For anything else, the key
        is passed through unchanged and is unpacked as a ``(start, stop)``
        pair by the extension.

    Returns
    -------
    out: numpy.ndarray
        A 1-dim ``np.int64`` array with the indexes of the chunks that the
        slice touches.
    """
    # Plain super-chunks need no key normalization here.
    if not isinstance(schunk, NDArray):
        return blosc2_ext.schunk_get_slice_nchunks(schunk, key)

    ndarr = schunk
    normalized_key, _ = process_key(key, ndarr.shape)
    start, stop, _ = get_ndarray_start_stop(ndarr.ndim, normalized_key, ndarr.shape)
    return blosc2_ext.array_get_slice_nchunks(ndarr, (start, stop))
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
#######################################################################
2+
# Copyright (c) 2019-present, Blosc Development Team <[email protected]>
3+
# All rights reserved.
4+
#
5+
# This source code is licensed under a BSD-style license (found in the
6+
# LICENSE file in the root directory of this source tree)
7+
#######################################################################
8+
9+
import numpy as np
10+
import pytest
11+
12+
import blosc2
13+
14+
argnames = "shape, chunks, blocks, slices, dtype"
argvalues = [
    ([456], [258], [73], slice(0, 1), np.int32),
    ([456, 200], [258, 100], [73, 25], (slice(0), slice(0)), np.int64),
    ([77, 134, 13], [31, 13, 5], [7, 8, 3], (slice(3, 7), slice(50, 100), 7), np.float64),
    ([12, 13, 14, 15, 16], [5, 5, 5, 5, 5], [2, 2, 2, 2, 2], (slice(1, 3), ..., slice(3, 6)), np.float32),
]


@pytest.mark.parametrize(argnames, argvalues)
def test_getitem(shape, chunks, blocks, slices, dtype):
    """Check that `get_slice_nchunks` reports the chunks a NDArray slice reads."""
    a = blosc2.zeros(shape, dtype, chunks=chunks, blocks=blocks)
    schunk = a.schunk
    # Fill every chunk with its own index, so the distinct values found in a
    # slice are exactly the indexes of the chunks that the slice touches.
    for i in range(schunk.nchunks):
        chunk = np.full(schunk.chunksize // schunk.typesize, i, dtype=dtype)
        schunk.update_data(i, chunk, True)

    # The comparison must be asserted; a bare np.array_equal() call only
    # computes a boolean and can never fail the test.
    # NOTE(review): confirm the empty-slice parametrization also holds, i.e.
    # that no chunk index is reported for an empty selection.
    assert np.array_equal(np.unique(a[slices]), blosc2.get_slice_nchunks(a, slices))
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
#######################################################################
2+
# Copyright (c) 2019-present, Blosc Development Team <[email protected]>
3+
# All rights reserved.
4+
#
5+
# This source code is licensed under a BSD-style license (found in the
6+
# LICENSE file in the root directory of this source tree)
7+
#######################################################################
8+
9+
import numpy as np
10+
import pytest
11+
12+
import blosc2
13+
14+
15+
@pytest.mark.parametrize(
    "contiguous, urlpath, cparams, nchunks, start, stop",
    [
        (True, None, {"typesize": 4}, 10, 0, 100),
        (True, "b2frame", {"typesize": 4}, 1, 7, 23),
        (
            False,
            None,
            {"splitmode": blosc2.SplitMode.ALWAYS_SPLIT, "nthreads": 5, "typesize": 4},
            5,
            21,
            200 * 2 * 100,
        ),
        (False, "b2frame", {"codec": blosc2.Codec.LZ4HC, "typesize": 4}, 7, None, None),
        (True, None, {"blocksize": 200 * 100, "typesize": 4}, 5, -2456, -234),
        (True, "b2frame", {"blocksize": 200 * 100, "typesize": 4}, 4, 2456, -234),
        (False, None, {"blocksize": 100 * 100, "typesize": 4}, 2, -200 * 100 + 234, 40000),
    ],
)
def test_schunk_get_slice(contiguous, urlpath, cparams, nchunks, start, stop):
    """Check that `get_slice_nchunks` reports the chunks a SChunk slice reads."""
    storage = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams}
    schunk = blosc2.SChunk(chunksize=200 * 100 * 4, mode="w", **storage)
    try:
        # Fill every chunk with its own index, so the distinct values found
        # in a slice are exactly the indexes of the chunks it touches.
        for i in range(nchunks):
            chunk = np.full(schunk.chunksize // schunk.typesize, i, dtype=np.int32)
            schunk.append_data(chunk)

        # `res` is a view with the length of the requested slice; get_slice
        # writes the decompressed items into it.
        aux = np.empty(200 * 100 * nchunks, dtype=np.int32)
        res = aux[start:stop]
        schunk.get_slice(start, stop, res)

        # The comparison must be asserted; a bare np.array_equal() call only
        # computes a boolean and can never fail the test.
        assert np.array_equal(np.unique(res), blosc2.get_slice_nchunks(schunk, (start, stop)))
    finally:
        # Clean up the on-disk frame even when the assertion fails.
        blosc2.remove_urlpath(urlpath)

0 commit comments

Comments
 (0)