Skip to content

Commit 6c2371b

Browse files
author
Luke Shaw
committed
Cleaning up code
1 parent 75228d3 commit 6c2371b

File tree

3 files changed

+23
-16
lines changed

3 files changed

+23
-16
lines changed

bench/ndarray/fancy_index.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,7 @@ def genarray(r, ndims=2, verbose=True):
3939
blocks = (max(d // 10, 1),) * ndims
4040
urlpath = f'linspace{r}{ndims}D.b2nd'
4141
t = time.time()
42-
arr = blosc2.linspace(0, 1000, num=np.prod(shape), shape=shape, dtype=np.float64,
43-
urlpath=urlpath, mode='w')
42+
arr = blosc2.linspace(0, 1000, num=np.prod(shape), shape=shape, dtype=np.float64, urlpath=urlpath, mode='w')
4443
t = time.time() - t
4544
arrsize = np.prod(arr.shape) * arr.dtype.itemsize / 2 ** 30
4645
if verbose:
@@ -135,10 +134,10 @@ def timer(arr):
135134
err = (mean - times.min(axis=1), times.max(axis=1)-mean)
136135
plt.bar(x + w, mean , width, color=c, label=label, yerr=err, capsize=5, ecolor='k',
137136
error_kw=dict(lw=2, capthick=2, ecolor='k'))
138-
labs+=label
137+
labs += label
139138

140139
filename = f"{labs}{NDIMS}D" + "sparse" if SPARSE else f"{labs}{NDIMS}D"
141-
filename+=blosc2.__version__.replace('.','_')
140+
filename += blosc2.__version__.replace('.','_')
142141

143142
with open(f"{filename}.pkl", 'wb') as f:
144143
pickle.dump({'times':result_tuple, 'sizes':genuine_sizes}, f)

src/blosc2/ndarray.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1478,17 +1478,19 @@ def get_fselection_numpy(self, key: list | np.ndarray) -> np.ndarray:
14781478
14791479
"""
14801480
# TODO: Make this faster and avoid running out of memory - avoid broadcasting keys
1481+
14811482
## Can't do this because ndindex doesn't support all the same indexing cases as Numpy
14821483
# if math.prod(self.shape) * self.dtype.itemsize < blosc2.MAX_FAST_PATH_SIZE:
14831484
# return self[:][key] # load into memory for smallish arrays
14841485
shape = self.shape
14851486
chunks = self.chunks
1486-
# after this, all indices are slices or arrays of integers
1487-
# moreover, all arrays are consecutive (otherwise an error is raised)
1487+
14881488
# TODO: try to optimise and avoid this expand which seems to copy - maybe np.broadcast
14891489
_slice = ndindex.ndindex(key).expand(shape)
14901490
out_shape = _slice.newshape(shape)
14911491
_slice = _slice.raw
1492+
# now all indices are slices or arrays of integers (or booleans)
1493+
# moreover, all arrays are consecutive (otherwise an error is raised)
14921494

14931495
if np.all([isinstance(s, (slice, np.ndarray)) for s in _slice]) and np.all(
14941496
[s.dtype is not bool for s in _slice if isinstance(s, np.ndarray)]
@@ -1500,9 +1502,12 @@ def get_fselection_numpy(self, key: list | np.ndarray) -> np.ndarray:
15001502
begin, end = arridxs[0], arridxs[-1] + 1
15011503
flat_shape = tuple((i.stop - i.start + (i.step - 1)) // i.step for i in _slice[:begin])
15021504
idx_dim = np.prod(_slice[begin].shape)
1505+
1506+
# TODO: find a nicer way to do the copy maybe
15031507
arr = np.empty((idx_dim, end - begin), dtype=_slice[begin].dtype)
15041508
for i, s in enumerate(_slice[begin:end]):
15051509
arr[:, i] = s.reshape(-1) # have to do a copy
1510+
15061511
flat_shape += (idx_dim,)
15071512
flat_shape += tuple((i.stop - i.start + (i.step - 1)) // i.step for i in _slice[end:])
15081513
# out_shape could have new dims if indexing arrays are not all 1D
@@ -1520,7 +1525,7 @@ def get_fselection_numpy(self, key: list | np.ndarray) -> np.ndarray:
15201525
chunked_arr = np.ascontiguousarray(
15211526
chunked_arr
15221527
) # ensure C-order memory to allow structured dtype view
1523-
# use np.unique but avoid sort and copy
1528+
# TODO: check that avoids sort and copy (alternative: maybe do a bincount with structured data types?)
15241529
_, row_ids, idx_inv, chunk_nitems = np.unique(
15251530
chunked_arr.view([("", chunked_arr.dtype)] * chunked_arr.shape[1]),
15261531
return_counts=True,
@@ -1541,6 +1546,7 @@ def get_fselection_numpy(self, key: list | np.ndarray) -> np.ndarray:
15411546
slice_to_chunktuple(s, c) for s, c in zip(prior_tuple, chunks[:begin], strict=True)
15421547
]
15431548
cpost_slices = [slice_to_chunktuple(s, c) for s, c in zip(post_tuple, chunks[end:], strict=True)]
1549+
# TODO: rewrite to allow interleaved slices/array indexes
15441550
for chunk_i, chunk_idx in enumerate(unique_chunks):
15451551
start = 0 if chunk_i == 0 else chunk_nitems_cumsum[chunk_i - 1]
15461552
stop = chunk_nitems_cumsum[chunk_i]
@@ -4576,7 +4582,7 @@ def __setitem__(self, selection, input) -> np.ndarray:
45764582

45774583
def slice_to_chunktuple(s, n):
45784584
"""
4579-
# credit to ndindex for this function #
4585+
Adapted from _slice_iter in ndindex.ChunkSize.as_subchunks.
45804586
Parameters
45814587
----------
45824588
s : slice

tests/ndarray/test_ndarray.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -293,12 +293,14 @@ def test_oindex():
293293

294294

295295
@pytest.mark.parametrize("c", [None, 10])
296-
def test_findex(c):
297-
# Test 1d fast path
296+
def test_fancy_index(c):
297+
# Test 1d
298298
ndim = 1
299299
chunks = (c,) * ndim if c is not None else None
300300
dtype = np.dtype("float")
301-
d = 1 + int(blosc2.MAX_FAST_PATH_SIZE / dtype.itemsize) if c is None else 100 # just over fast path size
301+
d = (
302+
1 + int(blosc2.MAX_FAST_PATH_SIZE / dtype.itemsize) if c is None else 100
303+
) # just over numpy fast path size
302304
shape = (d,) * ndim
303305
arr = blosc2.linspace(0, 100, num=np.prod(shape), shape=shape, dtype=dtype, chunks=chunks)
304306
rng = np.random.default_rng()
@@ -314,7 +316,7 @@ def test_findex(c):
314316
ndim = 3
315317
d = (
316318
1 + int((blosc2.MAX_FAST_PATH_SIZE / 8) ** (1 / ndim)) if c is None else d
317-
) # just over fast path size
319+
) # just over numpy fast path size
318320
shape = (d,) * ndim
319321
chunks = (c,) * ndim if c is not None else None
320322
arr = blosc2.linspace(0, 100, num=np.prod(shape), shape=shape, dtype=dtype, chunks=chunks)
@@ -324,11 +326,11 @@ def test_findex(c):
324326
row = idx
325327
col = rng.permutation(idx)
326328
mask = rng.integers(low=0, high=2, size=(d,)) == 1
327-
#
328-
# ## Test fancy indexing for different use cases
329+
330+
# Test fancy indexing for different use cases
329331
m, M = np.min(idx), np.max(idx)
330332
nparr = arr[:]
331-
# # i)
333+
# i)
332334
b = arr[[m, M // 2, M]]
333335
n = nparr[[m, M // 2, M]]
334336
np.testing.assert_allclose(b, n)
@@ -363,7 +365,7 @@ def test_findex(c):
363365
n2 = nparr[[0, 1], 0, :]
364366
np.testing.assert_allclose(b1, n1)
365367
np.testing.assert_allclose(b2, n2)
366-
# TODO: Support array indices separate by slices
368+
# TODO: Support array indices separated by slices
367369
# b3 = arr[0, :, [0, 1]]
368370
# n3 = nparr[0, :, [0, 1]]
369371
# np.testing.assert_allclose(b3, n3)

0 commit comments

Comments
 (0)