Skip to content

Commit 5a14db4

Browse files
committed
Streamlining code and trying to add 1D fast path
1 parent 3507e80 commit 5a14db4

File tree

2 files changed

+88
-84
lines changed

2 files changed

+88
-84
lines changed

bench/ndarray/fancy_index1D.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ def genarray(r, verbose=True):
5050
return arr, arrsize
5151

5252

53-
target_sizes = np.float64(np.array([.1, .2, .5, 1, 2]))
53+
target_sizes = np.float64(np.array([.1, .2, .5, 1, 2, 3]))
5454
rng = np.random.default_rng()
5555
blosctimes = []
5656
nptimes = []
@@ -66,9 +66,9 @@ def genarray(r, verbose=True):
6666
def timer(arr):
6767
time_list = []
6868
if not (HDF5 or ZARR):
69-
b = arr[[[idx[::-1]], [idx]]]
70-
time_list += [time.time() - t]
71-
t = time.time()
69+
t = time.time()
70+
b = arr[[[idx[::-1]], [idx]]]
71+
time_list += [time.time() - t]
7272
t = time.time()
7373
b = arr[sorted_idx] if HDF5 else arr[idx]
7474
time_list += [time.time() - t]

src/blosc2/ndarray.py

Lines changed: 84 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -1477,8 +1477,8 @@ def get_fselection_numpy(self, key: list | np.ndarray) -> np.ndarray: # noqa: C
14771477
out: np.ndarray
14781478
14791479
"""
1480-
# TODO: Make this faster for broadcasted keys
1481-
## Can`t do this because ndindex doesn't support all the same indexing cases as Numpy
1480+
# TODO: Make this faster and running out of memory - avoid broadcasting keys
1481+
## Can't do this because ndindex doesn't support all the same indexing cases as Numpy
14821482
# if math.prod(self.shape) * self.dtype.itemsize < blosc2.MAX_FAST_PATH_SIZE:
14831483
# return self[:][key] # load into memory for smallish arrays
14841484
shape = self.shape
@@ -1492,45 +1492,45 @@ def get_fselection_numpy(self, key: list | np.ndarray) -> np.ndarray: # noqa: C
14921492
if np.all([isinstance(s, (slice, np.ndarray)) for s in _slice]) and np.all(
14931493
[s.dtype is not bool for s in _slice if isinstance(s, np.ndarray)]
14941494
):
1495-
arr = []
14961495
chunks = np.array(chunks)
1497-
f_ = 0
1498-
# |------|
1496+
# |------|
14991497
# ------| arrs |------
1500-
# f_=0 f_=1 f_=2
1501-
begin = 0
1498+
arrs = []
1499+
begin = None
15021500
end = None
1503-
extradims = 0
15041501
flat_shape = ()
1502+
15051503
for num, i in enumerate(_slice):
1506-
if isinstance(i, np.ndarray):
1507-
if f_ == 2:
1504+
if isinstance(i, np.ndarray): # collecting arrays
1505+
if end is not None:
15081506
raise ValueError("Cannot use slices between arrays of integers in index")
1509-
arr += [i.flatten()]
1510-
extradims += i.ndim - 1
1511-
if f_ == 0:
1507+
arrs.append(i.flatten())
1508+
if begin is None:
15121509
begin = num
1513-
f_ = 1
1514-
else:
1515-
if f_ == 1: # finished adding arrays
1516-
arr = np.stack(arr)
1510+
else: # slice
1511+
if arrs: # flush arrays
1512+
arr = np.stack(arrs)
1513+
arrs = []
15171514
end = num
1518-
f_ = 2
15191515
flat_shape += (arr.shape[-1],)
1520-
# k in [1,step], stop = start + n*step + k
1521-
# stop - start + step - 1 = (n+1)*step + k-1 = (n+1)*step + [0, step - 1]
15221516
flat_shape += ((i.stop - i.start + (i.step - 1)) // i.step,)
1523-
if not isinstance(arr, np.ndarray): # might have missed last part of loop
1524-
arr = np.stack(arr)
1517+
1518+
# flush at the end if arrays remain
1519+
if arrs:
1520+
arr = np.stack(arrs)
15251521
flat_shape += (arr.shape[-1],)
15261522
# out_shape could have new dims if indexing arrays are not all 1D
15271523
# (we have just flattened them so need to handle accordingly)
15281524
divider = chunks[begin:end]
15291525
chunked_arr = arr.T // divider
15301526
unique_chunks, chunk_nitems = np.unique(chunked_arr, axis=0, return_counts=True)
1531-
idx_order = np.lexsort(
1532-
tuple(a for a in reversed(chunked_arr.T))
1533-
) # sort by column with priority to first column
1527+
if len(arr) == 1: # 1D chunks:
1528+
idx_order = np.argsort(arr.squeeze(axis=0), axis=-1) # sort by real index
1529+
else:
1530+
idx_order = np.lexsort(
1531+
tuple(a for a in reversed(chunked_arr.T))
1532+
) # sort by chunks (can't sort by index since larger index could belong to lower chunk)
1533+
# e.g. chunks of (100, 10) means (50, 15) has chunk idx (0,1) but (60,5) has (0, 0)
15341534
sorted_idxs = arr[:, idx_order]
15351535
out = np.empty(flat_shape, dtype=self.dtype)
15361536
shape = np.array(shape)
@@ -1550,65 +1550,35 @@ def get_fselection_numpy(self, key: list | np.ndarray) -> np.ndarray: # noqa: C
15501550
start = 0 if chunk_i == 0 else chunk_nitems_cumsum[chunk_i - 1]
15511551
stop = chunk_nitems_cumsum[chunk_i]
15521552
selection = sorted_idxs[:, start:stop].T
1553-
mid_out_selection = idx_order[start:stop].T
1554-
chunk_begin = chunk_idx * chunks[begin:end]
1555-
chunk_end = np.minimum((chunk_idx + 1) * chunks[begin:end], shape[begin:end])
1553+
out_mid_selection = (idx_order[start:stop].T,)
1554+
if (
1555+
len(arr) == 1
1556+
): # can avoid loading in whole chunk if 1D for array indexed chunks, a bit faster
1557+
chunk_begin = selection[0]
1558+
chunk_end = selection[-1] + 1
1559+
else:
1560+
chunk_begin = chunk_idx * chunks[begin:end]
1561+
chunk_end = np.minimum((chunk_idx + 1) * chunks[begin:end], shape[begin:end])
1562+
loc_mid_selection = tuple(a for a in (selection - chunk_begin).T)
1563+
15561564
# loop over chunks coming from slices before and after array indices
15571565
for cprior_tuple in product(*cprior_slices):
1558-
prior_selection = selector(cprior_tuple, prior_tuple, chunks[:begin])
1559-
# selection relative to coordinates of out (necessarily step = 1)
1560-
# stop = start + step * n + k => n = (stop - start - 1) // step
1561-
# hence, out_stop = out_start + n + 1
1562-
# ps.start = pt.start + out_start * step
1563-
out_prior_selection = tuple(
1564-
slice(
1565-
(ps.start - pt.start + pt.step - 1) // pt.step,
1566-
(ps.start - pt.start + pt.step - 1) // pt.step
1567-
+ (ps.stop - ps.start + ps.step - 1) // ps.step,
1568-
1,
1569-
)
1570-
for ps, pt in zip(prior_selection, prior_tuple, strict=True)
1566+
out_prior_selection, prior_selection, loc_prior_selection = _get_selection(
1567+
cprior_tuple, prior_tuple, chunks[:begin]
15711568
)
15721569
for cpost_tuple in product(*cpost_slices):
1573-
post_selection = selector(
1570+
out_post_selection, post_selection, loc_post_selection = _get_selection(
15741571
cpost_tuple, post_tuple, chunks[end:] if end is not None else []
15751572
)
1576-
# selection relative to coordinates of out (necessarily step = 1)
1577-
out_post_selection = tuple(
1578-
slice(
1579-
(ps.start - pt.start + pt.step - 1) // pt.step,
1580-
(ps.start - pt.start + pt.step - 1) // pt.step
1581-
+ (ps.stop - ps.start + ps.step - 1) // ps.step,
1582-
1,
1583-
)
1584-
for ps, pt in zip(post_selection, post_tuple, strict=True)
1585-
)
1586-
locbegin = np.hstack(
1587-
(
1588-
[s.start for s in prior_selection],
1589-
chunk_begin,
1590-
[s.start for s in post_selection],
1591-
),
1592-
casting="unsafe",
1593-
dtype="int64",
1573+
locbegin, locend = _get_local_slice(
1574+
prior_selection, post_selection, (chunk_begin, chunk_end)
15941575
)
1595-
locend = np.hstack(
1596-
([s.stop for s in prior_selection], chunk_end, [s.stop for s in post_selection]),
1597-
casting="unsafe",
1598-
dtype="int64",
1599-
)
1600-
out_selection = out_prior_selection + (mid_out_selection,) + out_post_selection
16011576
to_be_loaded = np.empty(locend - locbegin, dtype=self.dtype)
16021577
# basically load whole chunk, except for slice part at beginning and end
1603-
super().get_slice_numpy(
1604-
to_be_loaded, (locbegin, locend)
1605-
) # get relevant section of chunk
1606-
loc_idx = (
1607-
tuple(slice(0, s.stop - s.start, s.step) for s in prior_selection)
1608-
+ tuple(a for a in (selection - chunk_begin).T)
1609-
+ tuple(slice(0, s.stop - s.start, s.step) for s in post_selection)
1610-
)
1611-
out[out_selection] = to_be_loaded[loc_idx]
1578+
super().get_slice_numpy(to_be_loaded, (locbegin, locend))
1579+
loc_idx = loc_prior_selection + loc_mid_selection + loc_post_selection
1580+
out_idx = out_prior_selection + out_mid_selection + out_post_selection
1581+
out[out_idx] = to_be_loaded[loc_idx]
16121582
return out.reshape(out_shape) # should have filled in correct order, just need to reshape
16131583

16141584
# Default when there are booleans
@@ -4630,22 +4600,56 @@ def slice_to_chunktuple(s, n):
46304600
return tuple(range(start // n, ceiling(stop, n)))
46314601

46324602

4633-
def selector(ctuple, _tuple, chunks):
4603+
def _get_selection(ctuple, ptuple, chunks):
46344604
# we assume that at least one element of chunk intersects with the slice
46354605
# (as a consequence of only looping over intersecting chunks)
4636-
result = ()
4637-
for i, s, csize in zip(ctuple, _tuple, chunks, strict=True):
4606+
pselection = ()
4607+
for i, s, csize in zip(ctuple, ptuple, chunks, strict=True):
46384608
# we need to advance to first element within chunk that intersects with slice, not
46394609
# necessarily the first element of chunk
46404610
# i * csize = s.start + n*step + k, already added n+1 elements, k in [1, step]
46414611
np1 = (i * csize - s.start + s.step - 1) // s.step # gives (n + 1)
46424612
# can have n = -1 if s.start > i * csize, but never < -1 since have to intersect with chunk
4643-
result += (
4613+
pselection += (
46444614
slice(
46454615
builtins.max(s.start, s.start + np1 * s.step), # start+(n+1)*step gives i*csize if k=step
46464616
builtins.min(csize * (i + 1), s.stop),
46474617
s.step,
46484618
),
46494619
)
46504620

4651-
return result
4621+
# selection relative to coordinates of out (necessarily step = 1)
4622+
# stop = start + step * n + k => n = (stop - start - 1) // step
4623+
# hence, out_stop = out_start + n + 1
4624+
# ps.start = pt.start + out_start * step
4625+
out_pselection = tuple(
4626+
slice(
4627+
(ps.start - pt.start + pt.step - 1) // pt.step,
4628+
(ps.start - pt.start + pt.step - 1) // pt.step + (ps.stop - ps.start + ps.step - 1) // ps.step,
4629+
1,
4630+
)
4631+
for ps, pt in zip(pselection, ptuple, strict=True)
4632+
)
4633+
4634+
loc_selection = tuple(slice(0, s.stop - s.start, s.step) for s in pselection)
4635+
4636+
return out_pselection, pselection, loc_selection
4637+
4638+
4639+
def _get_local_slice(prior_selection, post_selection, chunk_bounds):
4640+
chunk_begin, chunk_end = chunk_bounds
4641+
locbegin = np.hstack(
4642+
(
4643+
[s.start for s in prior_selection],
4644+
chunk_begin,
4645+
[s.start for s in post_selection],
4646+
),
4647+
casting="unsafe",
4648+
dtype="int64",
4649+
)
4650+
locend = np.hstack(
4651+
([s.stop for s in prior_selection], chunk_end, [s.stop for s in post_selection]),
4652+
casting="unsafe",
4653+
dtype="int64",
4654+
)
4655+
return locbegin, locend

0 commit comments

Comments
 (0)