Skip to content

Commit d1f590d

Browse files
Appending performance improvement (#1014)
* feat: replace the sub-optimal itertools loop
* feat: remove old scripts which were commented out
* docs: explanation added in appending script
* docs: comment polished
* fix: remove trailing whitespace found by lint
* test: add an edge case test for resize method

Co-authored-by: Hailiang Zhang <[email protected]>
1 parent 27cf315 commit d1f590d

File tree

2 files changed

+27
-4
lines changed

2 files changed

+27
-4
lines changed

zarr/core.py

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2428,17 +2428,31 @@ def _resize_nosync(self, *args):
24282428
for s, c in zip(new_shape, chunks))
24292429

24302430
# remove any chunks not within range
2431+
# The idea is that, along each dimension,
2432+
# only find and remove the chunk slices that exist in 'old' but not 'new' data.
2433+
# Note that a mutable list ('old_cdata_shape_working_list') is introduced here
2434+
# to dynamically adjust the number of chunks along the already-processed dimensions
2435+
# in order to avoid duplicate chunk removal.
24312436
chunk_store = self.chunk_store
2432-
for cidx in itertools.product(*[range(n) for n in old_cdata_shape]):
2433-
if all(i < c for i, c in zip(cidx, new_cdata_shape)):
2434-
pass # keep the chunk
2435-
else:
2437+
old_cdata_shape_working_list = list(old_cdata_shape)
2438+
for idx_cdata, (val_old_cdata, val_new_cdata) in enumerate(
2439+
zip(old_cdata_shape_working_list, new_cdata_shape)
2440+
):
2441+
for cidx in itertools.product(
2442+
*[
2443+
range(n_new, n_old) if (idx == idx_cdata) else range(n_old)
2444+
for idx, (n_old, n_new) in enumerate(
2445+
zip(old_cdata_shape_working_list, new_cdata_shape)
2446+
)
2447+
]
2448+
):
24362449
key = self._chunk_key(cidx)
24372450
try:
24382451
del chunk_store[key]
24392452
except KeyError:
24402453
# chunk not initialized
24412454
pass
2455+
old_cdata_shape_working_list[idx_cdata] = min(val_old_cdata, val_new_cdata)
24422456

24432457
def append(self, data, axis=0):
24442458
"""Append `data` to `axis`.

zarr/tests/test_core.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -667,6 +667,15 @@ def test_resize_2d(self):
667667
assert (10, 10) == z.chunks
668668
assert_array_equal(a[:55, :1], z[:])
669669

670+
z.resize((1, 55))
671+
assert (1, 55) == z.shape
672+
assert (1, 55) == z[:].shape
673+
assert np.dtype('i4') == z.dtype
674+
assert np.dtype('i4') == z[:].dtype
675+
assert (10, 10) == z.chunks
676+
assert_array_equal(a[:1, :10], z[:, :10])
677+
assert_array_equal(np.zeros((1, 55-10), dtype='i4'), z[:, 10:55])
678+
670679
# via shape setter
671680
z.shape = (105, 105)
672681
assert (105, 105) == z.shape

0 commit comments

Comments
 (0)