Skip to content

Commit dc89c13

Browse files
committed
feature(array): implement Array.append
Changes Array.resize to be an in-place operation.
1 parent 4d663cc commit dc89c13

File tree

3 files changed

+312
-19
lines changed

3 files changed

+312
-19
lines changed

src/zarr/core/array.py

Lines changed: 120 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
from __future__ import annotations
22

33
import json
4+
import warnings
45
from asyncio import gather
5-
from dataclasses import dataclass, field, replace
6+
from dataclasses import dataclass, field
67
from logging import getLogger
78
from typing import TYPE_CHECKING, Any, Generic, Literal, cast, overload
89

@@ -1106,15 +1107,15 @@ async def setitem(
11061107
)
11071108
return await self._set_selection(indexer, value, prototype=prototype)
11081109

1109-
async def resize(self, new_shape: ChunkCoords, delete_outside_chunks: bool = True) -> Self:
1110+
async def resize(self, new_shape: ShapeLike, delete_outside_chunks: bool = True) -> None:
1111+
new_shape = parse_shapelike(new_shape)
11101112
assert len(new_shape) == len(self.metadata.shape)
11111113
new_metadata = self.metadata.update_shape(new_shape)
11121114

1113-
# Remove all chunks outside of the new shape
1114-
old_chunk_coords = set(self.metadata.chunk_grid.all_chunk_coords(self.metadata.shape))
1115-
new_chunk_coords = set(self.metadata.chunk_grid.all_chunk_coords(new_shape))
1116-
11171115
if delete_outside_chunks:
1116+
# Remove all chunks outside of the new shape
1117+
old_chunk_coords = set(self.metadata.chunk_grid.all_chunk_coords(self.metadata.shape))
1118+
new_chunk_coords = set(self.metadata.chunk_grid.all_chunk_coords(new_shape))
11181119

11191120
async def _delete_key(key: str) -> None:
11201121
await (self.store_path / key).delete()
@@ -1130,7 +1131,61 @@ async def _delete_key(key: str) -> None:
11301131

11311132
# Write new metadata
11321133
await self._save_metadata(new_metadata)
1133-
return replace(self, metadata=new_metadata)
1134+
1135+
# Update metadata (in place)
1136+
object.__setattr__(self, "metadata", new_metadata)
1137+
1138+
async def append(self, data: npt.ArrayLike, axis: int = 0) -> ChunkCoords:
    """Append `data` to the array along `axis`.

    The array is first resized to make room along `axis`, then `data` is
    written into the newly added region.

    Parameters
    ----------
    data : array-like
        Data to be appended. Objects without a ``shape`` attribute are
        converted with ``np.asanyarray``.
    axis : int
        Axis along which to append. Negative values count from the last
        axis, as in NumPy.

    Returns
    -------
    new_shape : tuple
        Shape of the array after the append.

    Raises
    ------
    ValueError
        If `axis` is out of bounds for this array, or if `data` does not
        have the same number of dimensions as this array with matching
        sizes in every dimension other than `axis`.

    Notes
    -----
    The size of all dimensions other than `axis` must match between this
    array and `data`.
    """
    # ensure data is array-like
    if not hasattr(data, "shape"):
        data = np.asanyarray(data)

    # remember old shape before resizing
    old_shape = tuple(self.shape)
    data_shape = tuple(data.shape)
    ndim = len(old_shape)

    # Normalise the axis up front. Without this, a negative or out-of-range
    # axis matches no dimension at all: the array would not grow and a
    # same-shaped `data` would silently overwrite the existing contents.
    if not -ndim <= axis < ndim:
        raise ValueError(f"axis {axis} is out of bounds for array of dimension {ndim}")
    if axis < 0:
        axis += ndim

    self_shape_preserved = old_shape[:axis] + old_shape[axis + 1 :]
    data_shape_preserved = data_shape[:axis] + data_shape[axis + 1 :]
    # The explicit ndim check catches lower-dimensional data whose remaining
    # dimensions happen to line up with ours.
    if len(data_shape) != ndim or self_shape_preserved != data_shape_preserved:
        raise ValueError(
            "shape of data to append is not compatible with the array; "
            "all dimensions must match except for the dimension being "
            "appended"
        )

    # determine new shape: only the append axis grows
    new_shape = (
        old_shape[:axis] + (old_shape[axis] + data_shape[axis],) + old_shape[axis + 1 :]
    )

    # resize
    await self.resize(new_shape)

    # store data in the region that the resize just added
    append_selection = tuple(
        slice(old_shape[axis], new_shape[axis]) if i == axis else slice(None)
        for i in range(ndim)
    )
    await self.setitem(append_selection, data)

    return new_shape
11341189

11351190
async def update_attributes(self, new_attributes: dict[str, JSON]) -> Self:
11361191
# metadata.attributes is "frozen" so we simply clear and update the dict
@@ -1149,7 +1204,8 @@ async def info(self) -> None:
11491204
raise NotImplementedError
11501205

11511206

1152-
@dataclass(frozen=True)
1207+
# TODO: Array can be a frozen data class again once property setters (e.g. shape) are removed
1208+
@dataclass(frozen=False)
11531209
class Array:
11541210
"""Instantiate an array from an initialized store.
11551211
@@ -1309,6 +1365,20 @@ def shape(self) -> ChunkCoords:
13091365
"""
13101366
return self._async_array.shape
13111367

1368+
@shape.setter
def shape(self, value: ChunkCoords) -> None:
    """Resize the array to `value`, emitting a deprecation warning first.

    .. deprecated:: 3.0.0
        Setting a shape using the shape setter is deprecated, use Array.resize instead.
    """
    deprecation_message = (
        "Setting a shape using the shape setter is deprecated, use Array.resize instead."
    )
    # stacklevel=2 attributes the warning to the code performing the assignment
    warnings.warn(deprecation_message, DeprecationWarning, stacklevel=2)
    self.resize(value)
1381+
13121382
@property
13131383
def chunks(self) -> ChunkCoords:
13141384
"""Returns a tuple of integers describing the length of each dimension of a chunk of the array.
@@ -2766,18 +2836,18 @@ def blocks(self) -> BlockIndex:
27662836
:func:`set_block_selection` for documentation and examples."""
27672837
return BlockIndex(self)
27682838

2769-
def resize(self, new_shape: ChunkCoords) -> Array:
2839+
def resize(self, new_shape: ShapeLike) -> None:
27702840
"""
27712841
Change the shape of the array by growing or shrinking one or more
27722842
dimensions.
27732843
2774-
This method does not modify the original Array object. Instead, it returns a new Array
2775-
with the specified shape.
2844+
Parameters
2845+
----------
2846+
new_shape : tuple
2847+
New shape of the array.
27762848
27772849
Notes
27782850
-----
2779-
When resizing an array, the data are not rearranged in any way.
2780-
27812851
If one or more dimensions are shrunk, any chunks falling outside the
27822852
new array shape will be deleted from the underlying store.
27832853
However, it is noteworthy that the chunks partially falling inside the new array
@@ -2790,7 +2860,6 @@ def resize(self, new_shape: ChunkCoords) -> Array:
27902860
>>> import zarr
27912861
>>> z = zarr.zeros(shape=(10000, 10000),
27922862
>>> chunk_shape=(1000, 1000),
2793-
>>> store=StorePath(MemoryStore(mode="w")),
27942863
>>> dtype="i4",)
27952864
>>> z.shape
27962865
(10000, 10000)
@@ -2803,10 +2872,43 @@ def resize(self, new_shape: ChunkCoords) -> Array:
28032872
>>> z2.shape
28042873
(50, 50)
28052874
"""
2806-
resized = sync(self._async_array.resize(new_shape))
2807-
# TODO: remove this cast when type inference improves
2808-
_resized = cast(AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata], resized)
2809-
return type(self)(_resized)
2875+
sync(self._async_array.resize(new_shape))
2876+
2877+
def append(self, data: npt.ArrayLike, axis: int = 0) -> ChunkCoords:
    """Append `data` to the array along `axis`.

    Delegates to the wrapped async array's ``append`` through ``sync`` and
    returns its result.

    Parameters
    ----------
    data : array-like
        Data to be appended.
    axis : int
        Axis along which to append.

    Returns
    -------
    new_shape : tuple

    Notes
    -----
    The size of all dimensions other than `axis` must match between this
    array and `data`.

    Examples
    --------
    >>> import numpy as np
    >>> import zarr
    >>> a = np.arange(10000000, dtype='i4').reshape(10000, 1000)
    >>> z = zarr.array(a, chunks=(1000, 100))
    >>> z.shape
    (10000, 1000)
    >>> z.append(a)
    (20000, 1000)
    >>> z.append(np.vstack([a, a]), axis=1)
    (20000, 2000)
    >>> z.shape
    (20000, 2000)
    """
    pending_append = self._async_array.append(data, axis=axis)
    return sync(pending_append)
28102912

28112913
def update_attributes(self, new_attributes: dict[str, JSON]) -> Array:
28122914
# TODO: remove this cast when type inference improves

tests/v3/test_array.py

Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -417,3 +417,193 @@ def test_update_attrs(zarr_format: int) -> None:
417417

418418
arr2 = zarr.open_array(store=store, zarr_format=zarr_format)
419419
assert arr2.attrs["foo"] == "bar"
420+
421+
422+
@pytest.mark.parametrize("store", ["memory"], indirect=True)
@pytest.mark.parametrize("zarr_format", [2, 3])
def test_resize_1d(store: MemoryStore, zarr_format: int) -> None:
    """Grow, shrink and re-grow a 1-D array in place, checking shape, dtype, chunks and data."""
    i4 = np.dtype("i4")
    z = zarr.create(
        shape=105, chunks=10, dtype="i4", fill_value=0, store=store, zarr_format=zarr_format
    )
    a = np.arange(105, dtype="i4")
    z[:] = a

    def check_layout(expected_shape: tuple[int, ...]) -> None:
        # shape and dtype must agree between the array object and the data read back
        assert expected_shape == z.shape
        assert expected_shape == z[:].shape
        assert i4 == z.dtype
        assert i4 == z[:].dtype
        assert (10,) == z.chunks

    check_layout((105,))
    np.testing.assert_array_equal(a, z[:])

    # grow: original data is kept, the new tail reads back as zeros
    z.resize(205)
    check_layout((205,))
    np.testing.assert_array_equal(a, z[:105])
    np.testing.assert_array_equal(np.zeros(100, dtype="i4"), z[105:])

    # shrink: the leading part of the original data is kept
    z.resize(55)
    check_layout((55,))
    np.testing.assert_array_equal(a[:55], z[:])

    # via shape setter
    new_shape = (105,)
    with pytest.warns(DeprecationWarning):
        z.shape = new_shape
    assert new_shape == z.shape
    assert new_shape == z[:].shape
460+
461+
462+
@pytest.mark.parametrize("store", ["memory"], indirect=True)
@pytest.mark.parametrize("zarr_format", [2, 3])
def test_resize_2d(store: MemoryStore, zarr_format: int) -> None:
    """Resize a 2-D array in place through several shapes, checking shape, dtype, chunks and data."""
    i4 = np.dtype("i4")
    z = zarr.create(
        shape=(105, 105),
        chunks=(10, 10),
        dtype="i4",
        fill_value=0,
        store=store,
        zarr_format=zarr_format,
    )
    a = np.arange(105 * 105, dtype="i4").reshape((105, 105))
    z[:] = a

    def check_layout(expected_shape: tuple[int, ...]) -> None:
        # shape and dtype must agree between the array object and the data read back
        assert expected_shape == z.shape
        assert expected_shape == z[:].shape
        assert i4 == z.dtype
        assert i4 == z[:].dtype
        assert (10, 10) == z.chunks

    check_layout((105, 105))
    np.testing.assert_array_equal(a, z[:])

    # grow both dimensions: original block is kept, new regions read back as zeros
    z.resize((205, 205))
    check_layout((205, 205))
    np.testing.assert_array_equal(a, z[:105, :105])
    np.testing.assert_array_equal(np.zeros((100, 205), dtype="i4"), z[105:, :])
    np.testing.assert_array_equal(np.zeros((205, 100), dtype="i4"), z[:, 105:])

    # shrink both dimensions
    z.resize((55, 55))
    check_layout((55, 55))
    np.testing.assert_array_equal(a[:55, :55], z[:])

    # shrink to a single column
    z.resize((55, 1))
    check_layout((55, 1))
    np.testing.assert_array_equal(a[:55, :1], z[:])

    # swap to a single row: the first 10 columns still hold original data,
    # the remaining columns must read back as zeros
    z.resize((1, 55))
    check_layout((1, 55))
    np.testing.assert_array_equal(a[:1, :10], z[:, :10])
    np.testing.assert_array_equal(np.zeros((1, 55 - 10), dtype="i4"), z[:, 10:55])

    # via shape setter
    new_shape = (105, 105)
    with pytest.warns(DeprecationWarning):
        z.shape = new_shape
    assert new_shape == z.shape
    assert new_shape == z[:].shape
523+
524+
525+
@pytest.mark.parametrize("store", ["memory"], indirect=True)
@pytest.mark.parametrize("zarr_format", [2, 3])
def test_append_1d(store: MemoryStore, zarr_format: int) -> None:
    """Append an ndarray and then a plain list to a 1-D array, comparing against ``np.append``."""
    initial = np.arange(105)
    z = zarr.create(
        shape=initial.shape,
        chunks=10,
        dtype=initial.dtype,
        store=store,
        zarr_format=zarr_format,
    )
    z[:] = initial

    def check_matches(expected: np.ndarray) -> None:
        assert expected.shape == z.shape
        assert expected.dtype == z.dtype
        assert (10,) == z.chunks
        np.testing.assert_array_equal(expected, z[:])

    check_matches(initial)

    extra = np.arange(105, 205)
    after_first = np.append(initial, extra)
    assert z.shape == (105,)
    z.append(extra)
    check_matches(after_first)

    # check append handles array-like
    as_list = [1, 2, 3]
    after_second = np.append(after_first, as_list)
    z.append(as_list)
    check_matches(after_second)
553+
554+
555+
@pytest.mark.parametrize("store", ["memory"], indirect=True)
@pytest.mark.parametrize("zarr_format", [2, 3])
def test_append_2d(store: MemoryStore, zarr_format: int) -> None:
    """Append along the default axis of a 2-D array, comparing against ``np.append(axis=0)``."""
    side = 105
    first = np.arange(side * side, dtype="i4").reshape((side, side))
    z = zarr.create(
        shape=first.shape,
        chunks=(10, 10),
        dtype=first.dtype,
        store=store,
        zarr_format=zarr_format,
    )
    z[:] = first
    assert first.shape == z.shape
    assert first.dtype == z.dtype
    assert (10, 10) == z.chunks
    stored = z[:]
    np.testing.assert_array_equal(first, stored)

    second = np.arange(side * side, 2 * side * side, dtype="i4").reshape((side, side))
    expected = np.append(first, second, axis=0)
    z.append(second)
    assert expected.shape == z.shape
    assert expected.dtype == z.dtype
    assert (10, 10) == z.chunks
    stored = z[:]
    np.testing.assert_array_equal(expected, stored)
577+
578+
579+
@pytest.mark.parametrize("store", ["memory"], indirect=True)
@pytest.mark.parametrize("zarr_format", [2, 3])
def test_append_2d_axis(store: MemoryStore, zarr_format: int) -> None:
    """Append along ``axis=1`` of a 2-D array, comparing against ``np.append(axis=1)``."""
    side = 105
    first = np.arange(side * side, dtype="i4").reshape((side, side))
    z = zarr.create(
        shape=first.shape,
        chunks=(10, 10),
        dtype=first.dtype,
        store=store,
        zarr_format=zarr_format,
    )
    z[:] = first

    def check_matches(expected: np.ndarray) -> None:
        assert expected.shape == z.shape
        assert expected.dtype == z.dtype
        assert (10, 10) == z.chunks
        np.testing.assert_array_equal(expected, z[:])

    check_matches(first)

    second = np.arange(side * side, 2 * side * side, dtype="i4").reshape((side, side))
    widened = np.append(first, second, axis=1)
    z.append(second, axis=1)
    check_matches(widened)
599+
600+
601+
@pytest.mark.parametrize("store", ["memory"], indirect=True)
@pytest.mark.parametrize("zarr_format", [2, 3])
def test_append_bad_shape(store: MemoryStore, zarr_format: int) -> None:
    """Appending 2-D data to a 1-D array must raise ``ValueError``."""
    flat = np.arange(100)
    z = zarr.create(
        shape=flat.shape,
        chunks=10,
        dtype=flat.dtype,
        store=store,
        zarr_format=zarr_format,
    )
    z[:] = flat
    # same values, but reshaped so the dimensionality no longer matches the array
    square = flat.reshape(10, 10)
    with pytest.raises(ValueError):
        z.append(square)

0 commit comments

Comments
 (0)