22
33import json
44from asyncio import gather
5- from dataclasses import dataclass , field , replace
5+ from dataclasses import dataclass , field
66from itertools import starmap
77from logging import getLogger
88from typing import TYPE_CHECKING , Any , Generic , Literal , cast , overload
1313from zarr ._compat import _deprecate_positional_args
1414from zarr .abc .store import Store , set_or_delete
1515from zarr .codecs import _get_default_array_bytes_codec
16- from zarr .codecs ._v2 import V2Compressor , V2Filters
16+ from zarr .codecs ._v2 import V2Codec
1717from zarr .core .attributes import Attributes
1818from zarr .core .buffer import (
1919 BufferPrototype ,
@@ -118,9 +118,8 @@ def create_codec_pipeline(metadata: ArrayMetadata) -> CodecPipeline:
118118 if isinstance (metadata , ArrayV3Metadata ):
119119 return get_pipeline_class ().from_codecs (metadata .codecs )
120120 elif isinstance (metadata , ArrayV2Metadata ):
121- return get_pipeline_class ().from_codecs (
122- [V2Filters (metadata .filters ), V2Compressor (metadata .compressor )]
123- )
121+ v2_codec = V2Codec (filters = metadata .filters , compressor = metadata .compressor )
122+ return get_pipeline_class ().from_codecs ([v2_codec ])
124123 else :
125124 raise TypeError
126125
@@ -1104,15 +1103,15 @@ async def setitem(
11041103 )
11051104 return await self ._set_selection (indexer , value , prototype = prototype )
11061105
1107- async def resize (self , new_shape : ChunkCoords , delete_outside_chunks : bool = True ) -> Self :
1106+ async def resize (self , new_shape : ShapeLike , delete_outside_chunks : bool = True ) -> None :
1107+ new_shape = parse_shapelike (new_shape )
11081108 assert len (new_shape ) == len (self .metadata .shape )
11091109 new_metadata = self .metadata .update_shape (new_shape )
11101110
1111- # Remove all chunks outside of the new shape
1112- old_chunk_coords = set (self .metadata .chunk_grid .all_chunk_coords (self .metadata .shape ))
1113- new_chunk_coords = set (self .metadata .chunk_grid .all_chunk_coords (new_shape ))
1114-
11151111 if delete_outside_chunks :
1112+ # Remove all chunks outside of the new shape
1113+ old_chunk_coords = set (self .metadata .chunk_grid .all_chunk_coords (self .metadata .shape ))
1114+ new_chunk_coords = set (self .metadata .chunk_grid .all_chunk_coords (new_shape ))
11161115
11171116 async def _delete_key (key : str ) -> None :
11181117 await (self .store_path / key ).delete ()
@@ -1128,7 +1127,63 @@ async def _delete_key(key: str) -> None:
11281127
11291128 # Write new metadata
11301129 await self ._save_metadata (new_metadata )
1131- return replace (self , metadata = new_metadata )
1130+
1131+ # Update metadata (in place)
1132+ object .__setattr__ (self , "metadata" , new_metadata )
1133+
1134+ async def append (self , data : npt .ArrayLike , axis : int = 0 ) -> ChunkCoords :
1135+ """Append `data` to `axis`.
1136+
1137+ Parameters
1138+ ----------
1139+ data : array-like
1140+ Data to be appended.
1141+ axis : int
1142+ Axis along which to append.
1143+
1144+ Returns
1145+ -------
1146+ new_shape : tuple
1147+
1148+ Notes
1149+ -----
1150+ The size of all dimensions other than `axis` must match between this
1151+ array and `data`.
1152+ """
1153+ # ensure data is array-like
1154+ if not hasattr (data , "shape" ):
1155+ data = np .asanyarray (data )
1156+
1157+ self_shape_preserved = tuple (s for i , s in enumerate (self .shape ) if i != axis )
1158+ data_shape_preserved = tuple (s for i , s in enumerate (data .shape ) if i != axis )
1159+ if self_shape_preserved != data_shape_preserved :
1160+ raise ValueError (
1161+ f"shape of data to append is not compatible with the array. "
1162+ f"The shape of the data is ({ data_shape_preserved } )"
1163+ f"and the shape of the array is ({ self_shape_preserved } )."
1164+ "All dimensions must match except for the dimension being "
1165+ "appended."
1166+ )
1167+ # remember old shape
1168+ old_shape = self .shape
1169+
1170+ # determine new shape
1171+ new_shape = tuple (
1172+ self .shape [i ] if i != axis else self .shape [i ] + data .shape [i ]
1173+ for i in range (len (self .shape ))
1174+ )
1175+
1176+ # resize
1177+ await self .resize (new_shape )
1178+
1179+ # store data
1180+ append_selection = tuple (
1181+ slice (None ) if i != axis else slice (old_shape [i ], new_shape [i ])
1182+ for i in range (len (self .shape ))
1183+ )
1184+ await self .setitem (append_selection , data )
1185+
1186+ return new_shape
11321187
11331188 async def update_attributes (self , new_attributes : dict [str , JSON ]) -> Self :
11341189 # metadata.attributes is "frozen" so we simply clear and update the dict
@@ -1147,7 +1202,8 @@ async def info(self) -> None:
11471202 raise NotImplementedError
11481203
11491204
1150- @dataclass (frozen = True )
1205+ # TODO: Array can be a frozen data class again once property setters (e.g. shape) are removed
1206+ @dataclass (frozen = False )
11511207class Array :
11521208 """Instantiate an array from an initialized store."""
11531209
@@ -1297,6 +1353,11 @@ def shape(self) -> ChunkCoords:
12971353 """
12981354 return self ._async_array .shape
12991355
1356+ @shape .setter
1357+ def shape (self , value : ChunkCoords ) -> None :
1358+ """Sets the shape of the array by calling resize."""
1359+ self .resize (value )
1360+
13001361 @property
13011362 def chunks (self ) -> ChunkCoords :
13021363 """Returns a tuple of integers describing the length of each dimension of a chunk of the array.
@@ -2754,18 +2815,18 @@ def blocks(self) -> BlockIndex:
27542815 :func:`set_block_selection` for documentation and examples."""
27552816 return BlockIndex (self )
27562817
2757- def resize (self , new_shape : ChunkCoords ) -> Array :
2818+ def resize (self , new_shape : ShapeLike ) -> None :
27582819 """
27592820 Change the shape of the array by growing or shrinking one or more
27602821 dimensions.
27612822
2762- This method does not modify the original Array object. Instead, it returns a new Array
2763- with the specified shape.
2823+ Parameters
2824+ ----------
2825+ new_shape : tuple
2826+ New shape of the array.
27642827
27652828 Notes
27662829 -----
2767- When resizing an array, the data are not rearranged in any way.
2768-
27692830 If one or more dimensions are shrunk, any chunks falling outside the
27702831 new array shape will be deleted from the underlying store.
27712832 However, it is noteworthy that the chunks partially falling inside the new array
@@ -2778,7 +2839,6 @@ def resize(self, new_shape: ChunkCoords) -> Array:
27782839 >>> import zarr
27792840 >>> z = zarr.zeros(shape=(10000, 10000),
27802841 >>> chunk_shape=(1000, 1000),
2781- >>> store=StorePath(MemoryStore(mode="w")),
27822842 >>> dtype="i4",)
27832843 >>> z.shape
27842844 (10000, 10000)
@@ -2791,10 +2851,43 @@ def resize(self, new_shape: ChunkCoords) -> Array:
27912851 >>> z2.shape
27922852 (50, 50)
27932853 """
2794- resized = sync (self ._async_array .resize (new_shape ))
2795- # TODO: remove this cast when type inference improves
2796- _resized = cast (AsyncArray [ArrayV2Metadata ] | AsyncArray [ArrayV3Metadata ], resized )
2797- return type (self )(_resized )
2854+ sync (self ._async_array .resize (new_shape ))
2855+
2856+ def append (self , data : npt .ArrayLike , axis : int = 0 ) -> ChunkCoords :
2857+ """Append `data` to `axis`.
2858+
2859+ Parameters
2860+ ----------
2861+ data : array-like
2862+ Data to be appended.
2863+ axis : int
2864+ Axis along which to append.
2865+
2866+ Returns
2867+ -------
2868+ new_shape : tuple
2869+
2870+ Notes
2871+ -----
2872+ The size of all dimensions other than `axis` must match between this
2873+ array and `data`.
2874+
2875+ Examples
2876+ --------
2877+ >>> import numpy as np
2878+ >>> import zarr
2879+ >>> a = np.arange(10000000, dtype='i4').reshape(10000, 1000)
2880+ >>> z = zarr.array(a, chunks=(1000, 100))
2881+ >>> z.shape
2882+ (10000, 1000)
2883+ >>> z.append(a)
2884+ (20000, 1000)
2885+ >>> z.append(np.vstack([a, a]), axis=1)
2886+ (20000, 2000)
2887+ >>> z.shape
2888+ (20000, 2000)
2889+ """
2890+ return sync (self ._async_array .append (data , axis = axis ))
27982891
27992892 def update_attributes (self , new_attributes : dict [str , JSON ]) -> Array :
28002893 # TODO: remove this cast when type inference improves
0 commit comments