diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index ca405842e0..0e73d44563 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -175,6 +175,32 @@ async def get_array_metadata( @dataclass(frozen=True) class AsyncArray(Generic[T_ArrayMetadata]): + """ + An asynchronous array class representing a chunked array stored in a Zarr store. + + Parameters + ---------- + metadata : ArrayMetadata + The metadata of the array. + store_path : StorePath + The path to the Zarr store. + codec_pipeline : CodecPipeline, optional + The codec pipeline used for encoding and decoding chunks, by default None. + order : {'C', 'F'}, optional + The order of the array data in memory, by default None. + + Attributes + ---------- + metadata : ArrayMetadata + The metadata of the array. + store_path : StorePath + The path to the Zarr store. + codec_pipeline : CodecPipeline + The codec pipeline used for encoding and decoding chunks. + order : {'C', 'F'} + The order of the array data in memory. + """ + metadata: T_ArrayMetadata store_path: StorePath codec_pipeline: CodecPipeline = field(init=False) @@ -364,6 +390,69 @@ async def create( exists_ok: bool = False, data: npt.ArrayLike | None = None, ) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: + """ + Method to create a new asynchronous array instance. + + Parameters + ---------- + store : StoreLike + The store where the array will be created. + shape : ShapeLike + The shape of the array. + dtype : npt.DTypeLike + The data type of the array. + zarr_format : ZarrFormat, optional + The Zarr format version (default is 3). + fill_value : Any, optional + The fill value of the array (default is None). + attributes : dict[str, JSON], optional + The attributes of the array (default is None). + chunk_shape : ChunkCoords, optional + The shape of the array's chunks (default is None). + chunk_key_encoding : ChunkKeyEncoding, optional + The chunk key encoding (default is None). 
+ codecs : Iterable[Codec | dict[str, JSON]], optional + The codecs used to encode the data (default is None). + dimension_names : Iterable[str], optional + The names of the dimensions (default is None). + chunks : ShapeLike, optional + The shape of the array's chunks (default is None). + V2 only. V3 arrays should not have a 'chunks' parameter. + dimension_separator : Literal[".", "/"], optional + The dimension separator (default is None). + V2 only. V3 arrays cannot have a dimension separator. + order : Literal["C", "F"], optional + The order of the array (default is None). + V2 only. V3 arrays should not have an 'order' parameter. + filters : list[dict[str, JSON]], optional + The filters used to compress the data (default is None). + V2 only. V3 arrays should not have a 'filters' parameter. + compressor : dict[str, JSON], optional + The compressor used to compress the data (default is None). + V2 only. V3 arrays should not have a 'compressor' parameter. + exists_ok : bool, optional + Whether an already-existing array at the store location is acceptable; when False, an error is raised if one exists (default is False). + data : npt.ArrayLike, optional + The data to be inserted into the array (default is None). + + Returns + ------- + AsyncArray + The created asynchronous array instance. + + Examples + -------- + >>> import zarr + >>> store = zarr.storage.MemoryStore(mode='w') + >>> async_arr = await zarr.core.array.AsyncArray.create( + ... store=store, + ... shape=(100,100), + ... chunks=(10,10), + ... dtype='i4', + ... fill_value=0) + + + """ store_path = await make_store_path(store) dtype_parsed = parse_dtype(dtype, zarr_format) @@ -558,6 +647,28 @@ async def open( store: StoreLike, zarr_format: ZarrFormat | None = 3, ) -> AsyncArray[ArrayV3Metadata] | AsyncArray[ArrayV2Metadata]: + """ + Async method to open an existing Zarr array from a given store. + + Parameters + ---------- + store : StoreLike + The store containing the Zarr array. + zarr_format : ZarrFormat | None, optional + The Zarr format version (default is 3).
+ + Returns + ------- + AsyncArray + The opened Zarr array. + + Examples + -------- + >>> import zarr + >>> store = zarr.storage.MemoryStore(mode='w') + >>> async_arr = await AsyncArray.open(store) # doctest: +ELLIPSIS + + """ store_path = await make_store_path(store) metadata_dict = await get_array_metadata(store_path, zarr_format=zarr_format) # TODO: remove this cast when we have better type hints @@ -570,14 +681,38 @@ def store(self) -> Store: @property def ndim(self) -> int: + """Returns the number of dimensions in the Array. + + Returns + ------- + int + The number of dimensions in the Array. + """ return len(self.metadata.shape) @property def shape(self) -> ChunkCoords: + """Returns the shape of the Array. + + Returns + ------- + tuple + The shape of the Array. + """ return self.metadata.shape @property def chunks(self) -> ChunkCoords: + """Returns the chunk shape of the Array. + + Only defined for arrays using `RegularChunkGrid`. + If the array doesn't use `RegularChunkGrid`, `NotImplementedError` is raised. + + Returns + ------- + ChunkCoords + The chunk shape of the Array. + """ if isinstance(self.metadata.chunk_grid, RegularChunkGrid): return self.metadata.chunk_grid.chunk_shape @@ -589,28 +724,69 @@ def chunks(self) -> ChunkCoords: @property def size(self) -> int: + """Returns the total number of elements in the array. + + Returns + ------- + int + Total number of elements in the array. + """ return np.prod(self.metadata.shape).item() @property def dtype(self) -> np.dtype[Any]: + """Returns the data type of the array. + + Returns + ------- + np.dtype + Data type of the array. + """ return self.metadata.dtype @property def attrs(self) -> dict[str, JSON]: + """Returns the attributes of the array. + + Returns + ------- + dict + Attributes of the array. + """ return self.metadata.attributes @property def read_only(self) -> bool: + """Returns True if the array is read-only.
+ + Returns + ------- + bool + True if the array is read-only. + """ + # Backwards compatibility for 2.x return self.store_path.store.mode.readonly @property def path(self) -> str: - """Storage path.""" + """Storage path. + + Returns + ------- + str + The path to the array in the Zarr store. + """ return self.store_path.path @property def name(self) -> str | None: - """Array name following h5py convention.""" + """Array name following h5py convention. + + Returns + ------- + str or None + The name of the array. + """ if self.path: # follow h5py convention: add leading slash name = self.path @@ -621,7 +797,13 @@ def name(self) -> str | None: @property def basename(self) -> str | None: - """Final component of name.""" + """Final component of name. + + Returns + ------- + str or None + The final component of the array name, or None if the array has no name. + """ if self.name is not None: return self.name.split("/")[-1] return None @@ -630,6 +812,11 @@ def basename(self) -> str | None: def cdata_shape(self) -> ChunkCoords: """ The shape of the chunk grid for this array. + + Returns + ------- + ChunkCoords + The shape of the chunk grid for this array. """ return tuple(ceildiv(s, c) for s, c in zip(self.shape, self.chunks, strict=False)) @@ -637,6 +824,11 @@ def cdata_shape(self) -> ChunkCoords: def nchunks(self) -> int: """ The number of chunks in the stored representation of this array. + + Returns + ------- + int + The total number of chunks in the array. """ return product(self.cdata_shape) @@ -644,6 +836,11 @@ def nchunks(self) -> int: def nchunks_initialized(self) -> int: """ The number of chunks that have been persisted in storage. + + Returns + ------- + int + The number of initialized chunks in the array. """ return nchunks_initialized(self) @@ -782,6 +979,36 @@ async def getitem( *, prototype: BufferPrototype | None = None, ) -> NDArrayLike: + """ + Asynchronous function that retrieves a subset of the array's data based on the provided selection.
+ + Parameters + ---------- + selection : BasicSelection + A selection object specifying the subset of data to retrieve. + prototype : BufferPrototype, optional + A buffer prototype to use for the retrieved data (default is None). + + Returns + ------- + NDArrayLike + The retrieved subset of the array's data. + + Examples + -------- + >>> import zarr + >>> store = zarr.storage.MemoryStore(mode='w') + >>> async_arr = await zarr.core.array.AsyncArray.create( + ... store=store, + ... shape=(100,100), + ... chunks=(10,10), + ... dtype='i4', + ... fill_value=0) + + >>> await async_arr.getitem((0,1)) # doctest: +ELLIPSIS + array(0, dtype=int32) + + """ if prototype is None: prototype = default_buffer_prototype() indexer = BasicIndexer( @@ -924,6 +1151,18 @@ async def info(self) -> None: @dataclass(frozen=True) class Array: + """Instantiate an array from an initialized store. + + Parameters + ---------- + store : StoreLike + The array store that has already been initialized. + shape : ChunkCoords + The shape of the array. + dtype : npt.DTypeLike + The dtype of the array. + """ + _async_array: AsyncArray[ArrayV3Metadata] | AsyncArray[ArrayV2Metadata] @classmethod @@ -957,6 +1196,42 @@ def create( # runtime exists_ok: bool = False, ) -> Array: + """Creates a new Array instance from an initialized store. + + Parameters + ---------- + store : StoreLike + The array store that has already been initialized. + shape : ChunkCoords + The shape of the array. + dtype : npt.DTypeLike + The data type of the array. + chunk_shape : ChunkCoords, optional + The shape of the Array's chunks (default is None). + chunk_key_encoding : ChunkKeyEncoding, optional + The chunk key encoding (default is None). + codecs : Iterable[Codec | dict[str, JSON]], optional + The codecs used to encode the data (default is None). + dimension_names : Iterable[str], optional + The names of the dimensions (default is None). + chunks : ChunkCoords, optional + The shape of the Array's chunks (default is None). 
+ dimension_separator : Literal[".", "/"], optional + The dimension separator (default is None). + order : Literal["C", "F"], optional + The order of the array (default is None). + filters : list[dict[str, JSON]], optional + The filters used to compress the data (default is None). + compressor : dict[str, JSON], optional + The compressor used to compress the data (default is None). + exists_ok : bool, optional + Whether an already-existing array at the store location is acceptable; when False, an error is raised if one exists (default is False). + + Returns + ------- + Array + Array created from the store. + """ async_array = sync( AsyncArray.create( store=store, @@ -993,6 +1268,18 @@ def open( cls, store: StoreLike, ) -> Array: + """Opens an existing Array from a store. + + Parameters + ---------- + store : StoreLike + Store containing the Array. + + Returns + ------- + Array + Array opened from the store. + """ async_array = sync(AsyncArray.open(store)) return cls(async_array) @@ -1002,26 +1289,72 @@ def store(self) -> Store: @property def ndim(self) -> int: + """Returns the number of dimensions in the array. + + Returns + ------- + int + The number of dimensions in the array. + """ return self._async_array.ndim @property def shape(self) -> ChunkCoords: + """Returns the shape of the array. + + Returns + ------- + ChunkCoords + The shape of the array. + """ return self._async_array.shape @property def chunks(self) -> ChunkCoords: + """Returns a tuple of integers describing the length of each dimension of a chunk of the array. + + Returns + ------- + ChunkCoords + A tuple of integers representing the length of each dimension of a chunk. + """ return self._async_array.chunks @property def size(self) -> int: + """Returns the total number of elements in the array. + + Returns + ------- + int + Total number of elements in the array. + """ return self._async_array.size @property def dtype(self) -> np.dtype[Any]: + """Returns the NumPy data type. + + Returns + ------- + np.dtype + The NumPy data type.
+ """ return self._async_array.dtype @property def attrs(self) -> Attributes: + """Returns a MutableMapping containing user-defined attributes. + + Returns + ------- + attrs : MutableMapping + A MutableMapping object containing user-defined attributes. + + Notes + ----- + Note that attribute values must be JSON serializable. + """ return Attributes(self) @property