Skip to content

Commit bcdc4cc

Browse files
committed
adds a shards property
1 parent eab46a2 commit bcdc4cc

File tree

4 files changed

+101
-8
lines changed

4 files changed

+101
-8
lines changed

src/zarr/core/array.py

Lines changed: 35 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -798,6 +798,7 @@ def shape(self) -> ChunkCoords:
798798
@property
799799
def chunks(self) -> ChunkCoords:
800800
"""Returns the chunk shape of the Array.
801+
If sharding is used the inner chunk shape is returned.
801802
802803
Only defined for arrays using `RegularChunkGrid`.
803804
If array doesn't use `RegularChunkGrid`, `NotImplementedError` is raised.
@@ -807,14 +808,22 @@ def chunks(self) -> ChunkCoords:
807808
ChunkCoords:
808809
The chunk shape of the Array.
809810
"""
810-
if isinstance(self.metadata.chunk_grid, RegularChunkGrid):
811-
return self.metadata.chunk_grid.chunk_shape
811+
return self.metadata.chunks
812812

813-
msg = (
814-
f"The `chunks` attribute is only defined for arrays using `RegularChunkGrid`."
815-
f"This array has a {self.metadata.chunk_grid} instead."
816-
)
817-
raise NotImplementedError(msg)
813+
@property
814+
def shards(self) -> ChunkCoords | None:
815+
"""Returns the shard shape of the Array.
816+
Returns None if sharding is not used.
817+
818+
Only defined for arrays using `RegularChunkGrid`.
819+
If array doesn't use `RegularChunkGrid`, `NotImplementedError` is raised.
820+
821+
Returns
822+
-------
823+
ChunkCoords:
824+
The shard shape of the Array.
825+
"""
826+
return self.metadata.shards
818827

819828
@property
820829
def size(self) -> int:
@@ -1728,6 +1737,10 @@ def shape(self, value: ChunkCoords) -> None:
17281737
@property
17291738
def chunks(self) -> ChunkCoords:
17301739
"""Returns a tuple of integers describing the length of each dimension of a chunk of the array.
1740+
If sharding is used the inner chunk shape is returned.
1741+
1742+
Only defined for arrays using `RegularChunkGrid`.
1743+
If array doesn't use `RegularChunkGrid`, `NotImplementedError` is raised.
17311744
17321745
Returns
17331746
-------
@@ -1736,6 +1749,21 @@ def chunks(self) -> ChunkCoords:
17361749
"""
17371750
return self._async_array.chunks
17381751

1752+
@property
1753+
def shards(self) -> ChunkCoords | None:
1754+
"""Returns a tuple of integers describing the length of each dimension of a shard of the array.
1755+
Returns None if sharding is not used.
1756+
1757+
Only defined for arrays using `RegularChunkGrid`.
1758+
If array doesn't use `RegularChunkGrid`, `NotImplementedError` is raised.
1759+
1760+
Returns
1761+
-------
1762+
tuple | None
1763+
A tuple of integers representing the length of each dimension of a shard or None if sharding is not used.
1764+
"""
1765+
return self._async_array.shards
1766+
17391767
@property
17401768
def size(self) -> int:
17411769
"""Returns the total number of elements in the array.

src/zarr/core/metadata/v2.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ class ArrayV2MetadataDict(TypedDict):
4444
@dataclass(frozen=True, kw_only=True)
4545
class ArrayV2Metadata(Metadata):
4646
shape: ChunkCoords
47-
chunks: tuple[int, ...]
47+
chunks: ChunkCoords
4848
dtype: np.dtype[Any]
4949
fill_value: int | float | str | bytes | None = 0
5050
order: MemoryOrder = "C"
@@ -102,6 +102,10 @@ def ndim(self) -> int:
102102
def chunk_grid(self) -> RegularChunkGrid:
103103
return RegularChunkGrid(chunk_shape=self.chunks)
104104

105+
@property
106+
def shards(self) -> ChunkCoords | None:
107+
return None
108+
105109
def to_buffer_dict(self, prototype: BufferPrototype) -> dict[str, Buffer]:
106110
def _json_convert(
107111
o: Any,

src/zarr/core/metadata/v3.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,40 @@ def dtype(self) -> np.dtype[Any]:
296296
def ndim(self) -> int:
297297
return len(self.shape)
298298

299+
@property
300+
def chunks(self) -> ChunkCoords:
301+
if isinstance(self.chunk_grid, RegularChunkGrid):
302+
from zarr.codecs.sharding import ShardingCodec
303+
304+
if len(self.codecs) == 1 and isinstance(self.codecs[0], ShardingCodec):
305+
sharding_codec = self.codecs[0]
306+
assert isinstance(sharding_codec, ShardingCodec) # for mypy
307+
return sharding_codec.chunk_shape
308+
else:
309+
return self.chunk_grid.chunk_shape
310+
311+
msg = (
312+
f"The `chunks` attribute is only defined for arrays using `RegularChunkGrid`."
313+
f"This array has a {self.chunk_grid} instead."
314+
)
315+
raise NotImplementedError(msg)
316+
317+
@property
318+
def shards(self) -> ChunkCoords | None:
319+
if isinstance(self.chunk_grid, RegularChunkGrid):
320+
from zarr.codecs.sharding import ShardingCodec
321+
322+
if len(self.codecs) == 1 and isinstance(self.codecs[0], ShardingCodec):
323+
return self.chunk_grid.chunk_shape
324+
else:
325+
return None
326+
327+
msg = (
328+
f"The `shards` attribute is only defined for arrays using `RegularChunkGrid`."
329+
f"This array has a {self.chunk_grid} instead."
330+
)
331+
raise NotImplementedError(msg)
332+
299333
def get_chunk_spec(
300334
self, _chunk_coords: ChunkCoords, array_config: ArrayConfig, prototype: BufferPrototype
301335
) -> ArraySpec:

tests/test_array.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -928,3 +928,30 @@ def test_auto_partition_auto_shards(
928928
array_shape=array_shape, chunk_shape=chunk_shape, shard_shape="auto", dtype=dtype
929929
)
930930
assert auto_shards == expected_shards
931+
932+
933+
def test_chunks_and_shards() -> None:
934+
store = StorePath(MemoryStore())
935+
shape = (100, 100)
936+
chunks = (5, 5)
937+
shards = (10, 10)
938+
939+
arr_v3 = zarr.create_array(store=store / "v3", shape=shape, chunks=chunks, dtype="i4")
940+
assert arr_v3.chunks == chunks
941+
assert arr_v3.shards is None
942+
943+
arr_v3_sharding = zarr.create_array(
944+
store=store / "v3_sharding",
945+
shape=shape,
946+
chunks=chunks,
947+
shards=shards,
948+
dtype="i4",
949+
)
950+
assert arr_v3_sharding.chunks == chunks
951+
assert arr_v3_sharding.shards == shards
952+
953+
arr_v2 = zarr.create_array(
954+
store=store / "v2", shape=shape, chunks=chunks, zarr_format=2, dtype="i4"
955+
)
956+
assert arr_v2.chunks == chunks
957+
assert arr_v2.shards is None

0 commit comments

Comments
 (0)