Skip to content

Commit b919483

Browse files
committed
Merge branch 'feat/chunks-shards' into feat/read-funcs
2 parents 470b60f + bcdc4cc commit b919483

File tree

4 files changed

+101
-8
lines changed

4 files changed

+101
-8
lines changed

src/zarr/core/array.py

Lines changed: 35 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -803,6 +803,7 @@ def shape(self) -> ChunkCoords:
803803
@property
804804
def chunks(self) -> ChunkCoords:
805805
"""Returns the chunk shape of the Array.
806+
If sharding is used the inner chunk shape is returned.
806807
807808
Only defined for arrays using `RegularChunkGrid`.
808809
If array doesn't use `RegularChunkGrid`, `NotImplementedError` is raised.
@@ -812,14 +813,22 @@ def chunks(self) -> ChunkCoords:
812813
ChunkCoords:
813814
The chunk shape of the Array.
814815
"""
815-
if isinstance(self.metadata.chunk_grid, RegularChunkGrid):
816-
return self.metadata.chunk_grid.chunk_shape
816+
return self.metadata.chunks
817817

818-
msg = (
819-
f"The `chunks` attribute is only defined for arrays using `RegularChunkGrid`."
820-
f"This array has a {self.metadata.chunk_grid} instead."
821-
)
822-
raise NotImplementedError(msg)
818+
@property
819+
def shards(self) -> ChunkCoords | None:
820+
"""Returns the shard shape of the Array.
821+
Returns None if sharding is not used.
822+
823+
Only defined for arrays using `RegularChunkGrid`.
824+
If array doesn't use `RegularChunkGrid`, `NotImplementedError` is raised.
825+
826+
Returns
827+
-------
828+
ChunkCoords:
829+
The shard shape of the Array, or None if sharding is not used.
830+
"""
831+
return self.metadata.shards
823832

824833
@property
825834
def size(self) -> int:
@@ -1733,6 +1742,10 @@ def shape(self, value: ChunkCoords) -> None:
17331742
@property
17341743
def chunks(self) -> ChunkCoords:
17351744
"""Returns a tuple of integers describing the length of each dimension of a chunk of the array.
1745+
If sharding is used the inner chunk shape is returned.
1746+
1747+
Only defined for arrays using `RegularChunkGrid`.
1748+
If array doesn't use `RegularChunkGrid`, `NotImplementedError` is raised.
17361749
17371750
Returns
17381751
-------
@@ -1741,6 +1754,21 @@ def chunks(self) -> ChunkCoords:
17411754
"""
17421755
return self._async_array.chunks
17431756

1757+
@property
1758+
def shards(self) -> ChunkCoords | None:
1759+
"""Returns a tuple of integers describing the length of each dimension of a shard of the array.
1760+
Returns None if sharding is not used.
1761+
1762+
Only defined for arrays using `RegularChunkGrid`.
1763+
If array doesn't use `RegularChunkGrid`, `NotImplementedError` is raised.
1764+
1765+
Returns
1766+
-------
1767+
tuple | None
1768+
A tuple of integers representing the length of each dimension of a shard or None if sharding is not used.
1769+
"""
1770+
return self._async_array.shards
1771+
17441772
@property
17451773
def size(self) -> int:
17461774
"""Returns the total number of elements in the array.

src/zarr/core/metadata/v2.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ class ArrayV2MetadataDict(TypedDict):
4444
@dataclass(frozen=True, kw_only=True)
4545
class ArrayV2Metadata(Metadata):
4646
shape: ChunkCoords
47-
chunks: tuple[int, ...]
47+
chunks: ChunkCoords
4848
dtype: np.dtype[Any]
4949
fill_value: int | float | str | bytes | None = 0
5050
order: MemoryOrder = "C"
@@ -102,6 +102,10 @@ def ndim(self) -> int:
102102
def chunk_grid(self) -> RegularChunkGrid:
103103
return RegularChunkGrid(chunk_shape=self.chunks)
104104

105+
@property
106+
def shards(self) -> ChunkCoords | None:
107+
return None
108+
105109
def to_buffer_dict(self, prototype: BufferPrototype) -> dict[str, Buffer]:
106110
def _json_convert(
107111
o: Any,

src/zarr/core/metadata/v3.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,40 @@ def dtype(self) -> np.dtype[Any]:
296296
def ndim(self) -> int:
297297
return len(self.shape)
298298

299+
@property
300+
def chunks(self) -> ChunkCoords:
301+
if isinstance(self.chunk_grid, RegularChunkGrid):
302+
from zarr.codecs.sharding import ShardingCodec
303+
304+
if len(self.codecs) == 1 and isinstance(self.codecs[0], ShardingCodec):
305+
sharding_codec = self.codecs[0]
306+
assert isinstance(sharding_codec, ShardingCodec) # for mypy
307+
return sharding_codec.chunk_shape
308+
else:
309+
return self.chunk_grid.chunk_shape
310+
311+
msg = (
312+
f"The `chunks` attribute is only defined for arrays using `RegularChunkGrid`."
313+
f"This array has a {self.chunk_grid} instead."
314+
)
315+
raise NotImplementedError(msg)
316+
317+
@property
318+
def shards(self) -> ChunkCoords | None:
319+
if isinstance(self.chunk_grid, RegularChunkGrid):
320+
from zarr.codecs.sharding import ShardingCodec
321+
322+
if len(self.codecs) == 1 and isinstance(self.codecs[0], ShardingCodec):
323+
return self.chunk_grid.chunk_shape
324+
else:
325+
return None
326+
327+
msg = (
328+
f"The `shards` attribute is only defined for arrays using `RegularChunkGrid`."
329+
f"This array has a {self.chunk_grid} instead."
330+
)
331+
raise NotImplementedError(msg)
332+
299333
def get_chunk_spec(
300334
self, _chunk_coords: ChunkCoords, array_config: ArrayConfig, prototype: BufferPrototype
301335
) -> ArraySpec:

tests/test_array.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -928,3 +928,30 @@ def test_auto_partition_auto_shards(
928928
array_shape=array_shape, chunks=chunk_shape, shards="auto", dtype=dtype
929929
)
930930
assert auto_shards == expected_shards
931+
932+
933+
def test_chunks_and_shards() -> None:
934+
store = StorePath(MemoryStore())
935+
shape = (100, 100)
936+
chunks = (5, 5)
937+
shards = (10, 10)
938+
939+
arr_v3 = zarr.create_array(store=store / "v3", shape=shape, chunks=chunks, dtype="i4")
940+
assert arr_v3.chunks == chunks
941+
assert arr_v3.shards is None
942+
943+
arr_v3_sharding = zarr.create_array(
944+
store=store / "v3_sharding",
945+
shape=shape,
946+
chunks=chunks,
947+
shards=shards,
948+
dtype="i4",
949+
)
950+
assert arr_v3_sharding.chunks == chunks
951+
assert arr_v3_sharding.shards == shards
952+
953+
arr_v2 = zarr.create_array(
954+
store=store / "v2", shape=shape, chunks=chunks, zarr_format=2, dtype="i4"
955+
)
956+
assert arr_v2.chunks == chunks
957+
assert arr_v2.shards is None

0 commit comments

Comments
 (0)