Skip to content

Commit 3bf61c5

Browse files
committed
adapt Array.info
1 parent 3f14d2a commit 3bf61c5

File tree

6 files changed: +80 −54 lines changed

src/zarr/core/_info.py

Lines changed: 13 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
import numcodecs.abc
66
import numpy as np
77

8-
from zarr.abc.codec import Codec
8+
from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec
99
from zarr.core.common import ZarrFormat
1010
from zarr.core.metadata.v3 import DataType
1111

@@ -85,9 +85,9 @@ class ArrayInfo:
8585
_order: Literal["C", "F"]
8686
_read_only: bool
8787
_store_type: str
88-
_compressor: numcodecs.abc.Codec | None = None
89-
_filters: tuple[numcodecs.abc.Codec, ...] | None = None
90-
_codecs: tuple[Codec, ...] | None = None
88+
_filters: tuple[numcodecs.abc.Codec, ...] | tuple[ArrayArrayCodec, ...] = ()
89+
_serializer: ArrayBytesCodec | None = None
90+
_compressors: tuple[numcodecs.abc.Codec, ...] | tuple[BytesBytesCodec, ...] = ()
9191
_count_bytes: int | None = None
9292
_count_bytes_stored: int | None = None
9393
_count_chunks_initialized: int | None = None
@@ -113,14 +113,13 @@ def __repr__(self) -> str:
113113
if self._chunk_shape is None:
114114
# for non-regular chunk grids
115115
kwargs["chunk_shape"] = "<variable>"
116-
if self._compressor is not None:
117-
template += "\nCompressor : {_compressor}"
118116

119-
if self._filters is not None:
117+
if len(self._filters) > 0:
120118
template += "\nFilters : {_filters}"
121-
122-
if self._codecs is not None:
123-
template += "\nCodecs : {_codecs}"
119+
if self._serializer is not None:
120+
template += "\nSerializer : {_serializer}"
121+
if len(self._compressors) > 0:
122+
template += "\nCompressors : {_compressors}"
124123

125124
if self._count_bytes is not None:
126125
template += "\nNo. bytes : {_count_bytes}"
@@ -139,5 +138,8 @@ def __repr__(self) -> str:
139138
kwargs["_storage_ratio"] = f"{self._count_bytes / self._count_bytes_stored:.1f}"
140139

141140
if self._count_chunks_initialized is not None:
142-
template += "\nChunks Initialized : {_count_chunks_initialized}"
141+
if self._shard_shape is not None:
142+
template += "\nShards Initialized : {_count_chunks_initialized}"
143+
else:
144+
template += "\nChunks Initialized : {_count_chunks_initialized}"
143145
return template.format(**kwargs)

src/zarr/core/array.py

Lines changed: 19 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -924,7 +924,9 @@ def filters(self) -> tuple[numcodecs.abc.Codec, ...] | tuple[ArrayArrayCodec, ..
924924
return ()
925925
return filters
926926

927-
return tuple(codec for codec in self.metadata.codecs if isinstance(codec, ArrayArrayCodec))
927+
return tuple(
928+
codec for codec in self.metadata.inner_codecs if isinstance(codec, ArrayArrayCodec)
929+
)
928930

929931
@property
930932
def serializer(self) -> ArrayBytesCodec | None:
@@ -934,7 +936,9 @@ def serializer(self) -> ArrayBytesCodec | None:
934936
if self.metadata.zarr_format == 2:
935937
return None
936938

937-
return next(codec for codec in self.metadata.codecs if isinstance(codec, ArrayBytesCodec))
939+
return next(
940+
codec for codec in self.metadata.inner_codecs if isinstance(codec, ArrayBytesCodec)
941+
)
938942

939943
@property
940944
@deprecated("Use AsyncArray.compressors instead.")
@@ -961,7 +965,9 @@ def compressors(self) -> tuple[numcodecs.abc.Codec, ...] | tuple[BytesBytesCodec
961965
return (self.metadata.compressor,)
962966
return ()
963967

964-
return tuple(codec for codec in self.metadata.codecs if isinstance(codec, BytesBytesCodec))
968+
return tuple(
969+
codec for codec in self.metadata.inner_codecs if isinstance(codec, BytesBytesCodec)
970+
)
965971

966972
@property
967973
def dtype(self) -> np.dtype[Any]:
@@ -1613,31 +1619,26 @@ async def info_complete(self) -> Any:
16131619
def _info(
16141620
self, count_chunks_initialized: int | None = None, count_bytes_stored: int | None = None
16151621
) -> Any:
1616-
kwargs: dict[str, Any] = {}
1617-
if self.metadata.zarr_format == 2:
1618-
assert isinstance(self.metadata, ArrayV2Metadata)
1619-
if self.metadata.compressor is not None:
1620-
kwargs["_compressor"] = self.metadata.compressor
1621-
if self.metadata.filters is not None:
1622-
kwargs["_filters"] = self.metadata.filters
1623-
kwargs["_data_type"] = self.metadata.dtype
1624-
kwargs["_chunk_shape"] = self.metadata.chunks
1622+
_data_type: np.dtype[Any] | DataType
1623+
if isinstance(self.metadata, ArrayV2Metadata):
1624+
_data_type = self.metadata.dtype
16251625
else:
1626-
kwargs["_codecs"] = self.metadata.codecs
1627-
kwargs["_data_type"] = self.metadata.data_type
1628-
kwargs["_chunk_shape"] = self.chunks
1629-
kwargs["_shard_shape"] = self.shards
1630-
1626+
_data_type = self.metadata.data_type
16311627
return ArrayInfo(
16321628
_zarr_format=self.metadata.zarr_format,
1629+
_data_type=_data_type,
16331630
_shape=self.shape,
16341631
_order=self.order,
1632+
_shard_shape=self.shards,
1633+
_chunk_shape=self.chunks,
16351634
_read_only=self.read_only,
1635+
_compressors=self.compressors,
1636+
_filters=self.filters,
1637+
_serializer=self.serializer,
16361638
_store_type=type(self.store_path.store).__name__,
16371639
_count_bytes=self.nbytes,
16381640
_count_bytes_stored=count_bytes_stored,
16391641
_count_chunks_initialized=count_chunks_initialized,
1640-
**kwargs,
16411642
)
16421643

16431644

src/zarr/core/metadata/v3.py

Lines changed: 22 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -81,17 +81,26 @@ def parse_codecs(data: object) -> tuple[Codec, ...]:
8181
return out
8282

8383

84-
def validate_codecs(codecs: tuple[Codec, ...], dtype: DataType) -> None:
85-
"""Check that the codecs are valid for the given dtype"""
86-
84+
def validate_array_bytes_codec(codecs: tuple[Codec, ...]) -> ArrayBytesCodec:
8785
# ensure that we have at least one ArrayBytesCodec
8886
abcs: list[ArrayBytesCodec] = [codec for codec in codecs if isinstance(codec, ArrayBytesCodec)]
8987
if len(abcs) == 0:
9088
raise ValueError("At least one ArrayBytesCodec is required.")
9189
elif len(abcs) > 1:
9290
raise ValueError("Only one ArrayBytesCodec is allowed.")
9391

94-
abc = abcs[0]
92+
return abcs[0]
93+
94+
95+
def validate_codecs(codecs: tuple[Codec, ...], dtype: DataType) -> None:
96+
"""Check that the codecs are valid for the given dtype"""
97+
from zarr.codecs.sharding import ShardingCodec
98+
99+
abc = validate_array_bytes_codec(codecs)
100+
101+
# Recursively resolve array-bytes codecs within sharding codecs
102+
while isinstance(abc, ShardingCodec):
103+
abc = validate_array_bytes_codec(abc.codecs)
95104

96105
# we need to have special codecs if we are decoding vlen strings or bytestrings
97106
# TODO: use codec ID instead of class name
@@ -330,6 +339,15 @@ def shards(self) -> ChunkCoords | None:
330339
)
331340
raise NotImplementedError(msg)
332341

342+
@property
343+
def inner_codecs(self) -> tuple[Codec, ...]:
344+
if isinstance(self.chunk_grid, RegularChunkGrid):
345+
from zarr.codecs.sharding import ShardingCodec
346+
347+
if len(self.codecs) == 1 and isinstance(self.codecs[0], ShardingCodec):
348+
return self.codecs[0].codecs
349+
return self.codecs
350+
333351
def get_chunk_spec(
334352
self, _chunk_coords: ChunkCoords, array_config: ArrayConfig, prototype: BufferPrototype
335353
) -> ArraySpec:

tests/test_array.py

Lines changed: 18 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,6 @@
2020
VLenUTF8Codec,
2121
ZstdCodec,
2222
)
23-
from zarr.codecs.sharding import ShardingCodec
2423
from zarr.core._info import ArrayInfo
2524
from zarr.core.array import (
2625
CompressorsLike,
@@ -494,7 +493,7 @@ def test_info_v2(self, chunks: tuple[int, int], shards: tuple[int, int] | None)
494493
_read_only=False,
495494
_store_type="MemoryStore",
496495
_count_bytes=512,
497-
_compressor=numcodecs.Zstd(),
496+
_compressors=(numcodecs.Zstd(),),
498497
)
499498
assert result == expected
500499

@@ -510,9 +509,8 @@ def test_info_v3(self, chunks: tuple[int, int], shards: tuple[int, int] | None)
510509
_order="C",
511510
_read_only=False,
512511
_store_type="MemoryStore",
513-
_codecs=(BytesCodec(), ZstdCodec())
514-
if shards is None
515-
else (ShardingCodec(chunk_shape=chunks, codecs=[BytesCodec(), ZstdCodec()]),),
512+
_compressors=(ZstdCodec(),),
513+
_serializer=BytesCodec(),
516514
_count_bytes=512,
517515
)
518516
assert result == expected
@@ -536,7 +534,7 @@ def test_info_complete(self, chunks: tuple[int, int], shards: tuple[int, int] |
536534
_order="C",
537535
_read_only=False,
538536
_store_type="MemoryStore",
539-
_codecs=(BytesCodec(),) if shards is None else (ShardingCodec(chunk_shape=chunks),),
537+
_serializer=BytesCodec(),
540538
_count_bytes=512,
541539
_count_chunks_initialized=0,
542540
_count_bytes_stored=373 if shards is None else 578, # the metadata?
@@ -572,7 +570,7 @@ async def test_info_v2_async(
572570
_read_only=False,
573571
_store_type="MemoryStore",
574572
_count_bytes=512,
575-
_compressor=numcodecs.Zstd(),
573+
_compressors=(numcodecs.Zstd(),),
576574
)
577575
assert result == expected
578576

@@ -596,9 +594,8 @@ async def test_info_v3_async(
596594
_order="C",
597595
_read_only=False,
598596
_store_type="MemoryStore",
599-
_codecs=(BytesCodec(), ZstdCodec())
600-
if shards is None
601-
else (ShardingCodec(chunk_shape=chunks, codecs=[BytesCodec(), ZstdCodec()]),),
597+
_compressors=(ZstdCodec(),),
598+
_serializer=BytesCodec(),
602599
_count_bytes=512,
603600
)
604601
assert result == expected
@@ -624,7 +621,7 @@ async def test_info_complete_async(
624621
_order="C",
625622
_read_only=False,
626623
_store_type="MemoryStore",
627-
_codecs=(BytesCodec(),) if shards is None else (ShardingCodec(chunk_shape=chunks),),
624+
_serializer=BytesCodec(),
628625
_count_bytes=512,
629626
_count_chunks_initialized=0,
630627
_count_bytes_stored=373 if shards is None else 578, # the metadata?
@@ -1125,16 +1122,24 @@ async def test_create_array_no_filters_compressors(
11251122
({"name": "transpose", "configuration": {"order": [0]}},),
11261123
],
11271124
)
1125+
@pytest.mark.parametrize(("chunks", "shards"), [((6,), None), ((3,), (6,))])
11281126
async def test_create_array_v3_chunk_encoding(
1129-
store: MemoryStore, compressors: CompressorsLike, filters: FiltersLike, dtype: str
1127+
store: MemoryStore,
1128+
compressors: CompressorsLike,
1129+
filters: FiltersLike,
1130+
dtype: str,
1131+
chunks: tuple[int, ...],
1132+
shards: tuple[int, ...] | None,
11301133
) -> None:
11311134
"""
11321135
Test various possibilities for the compressors and filters parameter to create_array
11331136
"""
11341137
arr = await create_array(
11351138
store=store,
11361139
dtype=dtype,
1137-
shape=(10,),
1140+
shape=(12,),
1141+
chunks=chunks,
1142+
shards=shards,
11381143
zarr_format=3,
11391144
filters=filters,
11401145
compressors=compressors,

tests/test_config.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -305,12 +305,12 @@ class NewCodec2(BytesCodec):
305305
@pytest.mark.parametrize(
306306
("dtype", "expected_codecs"),
307307
[
308-
("int", [BytesCodec(), GzipCodec()]),
309-
("bytes", [VLenBytesCodec(), GzipCodec()]),
310-
("str", [VLenUTF8Codec(), GzipCodec()]),
308+
("int", (BytesCodec(), GzipCodec())),
309+
("bytes", (VLenBytesCodec(), GzipCodec())),
310+
("str", (VLenUTF8Codec(), GzipCodec())),
311311
],
312312
)
313-
async def test_default_codecs(dtype: str, expected_codecs: list[Codec]) -> None:
313+
async def test_default_codecs(dtype: str, expected_codecs: tuple[Codec, ...]) -> None:
314314
with config.set(
315315
{
316316
"array.v3_default_codecs": { # test setting non-standard codecs

tests/test_info.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -59,7 +59,7 @@ def test_array_info(zarr_format: ZarrFormat) -> None:
5959
_order="C",
6060
_read_only=True,
6161
_store_type="MemoryStore",
62-
_codecs=(BytesCodec(),),
62+
_serializer=BytesCodec(),
6363
)
6464
result = repr(info)
6565
assert result == textwrap.dedent(f"""\
@@ -71,7 +71,7 @@ def test_array_info(zarr_format: ZarrFormat) -> None:
7171
Order : C
7272
Read-only : True
7373
Store type : MemoryStore
74-
Codecs : ({{'endian': <Endian.little: 'little'>}},)""")
74+
Serializer : {{'endian': <Endian.little: 'little'>}}""")
7575

7676

7777
@pytest.mark.parametrize("zarr_format", ZARR_FORMATS)
@@ -95,7 +95,7 @@ def test_array_info_complete(
9595
_order="C",
9696
_read_only=True,
9797
_store_type="MemoryStore",
98-
_codecs=(BytesCodec(),),
98+
_serializer=BytesCodec(),
9999
_count_bytes=count_bytes,
100100
_count_bytes_stored=count_bytes_stored,
101101
_count_chunks_initialized=count_chunks_initialized,
@@ -110,7 +110,7 @@ def test_array_info_complete(
110110
Order : C
111111
Read-only : True
112112
Store type : MemoryStore
113-
Codecs : ({{'endian': <Endian.little: 'little'>}},)
113+
Serializer : {{'endian': <Endian.little: 'little'>}}
114114
No. bytes : {count_bytes} ({count_bytes_formatted})
115115
No. bytes stored : {count_bytes_stored_formatted}
116116
Storage ratio : {storage_ratio_formatted}

0 commit comments

Comments
 (0)