zarr-developers · normanrz · Jan 6, 2025 · Jan 5, 2025 · Jan 5, 2025 · Jan 5, 2025
diff --git a/docs/user-guide/arrays.rst b/docs/user-guide/arrays.rst
@@ -168,8 +168,8 @@ argument accepted by all array creation functions. For example::
    >>> data = np.arange(100000000, dtype='int32').reshape(10000, 10000)
    >>> z = zarr.create_array(store='data/example-5.zarr', shape=data.shape, dtype=data.dtype, chunks=(1000, 1000), compressors=compressors)
    >>> z[:] = data
-   >>> z.metadata.codecs
-   [BytesCodec(endian=<Endian.little: 'little'>), BloscCodec(typesize=4, cname=<BloscCname.zstd: 'zstd'>, clevel=3, shuffle=<BloscShuffle.bitshuffle: 'bitshuffle'>, blocksize=0)]
+   >>> z.compressors
+   (BloscCodec(typesize=4, cname=<BloscCname.zstd: 'zstd'>, clevel=3, shuffle=<BloscShuffle.bitshuffle: 'bitshuffle'>, blocksize=0),)
 
 The array above will use Blosc as the primary compressor, using the Zstandard
 algorithm (compression level 3) internally within Blosc, and with the
@@ -188,7 +188,8 @@ which can be used to print useful diagnostics, e.g.::
    Order              : C
    Read-only          : False
    Store type         : LocalStore
-   Codecs             : [{'endian': <Endian.little: 'little'>}, {'typesize': 4, 'cname': <BloscCname.zstd: 'zstd'>, 'clevel': 3, 'shuffle': <BloscShuffle.bitshuffle: 'bitshuffle'>, 'blocksize': 0}]
+   Serializer         : BytesCodec(endian=<Endian.little: 'little'>)
+   Compressors        : (BloscCodec(typesize=4, cname=<BloscCname.zstd: 'zstd'>, clevel=3, shuffle=<BloscShuffle.bitshuffle: 'bitshuffle'>, blocksize=0),)
    No. bytes          : 400000000 (381.5M)
 
 The :func:`zarr.Array.info_complete` method inspects the underlying store and
@@ -203,7 +204,8 @@ prints additional diagnostics, e.g.::
    Order              : C
    Read-only          : False
    Store type         : LocalStore
-   Codecs             : [{'endian': <Endian.little: 'little'>}, {'typesize': 4, 'cname': <BloscCname.zstd: 'zstd'>, 'clevel': 3, 'shuffle': <BloscShuffle.bitshuffle: 'bitshuffle'>, 'blocksize': 0}]
+   Serializer         : BytesCodec(endian=<Endian.little: 'little'>)
+   Compressors        : (BloscCodec(typesize=4, cname=<BloscCname.zstd: 'zstd'>, clevel=3, shuffle=<BloscShuffle.bitshuffle: 'bitshuffle'>, blocksize=0),)
    No. bytes          : 400000000 (381.5M)
    No. bytes stored   : 9696302
    Storage ratio      : 41.3
@@ -223,8 +225,8 @@ here is an array using Gzip compression, level 1::
    >>> data = np.arange(100000000, dtype='int32').reshape(10000, 10000)
    >>> z = zarr.create_array(store='data/example-6.zarr', shape=data.shape, dtype=data.dtype, chunks=(1000, 1000), compressors=zarr.codecs.GzipCodec(level=1))
    >>> z[:] = data
-   >>> z.metadata.codecs
-   [BytesCodec(endian=<Endian.little: 'little'>), GzipCodec(level=1)]
+   >>> z.compressors
+   (GzipCodec(level=1),)
 
 Here is an example using LZMA from NumCodecs_ with a custom filter pipeline including LZMA's
 built-in delta filter::
@@ -236,23 +238,24 @@ built-in delta filter::
    >>> compressors = LZMA(filters=lzma_filters)
    >>> data = np.arange(100000000, dtype='int32').reshape(10000, 10000)
    >>> z = zarr.create_array(store='data/example-7.zarr', shape=data.shape, dtype=data.dtype, chunks=(1000, 1000), compressors=compressors)
-   >>> z.metadata.codecs
-   [BytesCodec(endian=<Endian.little: 'little'>), _make_bytes_bytes_codec.<locals>._Codec(codec_name='numcodecs.lzma', codec_config={'id': 'lzma', 'filters': [{'id': 3, 'dist': 4}, {'id': 33, 'preset': 1}]})]
+   >>> z.compressors
+   (_make_bytes_bytes_codec.<locals>._Codec(codec_name='numcodecs.lzma', codec_config={'id': 'lzma', 'filters': [{'id': 3, 'dist': 4}, {'id': 33, 'preset': 1}]}),)
 
 The default compressor can be changed by setting the corresponding value using Zarr's
 :ref:`user-guide-config`, e.g.::
 
    >>> with zarr.config.set({'array.v2_default_compressor.numeric': {'id': 'blosc'}}):
    ...     z = zarr.create_array(store={}, shape=(100000000,), chunks=(1000000,), dtype='int32', zarr_format=2)
-   >>> z.metadata.filters
-   >>> z.metadata.compressor
-   Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)
+   >>> z.filters
+   ()
+   >>> z.compressors
+   (Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0),)
 
 To disable compression, set ``compressors=None`` when creating an array, e.g.::
 
    >>> z = zarr.create_array(store='data/example-8.zarr', shape=(100000000,), chunks=(1000000,), dtype='int32', compressors=None)
-   >>> z.metadata.codecs
-   [BytesCodec(endian=<Endian.little: 'little'>)]
+   >>> z.compressors
+   ()
 
 .. _user-guide-filters:
 
@@ -287,7 +290,9 @@ Here is an example using a delta filter with the Blosc compressor::
    Order              : C
    Read-only          : False
    Store type         : LocalStore
-   Codecs             : [{'codec_name': 'numcodecs.delta', 'codec_config': {'id': 'delta', 'dtype': 'int32'}}, {'endian': <Endian.little: 'little'>}, {'typesize': 4, 'cname': <BloscCname.zstd: 'zstd'>, 'clevel': 1, 'shuffle': <BloscShuffle.shuffle: 'shuffle'>, 'blocksize': 0}]
+   Filters            : (_make_array_array_codec.<locals>._Codec(codec_name='numcodecs.delta', codec_config={'id': 'delta', 'dtype': 'int32'}),)
+   Serializer         : BytesCodec(endian=<Endian.little: 'little'>)
+   Compressors        : (BloscCodec(typesize=4, cname=<BloscCname.zstd: 'zstd'>, clevel=1, shuffle=<BloscShuffle.shuffle: 'shuffle'>, blocksize=0),)
    No. bytes          : 400000000 (381.5M)
 
 For more information about available filter codecs, see the `Numcodecs
@@ -600,11 +605,12 @@ Sharded arrays can be created by providing the ``shards`` parameter to :func:`za
   Order              : C
   Read-only          : False
   Store type         : LocalStore
-  Codecs             : [{'chunk_shape': (100, 100), 'codecs': ({'endian': <Endian.little: 'little'>}, {'level': 0, 'checksum': False}), 'index_codecs': ({'endian': <Endian.little: 'little'>}, {}), 'index_location': <ShardingCodecIndexLocation.end: 'end'>}]
+  Serializer         : BytesCodec(endian=<Endian.little: 'little'>)
+  Compressors        : (ZstdCodec(level=0, checksum=False),)
   No. bytes          : 100000000 (95.4M)
   No. bytes stored   : 3981060
   Storage ratio      : 25.1
-  Chunks Initialized : 100
+  Shards Initialized : 100
 
 In this example a shard shape of (1000, 1000) and a chunk shape of (100, 100) are used.
 This means that 10*10 chunks are stored in each shard, and there are 10*10 shards in total.

diff --git a/docs/user-guide/consolidated_metadata.rst b/docs/user-guide/consolidated_metadata.rst
@@ -52,8 +52,8 @@ that can be used.:
                           chunk_key_encoding=DefaultChunkKeyEncoding(name='default',
                                                                      separator='/'),
                           fill_value=np.float64(0.0),
-                          codecs=[BytesCodec(endian=<Endian.little: 'little'>),
-                                  ZstdCodec(level=0, checksum=False)],
+                          codecs=(BytesCodec(endian=<Endian.little: 'little'>),
+                                  ZstdCodec(level=0, checksum=False)),
                           attributes={},
                           dimension_names=None,
                           zarr_format=3,
@@ -65,8 +65,8 @@ that can be used.:
                           chunk_key_encoding=DefaultChunkKeyEncoding(name='default',
                                                                      separator='/'),
                           fill_value=np.float64(0.0),
-                          codecs=[BytesCodec(endian=<Endian.little: 'little'>),
-                                  ZstdCodec(level=0, checksum=False)],
+                          codecs=(BytesCodec(endian=<Endian.little: 'little'>),
+                                  ZstdCodec(level=0, checksum=False)),
                           attributes={},
                           dimension_names=None,
                           zarr_format=3,
@@ -78,8 +78,8 @@ that can be used.:
                           chunk_key_encoding=DefaultChunkKeyEncoding(name='default',
                                                                      separator='/'),
                           fill_value=np.float64(0.0),
-                          codecs=[BytesCodec(endian=<Endian.little: 'little'>),
-                                  ZstdCodec(level=0, checksum=False)],
+                          codecs=(BytesCodec(endian=<Endian.little: 'little'>),
+                                  ZstdCodec(level=0, checksum=False)),
                           attributes={},
                           dimension_names=None,
                           zarr_format=3,

diff --git a/docs/user-guide/groups.rst b/docs/user-guide/groups.rst
@@ -109,7 +109,8 @@ property. E.g.::
    Order              : C
    Read-only          : False
    Store type         : MemoryStore
-   Codecs             : [{'endian': <Endian.little: 'little'>}, {'level': 0, 'checksum': False}]
+   Serializer         : BytesCodec(endian=<Endian.little: 'little'>)
+   Compressors        : (ZstdCodec(level=0, checksum=False),)
    No. bytes          : 8000000 (7.6M)
    No. bytes stored   : 1432
    Storage ratio      : 5586.6
@@ -123,7 +124,8 @@ property. E.g.::
    Order              : C
    Read-only          : False
    Store type         : MemoryStore
-   Codecs             : [{'endian': <Endian.little: 'little'>}, {'level': 0, 'checksum': False}]
+   Serializer         : BytesCodec(endian=<Endian.little: 'little'>)
+   Compressors        : (ZstdCodec(level=0, checksum=False),)
    No. bytes          : 4000000 (3.8M)
 
 Groups also have the :func:`zarr.Group.tree` method, e.g.::

diff --git a/docs/user-guide/performance.rst b/docs/user-guide/performance.rst
@@ -98,7 +98,8 @@ To use sharding, you need to specify the ``shards`` parameter when creating the
    Order              : C
    Read-only          : False
    Store type         : MemoryStore
-   Codecs             : [{'chunk_shape': (100, 100, 100), 'codecs': ({'endian': <Endian.little: 'little'>}, {'level': 0, 'checksum': False}), 'index_codecs': ({'endian': <Endian.little: 'little'>}, {}), 'index_location': <ShardingCodecIndexLocation.end: 'end'>}]
+   Serializer         : BytesCodec(endian=<Endian.little: 'little'>)
+   Compressors        : (ZstdCodec(level=0, checksum=False),)
    No. bytes          : 100000000000 (93.1G)
 
 .. _user-guide-chunks-order:
@@ -125,7 +126,8 @@ ratios, depending on the correlation structure within the data. E.g.::
    Order              : C
    Read-only          : False
    Store type         : MemoryStore
-   Codecs             : [{'endian': <Endian.little: 'little'>}, {'level': 0, 'checksum': False}]
+   Serializer         : BytesCodec(endian=<Endian.little: 'little'>)
+   Compressors        : (ZstdCodec(level=0, checksum=False),)
    No. bytes          : 400000000 (381.5M)
    No. bytes stored   : 342588717
    Storage ratio      : 1.2
@@ -142,7 +144,8 @@ ratios, depending on the correlation structure within the data. E.g.::
    Order              : F
    Read-only          : False
    Store type         : MemoryStore
-   Codecs             : [{'endian': <Endian.little: 'little'>}, {'level': 0, 'checksum': False}]
+   Serializer         : BytesCodec(endian=<Endian.little: 'little'>)
+   Compressors        : (ZstdCodec(level=0, checksum=False),)
    No. bytes          : 400000000 (381.5M)
    No. bytes stored   : 342588717
    Storage ratio      : 1.2

diff --git a/src/zarr/api/synchronous.py b/src/zarr/api/synchronous.py
@@ -802,7 +802,7 @@ def create_array(
         Use ``None`` to omit default filters.
     compressors : Iterable[Codec], optional
         List of compressors to apply to the array. Compressors are applied in order, and after any
-        filters are applied (if any are specified).
+        filters are applied (if any are specified) and the data is serialized into bytes.
 
         For Zarr v3, a "compressor" is a codec that takes a bytestream and
         returns another bytestream. Multiple compressors may be provided for Zarr v3.

diff --git a/src/zarr/core/_info.py b/src/zarr/core/_info.py
@@ -5,7 +5,7 @@
 import numcodecs.abc
 import numpy as np
 
-from zarr.abc.codec import Codec
+from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec
 from zarr.core.common import ZarrFormat
 from zarr.core.metadata.v3 import DataType
 
@@ -85,9 +85,9 @@ class ArrayInfo:
     _order: Literal["C", "F"]
     _read_only: bool
     _store_type: str
-    _compressor: numcodecs.abc.Codec | None = None
-    _filters: tuple[numcodecs.abc.Codec, ...] | None = None
-    _codecs: list[Codec] | None = None
+    _filters: tuple[numcodecs.abc.Codec, ...] | tuple[ArrayArrayCodec, ...] = ()
+    _serializer: ArrayBytesCodec | None = None
+    _compressors: tuple[numcodecs.abc.Codec, ...] | tuple[BytesBytesCodec, ...] = ()
     _count_bytes: int | None = None
     _count_bytes_stored: int | None = None
     _count_chunks_initialized: int | None = None
@@ -109,18 +109,19 @@ def __repr__(self) -> str:
         Read-only          : {_read_only}
         Store type         : {_store_type}""")
 
-        kwargs = dataclasses.asdict(self)
+        # We can't use dataclasses.asdict, because we only want a shallow dict
+        kwargs = {field.name: getattr(self, field.name) for field in dataclasses.fields(self)}
+
         if self._chunk_shape is None:
             # for non-regular chunk grids
             kwargs["chunk_shape"] = "<variable>"
-        if self._compressor is not None:
-            template += "\nCompressor         : {_compressor}"
 
-        if self._filters is not None:
+        if len(self._filters) > 0:
             template += "\nFilters            : {_filters}"
-
-        if self._codecs is not None:
-            template += "\nCodecs             : {_codecs}"
+        if self._serializer is not None:
+            template += "\nSerializer         : {_serializer}"
+        if len(self._compressors) > 0:
+            template += "\nCompressors        : {_compressors}"
 
         if self._count_bytes is not None:
             template += "\nNo. bytes          : {_count_bytes}"
@@ -139,5 +140,8 @@ def __repr__(self) -> str:
             kwargs["_storage_ratio"] = f"{self._count_bytes / self._count_bytes_stored:.1f}"
 
         if self._count_chunks_initialized is not None:
-            template += "\nChunks Initialized : {_count_chunks_initialized}"
+            if self._shard_shape is not None:
+                template += "\nShards Initialized : {_count_chunks_initialized}"
+            else:
+                template += "\nChunks Initialized : {_count_chunks_initialized}"
         return template.format(**kwargs)