Skip to content

Commit 80b5a10

Browse files
committed
Merge branch 'feat/read-funcs' of github.com:d-v-b/zarr-python into feat/read-funcs
2 parents df35d13 + fb286a7 commit 80b5a10

File tree

16 files changed

+221
-100
lines changed

16 files changed

+221
-100
lines changed

src/zarr/core/array.py

Lines changed: 81 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,8 @@
8989
from zarr.core.metadata.v2 import (
9090
_default_compressor,
9191
_default_filters,
92+
parse_compressor,
93+
parse_filters,
9294
)
9395
from zarr.core.metadata.v3 import DataType, parse_node_type_array
9496
from zarr.core.sync import sync
@@ -164,7 +166,7 @@ async def get_array_metadata(
164166
)
165167
if zarr_json_bytes is not None and zarray_bytes is not None:
166168
# warn and favor v3
167-
msg = f"Both zarr.json (zarr v3) and .zarray (zarr v2) metadata objects exist at {store_path}."
169+
msg = f"Both zarr.json (Zarr v3) and .zarray (Zarr v2) metadata objects exist at {store_path}. Zarr v3 will be used."
168170
warnings.warn(msg, stacklevel=1)
169171
if zarr_json_bytes is None and zarray_bytes is None:
170172
raise FileNotFoundError(store_path)
@@ -667,8 +669,8 @@ async def _create_v2(
667669
config: ArrayConfig,
668670
dimension_separator: Literal[".", "/"] | None = None,
669671
fill_value: float | None = None,
670-
filters: list[dict[str, JSON]] | None = None,
671-
compressor: dict[str, JSON] | None = None,
672+
filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None,
673+
compressor: dict[str, JSON] | numcodecs.abc.Codec | None = None,
672674
attributes: dict[str, JSON] | None = None,
673675
overwrite: bool = False,
674676
) -> AsyncArray[ArrayV2Metadata]:
@@ -803,6 +805,7 @@ def shape(self) -> ChunkCoords:
803805
@property
804806
def chunks(self) -> ChunkCoords:
805807
"""Returns the chunk shape of the Array.
808+
If sharding is used the inner chunk shape is returned.
806809
807810
Only defined for arrays using `RegularChunkGrid`.
808811
If array doesn't use `RegularChunkGrid`, `NotImplementedError` is raised.
@@ -812,14 +815,22 @@ def chunks(self) -> ChunkCoords:
812815
ChunkCoords:
813816
The chunk shape of the Array.
814817
"""
815-
if isinstance(self.metadata.chunk_grid, RegularChunkGrid):
816-
return self.metadata.chunk_grid.chunk_shape
818+
return self.metadata.chunks
817819

818-
msg = (
819-
f"The `chunks` attribute is only defined for arrays using `RegularChunkGrid`."
820-
f"This array has a {self.metadata.chunk_grid} instead."
821-
)
822-
raise NotImplementedError(msg)
820+
@property
821+
def shards(self) -> ChunkCoords | None:
822+
"""Returns the shard shape of the Array.
823+
Returns None if sharding is not used.
824+
825+
Only defined for arrays using `RegularChunkGrid`.
826+
If array doesn't use `RegularChunkGrid`, `NotImplementedError` is raised.
827+
828+
Returns
829+
-------
830+
ChunkCoords:
831+
The shard shape of the Array.
832+
"""
833+
return self.metadata.shards
823834

824835
@property
825836
def size(self) -> int:
@@ -1733,6 +1744,10 @@ def shape(self, value: ChunkCoords) -> None:
17331744
@property
17341745
def chunks(self) -> ChunkCoords:
17351746
"""Returns a tuple of integers describing the length of each dimension of a chunk of the array.
1747+
If sharding is used the inner chunk shape is returned.
1748+
1749+
Only defined for arrays using `RegularChunkGrid`.
1750+
If array doesn't use `RegularChunkGrid`, `NotImplementedError` is raised.
17361751
17371752
Returns
17381753
-------
@@ -1741,6 +1756,21 @@ def chunks(self) -> ChunkCoords:
17411756
"""
17421757
return self._async_array.chunks
17431758

1759+
@property
1760+
def shards(self) -> ChunkCoords | None:
1761+
"""Returns a tuple of integers describing the length of each dimension of a shard of the array.
1762+
Returns None if sharding is not used.
1763+
1764+
Only defined for arrays using `RegularChunkGrid`.
1765+
If array doesn't use `RegularChunkGrid`, `NotImplementedError` is raised.
1766+
1767+
Returns
1768+
-------
1769+
tuple | None
1770+
A tuple of integers representing the length of each dimension of a shard or None if sharding is not used.
1771+
"""
1772+
return self._async_array.shards
1773+
17441774
@property
17451775
def size(self) -> int:
17461776
"""Returns the total number of elements in the array.
@@ -3464,7 +3494,7 @@ def _get_default_codecs(
34643494
else:
34653495
dtype_key = "numeric"
34663496

3467-
return default_codecs[dtype_key]
3497+
return cast(list[dict[str, JSON]], default_codecs[dtype_key])
34683498

34693499

34703500
FiltersParam: TypeAlias = (
@@ -3473,7 +3503,7 @@ def _get_default_codecs(
34733503
| Iterable[numcodecs.abc.Codec]
34743504
| Literal["auto"]
34753505
)
3476-
CompressionParam: TypeAlias = (
3506+
CompressorsParam: TypeAlias = (
34773507
Iterable[dict[str, JSON] | BytesBytesCodec]
34783508
| BytesBytesCodec
34793509
| numcodecs.abc.Codec
@@ -3490,7 +3520,7 @@ async def create_array(
34903520
chunks: ChunkCoords | Literal["auto"] = "auto",
34913521
shards: ChunkCoords | Literal["auto"] | None = None,
34923522
filters: FiltersParam = "auto",
3493-
compressors: CompressionParam = "auto",
3523+
compressors: CompressorsParam = "auto",
34943524
fill_value: Any | None = 0,
34953525
order: MemoryOrder | None = None,
34963526
zarr_format: ZarrFormat | None = 3,
@@ -3522,16 +3552,16 @@ async def create_array(
35223552
filters : Iterable[Codec], optional
35233553
Iterable of filters to apply to each chunk of the array, in order, before serializing that
35243554
chunk to bytes.
3525-
For Zarr v3, a "filter" is a transformation that takes an array and returns an array,
3555+
For Zarr v3, a "filter" is a codec that takes an array and returns an array,
35263556
and these values must be instances of ``ArrayArrayCodec``, or dict representations
35273557
of ``ArrayArrayCodec``.
35283558
For Zarr v2, a "filter" can be any numcodecs codec; you should ensure that the
35293559
order of your filters is consistent with the behavior of each filter.
35303560
compressors : Iterable[Codec], optional
35313561
List of compressors to apply to the array. Compressors are applied in order, and after any
35323562
filters are applied (if any are specified).
3533-
For Zarr v3, a "compressor" is a transformation that takes a string of bytes and
3534-
returns another string of bytes.
3563+
For Zarr v3, a "compressor" is a codec that takes a bytestream and
3564+
returns another bytestream.
35353565
For Zarr v2, a "compressor" can be any numcodecs codec.
35363566
fill_value : Any, optional
35373567
Fill value for the array.
@@ -3589,13 +3619,8 @@ async def create_array(
35893619
)
35903620

35913621
raise ValueError(msg)
3592-
if filters != "auto" and not all(isinstance(f, numcodecs.abc.Codec) for f in filters):
3593-
raise TypeError(
3594-
"For Zarr v2 arrays, all elements of `filters` must be numcodecs codecs."
3595-
)
3596-
filters = cast(Iterable[numcodecs.abc.Codec] | Literal["auto"], filters)
35973622
filters_parsed, compressor_parsed = _parse_chunk_encoding_v2(
3598-
compression=compressors, filters=filters, dtype=dtype_parsed
3623+
compressor=compressors, filters=filters, dtype=dtype_parsed
35993624
)
36003625
if dimension_names is not None:
36013626
raise ValueError("Zarr v2 arrays do not support dimension names.")
@@ -3622,7 +3647,7 @@ async def create_array(
36223647
array_array, array_bytes, bytes_bytes = _parse_chunk_encoding_v3(
36233648
compressors=compressors, filters=filters, dtype=dtype_parsed
36243649
)
3625-
sub_codecs = (*array_array, array_bytes, *bytes_bytes)
3650+
sub_codecs = cast(tuple[Codec, ...], (*array_array, array_bytes, *bytes_bytes))
36263651
codecs_out: tuple[Codec, ...]
36273652
if shard_shape_parsed is not None:
36283653
sharding_codec = ShardingCodec(chunk_shape=chunk_shape_parsed, codecs=sub_codecs)
@@ -3666,7 +3691,7 @@ def _parse_chunk_key_encoding(
36663691
"""
36673692
if data is None:
36683693
if zarr_format == 2:
3669-
result = ChunkKeyEncoding.from_dict({"name": "v2", "separator": "/"})
3694+
result = ChunkKeyEncoding.from_dict({"name": "v2", "separator": "."})
36703695
else:
36713696
result = ChunkKeyEncoding.from_dict({"name": "default", "separator": "/"})
36723697
elif isinstance(data, ChunkKeyEncoding):
@@ -3726,61 +3751,77 @@ def _get_default_encoding_v3(
37263751

37273752
def _get_default_chunk_encoding_v2(
37283753
dtype: np.dtype[Any],
3729-
) -> tuple[tuple[numcodecs.abc.Codec, ...], numcodecs.abc.Codec | None]:
3754+
) -> tuple[tuple[numcodecs.abc.Codec, ...] | None, numcodecs.abc.Codec | None]:
37303755
"""
37313756
Get the default chunk encoding for zarr v2 arrays, given a dtype
37323757
"""
37333758

37343759
compressor_dict = _default_compressor(dtype)
37353760
filter_dicts = _default_filters(dtype)
37363761

3737-
compressor = numcodecs.get_codec(compressor_dict)
3738-
filters = tuple(numcodecs.get_codec(f) for f in filter_dicts)
3762+
compressor = None
3763+
if compressor_dict is not None:
3764+
compressor = numcodecs.get_codec(compressor_dict)
3765+
3766+
filters = None
3767+
if filter_dicts is not None:
3768+
filters = tuple(numcodecs.get_codec(f) for f in filter_dicts)
3769+
37393770
return filters, compressor
37403771

37413772

37423773
def _parse_chunk_encoding_v2(
37433774
*,
3744-
compression: numcodecs.abc.Codec | Literal["auto"],
3745-
filters: tuple[numcodecs.abc.Codec, ...] | Literal["auto"],
3775+
compressor: CompressorsParam,
3776+
filters: FiltersParam,
37463777
dtype: np.dtype[Any],
3747-
) -> tuple[tuple[numcodecs.abc.Codec, ...], numcodecs.abc.Codec]:
3778+
) -> tuple[tuple[numcodecs.abc.Codec, ...] | None, numcodecs.abc.Codec | None]:
37483779
"""
37493780
Generate chunk encoding classes for v2 arrays with optional defaults.
37503781
"""
37513782
default_filters, default_compressor = _get_default_chunk_encoding_v2(dtype)
3752-
_filters: tuple[numcodecs.abc.Codec, ...] = ()
3753-
if compression == "auto":
3783+
3784+
_filters: tuple[numcodecs.abc.Codec, ...] | None = None
3785+
_compressor: numcodecs.abc.Codec | None = None
3786+
3787+
if compressor == "auto":
37543788
_compressor = default_compressor
37553789
else:
3756-
_compressor = compression
3790+
if isinstance(compressor, Iterable):
3791+
raise TypeError("For Zarr v2 arrays, the `compressor` must be a single codec.")
3792+
_compressor = parse_compressor(compressor)
37573793
if filters == "auto":
37583794
_filters = default_filters
37593795
else:
3760-
_filters = filters
3796+
if not all(isinstance(f, numcodecs.abc.Codec) for f in filters):
3797+
raise TypeError(
3798+
"For Zarr v2 arrays, all elements of `filters` must be numcodecs codecs."
3799+
)
3800+
_filters = parse_filters(filters)
3801+
37613802
return _filters, _compressor
37623803

37633804

37643805
def _parse_chunk_encoding_v3(
37653806
*,
3766-
compressors: Iterable[BytesBytesCodec | dict[str, JSON]] | Literal["auto"],
3767-
filters: Iterable[ArrayArrayCodec | dict[str, JSON]] | Literal["auto"],
3807+
compressors: CompressorsParam,
3808+
filters: FiltersParam,
37683809
dtype: np.dtype[Any],
37693810
) -> tuple[tuple[ArrayArrayCodec, ...], ArrayBytesCodec, tuple[BytesBytesCodec, ...]]:
37703811
"""
37713812
Generate chunk encoding classes for v3 arrays with optional defaults.
37723813
"""
37733814
default_array_array, default_array_bytes, default_bytes_bytes = _get_default_encoding_v3(dtype)
3774-
maybe_bytes_bytes: Iterable[BytesBytesCodec | dict[str, JSON]]
3775-
maybe_array_array: Iterable[ArrayArrayCodec | dict[str, JSON]]
3815+
maybe_bytes_bytes: Iterable[Codec | dict[str, JSON]]
3816+
maybe_array_array: Iterable[Codec | dict[str, JSON]]
37763817

37773818
if compressors == "auto":
37783819
out_bytes_bytes = default_bytes_bytes
37793820
else:
37803821
if isinstance(compressors, dict | Codec):
37813822
maybe_bytes_bytes = (compressors,)
37823823
else:
3783-
maybe_bytes_bytes = compressors
3824+
maybe_bytes_bytes = cast(Iterable[Codec | dict[str, JSON]], compressors)
37843825

37853826
out_bytes_bytes = tuple(_parse_bytes_bytes_codec(c) for c in maybe_bytes_bytes)
37863827

@@ -3790,7 +3831,7 @@ def _parse_chunk_encoding_v3(
37903831
if isinstance(filters, dict | Codec):
37913832
maybe_array_array = (filters,)
37923833
else:
3793-
maybe_array_array = filters
3834+
maybe_array_array = cast(Iterable[Codec | dict[str, JSON]], filters)
37943835
out_array_array = tuple(_parse_array_array_codec(c) for c in maybe_array_array)
37953836

37963837
return out_array_array, default_array_bytes, out_bytes_bytes

src/zarr/core/buffer/core.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,6 @@
1616
import numpy as np
1717
import numpy.typing as npt
1818

19-
from zarr.registry import (
20-
get_buffer_class,
21-
get_ndbuffer_class,
22-
)
23-
2419
if TYPE_CHECKING:
2520
from collections.abc import Iterable, Sequence
2621
from typing import Self
@@ -507,4 +502,9 @@ class BufferPrototype(NamedTuple):
507502

508503
# The default buffer prototype used throughout the Zarr codebase.
509504
def default_buffer_prototype() -> BufferPrototype:
505+
from zarr.registry import (
506+
get_buffer_class,
507+
get_ndbuffer_class,
508+
)
509+
510510
return BufferPrototype(buffer=get_buffer_class(), nd_buffer=get_ndbuffer_class())

src/zarr/core/chunk_key_encodings.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,16 @@ def __init__(self, *, separator: SeparatorLiteral) -> None:
3636
object.__setattr__(self, "separator", separator_parsed)
3737

3838
@classmethod
39-
def from_dict(cls, data: dict[str, JSON] | ChunkKeyEncoding) -> ChunkKeyEncoding:
39+
def from_dict(
40+
cls, data: dict[str, JSON] | ChunkKeyEncoding | ChunkKeyEncodingParams
41+
) -> ChunkKeyEncoding:
4042
if isinstance(data, ChunkKeyEncoding):
4143
return data
4244

45+
# handle ChunkKeyEncodingParams
46+
if "name" in data and "separator" in data:
47+
data = {"name": data["name"], "configuration": {"separator": data["separator"]}}
48+
4349
# configuration is optional for chunk key encodings
4450
name_parsed, config_parsed = parse_named_configuration(data, require_configuration=False)
4551
if name_parsed == "default":

src/zarr/core/config.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -67,27 +67,27 @@ def reset(self) -> None:
6767
"order": "C",
6868
"write_empty_chunks": False,
6969
"v2_default_compressor": {
70-
"numeric": {"id": "zstd", "level": 0, "checksum": True},
71-
"string": {"id": "zstd", "level": 0, "checksum": True},
72-
"bytes": {"id": "zstd", "level": 0, "checksum": True},
70+
"numeric": {"id": "zstd", "level": 0, "checksum": False},
71+
"string": {"id": "zstd", "level": 0, "checksum": False},
72+
"bytes": {"id": "zstd", "level": 0, "checksum": False},
7373
},
7474
"v2_default_filters": {
75-
"numeric": [],
75+
"numeric": None,
7676
"string": [{"id": "vlen-utf8"}],
7777
"bytes": [{"id": "vlen-bytes"}],
7878
},
7979
"v3_default_codecs": {
8080
"numeric": [
8181
{"name": "bytes", "configuration": {"endian": "little"}},
82-
{"name": "zstd", "configuration": {"level": 0, "checksum": True}},
82+
{"name": "zstd", "configuration": {"level": 0, "checksum": False}},
8383
],
8484
"string": [
8585
{"name": "vlen-utf8"},
86-
{"name": "zstd", "configuration": {"level": 0, "checksum": True}},
86+
{"name": "zstd", "configuration": {"level": 0, "checksum": False}},
8787
],
8888
"bytes": [
8989
{"name": "vlen-bytes"},
90-
{"name": "zstd", "configuration": {"level": 0, "checksum": True}},
90+
{"name": "zstd", "configuration": {"level": 0, "checksum": False}},
9191
],
9292
},
9393
},

0 commit comments

Comments
 (0)