Fix mypy type errors

normanrz · normanrz · commit fb286a7283dc · 2024-12-28T13:34:31.000+01:00
diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py
@@ -89,6 +89,8 @@
 from zarr.core.metadata.v2 import (
     _default_compressor,
     _default_filters,
+    parse_compressor,
+    parse_filters,
 )
 from zarr.core.metadata.v3 import DataType, parse_node_type_array
 from zarr.core.sync import sync
@@ -164,7 +166,7 @@ async def get_array_metadata(
         )
         if zarr_json_bytes is not None and zarray_bytes is not None:
             # warn and favor v3
-            msg = f"Both zarr.json (zarr v3) and .zarray (zarr v2) metadata objects exist at {store_path}."
+            msg = f"Both zarr.json (Zarr v3) and .zarray (Zarr v2) metadata objects exist at {store_path}. Zarr v3 will be used."
             warnings.warn(msg, stacklevel=1)
         if zarr_json_bytes is None and zarray_bytes is None:
             raise FileNotFoundError(store_path)
@@ -667,8 +669,8 @@ async def _create_v2(
         config: ArrayConfig,
         dimension_separator: Literal[".", "/"] | None = None,
         fill_value: float | None = None,
-        filters: list[dict[str, JSON]] | None = None,
-        compressor: dict[str, JSON] | None = None,
+        filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None,
+        compressor: dict[str, JSON] | numcodecs.abc.Codec | None = None,
         attributes: dict[str, JSON] | None = None,
         overwrite: bool = False,
     ) -> AsyncArray[ArrayV2Metadata]:
@@ -3492,13 +3494,13 @@ def _get_default_codecs(
     else:
         dtype_key = "numeric"
 
-    return default_codecs[dtype_key]
+    return cast(list[dict[str, JSON]], default_codecs[dtype_key])
 
 
 FiltersParam: TypeAlias = (
     Iterable[dict[str, JSON] | Codec] | Iterable[numcodecs.abc.Codec] | Literal["auto"]
 )
-CompressionParam: TypeAlias = (
+CompressorsParam: TypeAlias = (
     Iterable[dict[str, JSON] | Codec] | Codec | numcodecs.abc.Codec | Literal["auto"]
 )
 
@@ -3512,7 +3514,7 @@ async def create_array(
     chunks: ChunkCoords | Literal["auto"] = "auto",
     shards: ChunkCoords | Literal["auto"] | None = None,
     filters: FiltersParam = "auto",
-    compressors: CompressionParam = "auto",
+    compressors: CompressorsParam = "auto",
     fill_value: Any | None = 0,
     order: MemoryOrder | None = "C",
     zarr_format: ZarrFormat | None = 3,
@@ -3544,16 +3546,16 @@ async def create_array(
     filters : Iterable[Codec], optional
         Iterable of filters to apply to each chunk of the array, in order, before serializing that
         chunk to bytes.
-        For Zarr v3, a "filter" is a transformation that takes an array and returns an array,
+        For Zarr v3, a "filter" is a codec that takes an array and returns an array,
         and these values must be instances of ``ArrayArrayCodec``, or dict representations
         of ``ArrayArrayCodec``.
         For Zarr v2, a "filter" can be any numcodecs codec; you should ensure that the
         the order if your filters is consistent with the behavior of each filter.
     compressors : Iterable[Codec], optional
         List of compressors to apply to the array. Compressors are applied in order, and after any
         filters are applied (if any are specified).
-        For Zarr v3, a "compressor" is a transformation that takes a string of bytes and
-        returns another string of bytes.
+        For Zarr v3, a "compressor" is a codec that takes a bytestream and
+        returns another bytestream.
         For Zarr v2, a "compressor" can be any numcodecs codec.
     fill_value : Any, optional
         Fill value for the array.
@@ -3611,11 +3613,6 @@ async def create_array(
             )
 
             raise ValueError(msg)
-        if filters != "auto" and not all(isinstance(f, numcodecs.abc.Codec) for f in filters):
-            raise TypeError(
-                "For Zarr v2 arrays, all elements of `filters` must be numcodecs codecs."
-            )
-        filters = cast(Iterable[numcodecs.abc.Codec] | Literal["auto"], filters)
         filters_parsed, compressor_parsed = _parse_chunk_encoding_v2(
             compressor=compressors, filters=filters, dtype=dtype_parsed
         )
@@ -3644,7 +3641,7 @@ async def create_array(
         array_array, array_bytes, bytes_bytes = _parse_chunk_encoding_v3(
             compressors=compressors, filters=filters, dtype=dtype_parsed
         )
-        sub_codecs = (*array_array, array_bytes, *bytes_bytes)
+        sub_codecs = cast(tuple[Codec, ...], (*array_array, array_bytes, *bytes_bytes))
         codecs_out: tuple[Codec, ...]
         if shard_shape_parsed is not None:
             sharding_codec = ShardingCodec(chunk_shape=chunk_shape_parsed, codecs=sub_codecs)
@@ -3688,7 +3685,7 @@ def _parse_chunk_key_encoding(
     """
     if data is None:
         if zarr_format == 2:
-            result = ChunkKeyEncoding.from_dict({"name": "v2", "separator": "/"})
+            result = ChunkKeyEncoding.from_dict({"name": "v2", "separator": "."})
         else:
             result = ChunkKeyEncoding.from_dict({"name": "default", "separator": "/"})
     elif isinstance(data, ChunkKeyEncoding):
@@ -3769,46 +3766,56 @@ def _get_default_chunk_encoding_v2(
 
 def _parse_chunk_encoding_v2(
     *,
-    compressor: numcodecs.abc.Codec | Literal["auto"],
-    filters: tuple[numcodecs.abc.Codec, ...] | Literal["auto"],
+    compressor: CompressorsParam,
+    filters: FiltersParam,
     dtype: np.dtype[Any],
 ) -> tuple[tuple[numcodecs.abc.Codec, ...] | None, numcodecs.abc.Codec | None]:
     """
     Generate chunk encoding classes for v2 arrays with optional defaults.
     """
     default_filters, default_compressor = _get_default_chunk_encoding_v2(dtype)
-    _filters: tuple[numcodecs.abc.Codec, ...] = ()
+
+    _filters: tuple[numcodecs.abc.Codec, ...] | None = None
+    _compressor: numcodecs.abc.Codec | None = None
+
     if compressor == "auto":
         _compressor = default_compressor
     else:
-        _compressor = compressor
+        if isinstance(compressor, Iterable):
+            raise TypeError("For Zarr v2 arrays, the `compressor` must be a single codec.")
+        _compressor = parse_compressor(compressor)
     if filters == "auto":
         _filters = default_filters
     else:
-        _filters = filters
+        if not all(isinstance(f, numcodecs.abc.Codec) for f in filters):
+            raise TypeError(
+                "For Zarr v2 arrays, all elements of `filters` must be numcodecs codecs."
+            )
+        _filters = parse_filters(filters)
+
     return _filters, _compressor
 
 
 def _parse_chunk_encoding_v3(
     *,
-    compressors: Iterable[BytesBytesCodec | dict[str, JSON]] | Literal["auto"],
-    filters: Iterable[ArrayArrayCodec | dict[str, JSON]] | Literal["auto"],
+    compressors: CompressorsParam,
+    filters: FiltersParam,
     dtype: np.dtype[Any],
 ) -> tuple[tuple[ArrayArrayCodec, ...], ArrayBytesCodec, tuple[BytesBytesCodec, ...]]:
     """
     Generate chunk encoding classes for v3 arrays with optional defaults.
     """
     default_array_array, default_array_bytes, default_bytes_bytes = _get_default_encoding_v3(dtype)
-    maybe_bytes_bytes: Iterable[BytesBytesCodec | dict[str, JSON]]
-    maybe_array_array: Iterable[ArrayArrayCodec | dict[str, JSON]]
+    maybe_bytes_bytes: Iterable[Codec | dict[str, JSON]]
+    maybe_array_array: Iterable[Codec | dict[str, JSON]]
 
     if compressors == "auto":
         out_bytes_bytes = default_bytes_bytes
     else:
         if isinstance(compressors, dict | Codec):
             maybe_bytes_bytes = (compressors,)
         else:
-            maybe_bytes_bytes = compressors
+            maybe_bytes_bytes = cast(Iterable[Codec | dict[str, JSON]], compressors)
 
         out_bytes_bytes = tuple(_parse_bytes_bytes_codec(c) for c in maybe_bytes_bytes)
 
@@ -3818,7 +3825,7 @@ def _parse_chunk_encoding_v3(
         if isinstance(filters, dict | Codec):
             maybe_array_array = (filters,)
         else:
-            maybe_array_array = filters
+            maybe_array_array = cast(Iterable[Codec | dict[str, JSON]], filters)
         out_array_array = tuple(_parse_array_array_codec(c) for c in maybe_array_array)
 
     return out_array_array, default_array_bytes, out_bytes_bytes
diff --git a/src/zarr/core/chunk_key_encodings.py b/src/zarr/core/chunk_key_encodings.py
@@ -36,10 +36,16 @@ def __init__(self, *, separator: SeparatorLiteral) -> None:
         object.__setattr__(self, "separator", separator_parsed)
 
     @classmethod
-    def from_dict(cls, data: dict[str, JSON] | ChunkKeyEncoding) -> ChunkKeyEncoding:
+    def from_dict(
+        cls, data: dict[str, JSON] | ChunkKeyEncoding | ChunkKeyEncodingParams
+    ) -> ChunkKeyEncoding:
         if isinstance(data, ChunkKeyEncoding):
             return data
 
+        # handle ChunkKeyEncodingParams
+        if "name" in data and "separator" in data:
+            data = {"name": data["name"], "configuration": {"separator": data["separator"]}}
+
         # configuration is optional for chunk key encodings
         name_parsed, config_parsed = parse_named_configuration(data, require_configuration=False)
         if name_parsed == "default":
diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py
@@ -21,7 +21,7 @@
 from zarr.core.array import (
     Array,
     AsyncArray,
-    CompressionParam,
+    CompressorsParam,
     FiltersParam,
     _build_parents,
     create_array,
@@ -511,7 +511,7 @@ async def open(
             )
             if zarr_json_bytes is not None and zgroup_bytes is not None:
                 # warn and favor v3
-                msg = f"Both zarr.json (zarr v3) and .zgroup (zarr v2) metadata objects exist at {store_path}."
+                msg = f"Both zarr.json (Zarr v3) and .zgroup (Zarr v2) metadata objects exist at {store_path}. Zarr v3 will be used."
                 warnings.warn(msg, stacklevel=1)
             if zarr_json_bytes is None and zgroup_bytes is None:
                 raise FileNotFoundError(
@@ -1011,7 +1011,7 @@ async def create_array(
         chunks: ChunkCoords | Literal["auto"] = "auto",
         shards: ChunkCoords | Literal["auto"] | None = None,
         filters: FiltersParam = "auto",
-        compressors: CompressionParam = "auto",
+        compressors: CompressorsParam = "auto",
         fill_value: Any | None = 0,
         order: MemoryOrder | None = "C",
         attributes: dict[str, JSON] | None = None,
@@ -2539,8 +2539,8 @@ def array(
         dtype: npt.DTypeLike,
         chunks: ChunkCoords | Literal["auto"] = "auto",
         shards: ChunkCoords | Literal["auto"] | None = None,
-        filters: Iterable[dict[str, JSON] | Codec] = "auto",
-        compressors: Iterable[dict[str, JSON] | Codec] = "auto",
+        filters: FiltersParam = "auto",
+        compressors: CompressorsParam = "auto",
         fill_value: Any | None = 0,
         order: MemoryOrder | None = "C",
         attributes: dict[str, JSON] | None = None,
diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py
@@ -16,7 +16,7 @@
     import numpy.typing as npt
 
     from zarr.core.buffer import Buffer, BufferPrototype
-    from zarr.core.common import JSON, ChunkCoords
+    from zarr.core.common import ChunkCoords
 
 import json
 from dataclasses import dataclass, field, fields, replace
@@ -27,7 +27,7 @@
 from zarr.core.array_spec import ArrayConfig, ArraySpec
 from zarr.core.chunk_grids import RegularChunkGrid
 from zarr.core.chunk_key_encodings import parse_separator
-from zarr.core.common import ZARRAY_JSON, ZATTRS_JSON, MemoryOrder, parse_shapelike
+from zarr.core.common import JSON, ZARRAY_JSON, ZATTRS_JSON, MemoryOrder, parse_shapelike
 from zarr.core.config import config, parse_indexing_order
 from zarr.core.metadata.common import parse_attributes
 
@@ -352,7 +352,7 @@ def _default_compressor(
     else:
         raise ValueError(f"Unsupported dtype kind {dtype.kind}")
 
-    return default_compressor.get(dtype_key, None)
+    return cast(dict[str, JSON] | None, default_compressor.get(dtype_key, None))
 
 
 def _default_filters(
@@ -372,4 +372,4 @@ def _default_filters(
     else:
         raise ValueError(f"Unsupported dtype kind {dtype.kind}")
 
-    return default_filters.get(dtype_key, None)
+    return cast(list[dict[str, JSON]] | None, default_filters.get(dtype_key, None))
diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py
@@ -548,7 +548,7 @@ def default_fill_value(dtype: DataType) -> str | bytes | np.generic:
     else:
         np_dtype = dtype.to_numpy()
         np_dtype = cast(np.dtype[Any], np_dtype)
-        return np_dtype.type(0)
+        return np_dtype.type(0)  # type: ignore[misc]
 
 
 # For type checking
diff --git a/src/zarr/registry.py b/src/zarr/registry.py
@@ -161,7 +161,7 @@ def _resolve_codec(data: dict[str, JSON]) -> Codec:
     return get_codec_class(data["name"]).from_dict(data)  # type: ignore[arg-type]
 
 
-def _parse_bytes_bytes_codec(data: dict[str, JSON] | BytesBytesCodec) -> BytesBytesCodec:
+def _parse_bytes_bytes_codec(data: dict[str, JSON] | Codec) -> BytesBytesCodec:
     """
     Normalize the input to a ``BytesBytesCodec`` instance.
     If the input is already a ``BytesBytesCodec``, it is returned as is. If the input is a dict, it
@@ -173,6 +173,8 @@ def _parse_bytes_bytes_codec(data: dict[str, JSON] | BytesBytesCodec) -> BytesBy
             msg = f"Expected a dict representation of a BytesBytesCodec; got a dict representation of a {type(result)} instead."
             raise TypeError(msg)
     else:
+        if not isinstance(data, BytesBytesCodec):
+            raise TypeError(f"Expected a BytesBytesCodec. Got {type(data)} instead.")
         result = data
     return result
 
@@ -193,7 +195,7 @@ def _parse_array_bytes_codec(data: dict[str, JSON] | ArrayBytesCodec) -> ArrayBy
     return result
 
 
-def _parse_array_array_codec(data: dict[str, JSON] | ArrayArrayCodec) -> ArrayArrayCodec:
+def _parse_array_array_codec(data: dict[str, JSON] | Codec) -> ArrayArrayCodec:
     """
     Normalize the input to a ``ArrayArrayCodec`` instance.
     If the input is already a ``ArrayArrayCodec``, it is returned as is. If the input is a dict, it
@@ -205,6 +207,8 @@ def _parse_array_array_codec(data: dict[str, JSON] | ArrayArrayCodec) -> ArrayAr
             msg = f"Expected a dict representation of a ArrayArrayCodec; got a dict representation of a {type(result)} instead."
             raise TypeError(msg)
     else:
+        if not isinstance(data, ArrayArrayCodec):
+            raise TypeError(f"Expected a ArrayArrayCodec. Got {type(data)} instead.")
         result = data
     return result
 
diff --git a/tests/test_array.py b/tests/test_array.py
@@ -22,7 +22,8 @@
 from zarr.core.common import JSON, MemoryOrder, ZarrFormat
 from zarr.core.group import AsyncGroup
 from zarr.core.indexing import ceildiv
-from zarr.core.metadata.v3 import DataType
+from zarr.core.metadata.v2 import ArrayV2Metadata
+from zarr.core.metadata.v3 import ArrayV3Metadata, DataType
 from zarr.core.sync import sync
 from zarr.errors import ContainsArrayError, ContainsGroupError
 from zarr.storage import LocalStore, MemoryStore
@@ -885,7 +886,9 @@ async def test_nbytes(
         assert arr.nbytes == np.prod(arr.shape) * arr.dtype.itemsize
 
 
-def _get_partitioning(data: AsyncArray) -> tuple[tuple[int, ...], tuple[int, ...] | None]:
+def _get_partitioning(
+    data: AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata],
+) -> tuple[tuple[int, ...], tuple[int, ...] | None]:
     """
     Get the shard shape and chunk shape of an array. If the array is not sharded, the shard shape
     will be None.