diff --git a/changes/3318.misc.rst b/changes/3318.misc.rst new file mode 100644 index 0000000000..f8308e6b97 --- /dev/null +++ b/changes/3318.misc.rst @@ -0,0 +1,2 @@ +Define a ``Protocol`` to model the ``numcodecs.abc.Codec`` interface. This is groundwork toward +making ``numcodecs`` an optional dependency for ``zarr-python``. \ No newline at end of file diff --git a/src/zarr/abc/codec.py b/src/zarr/abc/codec.py index f8a5447a70..d5c995d2ca 100644 --- a/src/zarr/abc/codec.py +++ b/src/zarr/abc/codec.py @@ -1,11 +1,14 @@ from __future__ import annotations from abc import abstractmethod -from typing import TYPE_CHECKING, Generic, TypeVar +from collections.abc import Mapping +from typing import TYPE_CHECKING, Generic, TypeGuard, TypeVar + +from typing_extensions import ReadOnly, TypedDict from zarr.abc.metadata import Metadata from zarr.core.buffer import Buffer, NDBuffer -from zarr.core.common import ChunkCoords, concurrent_map +from zarr.core.common import ChunkCoords, NamedConfig, concurrent_map from zarr.core.config import config if TYPE_CHECKING: @@ -34,6 +37,27 @@ CodecInput = TypeVar("CodecInput", bound=NDBuffer | Buffer) CodecOutput = TypeVar("CodecOutput", bound=NDBuffer | Buffer) +TName = TypeVar("TName", bound=str, covariant=True) + + +class CodecJSON_V2(TypedDict, Generic[TName]): + """The JSON representation of a codec for Zarr V2""" + + id: ReadOnly[TName] + + +def _check_codecjson_v2(data: object) -> TypeGuard[CodecJSON_V2[str]]: + return isinstance(data, Mapping) and "id" in data and isinstance(data["id"], str) + + +CodecJSON_V3 = str | NamedConfig[str, Mapping[str, object]] +"""The JSON representation of a codec for Zarr V3.""" + +# The widest type we will *accept* for a codec JSON +# This covers v2 and v3 +CodecJSON = str | Mapping[str, object] +"""The widest type of JSON-like input that could specify a codec.""" + class BaseCodec(Metadata, Generic[CodecInput, CodecOutput]): """Generic base class for codecs. 
diff --git a/src/zarr/abc/numcodec.py b/src/zarr/abc/numcodec.py new file mode 100644 index 0000000000..76eac1d898 --- /dev/null +++ b/src/zarr/abc/numcodec.py @@ -0,0 +1,101 @@ +from typing import Any, Self, TypeGuard + +from typing_extensions import Protocol + + +class Numcodec(Protocol): + """ + A protocol that models the ``numcodecs.abc.Codec`` interface. + + This protocol should be considered experimental. Expect the type annotations for ``buf`` and + ``out`` to narrow in the future. + """ + + codec_id: str + + def encode(self, buf: Any) -> Any: + """Encode data from ``buf``. + + Parameters + ---------- + buf : Any + Data to be encoded. + + Returns + ------- + enc : Any + Encoded data. + """ + ... + + def decode(self, buf: Any, out: Any | None = None) -> Any: + """ + Decode data in ``buf``. + + Parameters + ---------- + buf : Any + Encoded data. + out : Any + Writeable buffer to store decoded data. If provided, this buffer must + be exactly the right size to store the decoded data. + + Returns + ------- + dec : Any + Decoded data. + """ + ... + + def get_config(self) -> Any: + """ + Return a JSON-serializable configuration dictionary for this + codec. Must include an ``'id'`` field with the codec identifier. + """ + ... + + @classmethod + def from_config(cls, config: Any) -> Self: + """ + Instantiate a codec from a configuration dictionary. + + Parameters + ---------- + config : Any + A configuration dictionary for this codec. + """ + ... + + +def _is_numcodec_cls(obj: object) -> TypeGuard[type[Numcodec]]: + """ + Check if the given object is a class that implements the Numcodec protocol. + + The @runtime_checkable decorator does not allow issubclass checks for protocols with non-method + members (i.e., attributes), so we use this function to manually check for the presence of the + required attributes and methods on a given object. 
+ """ + return ( + isinstance(obj, type) + and hasattr(obj, "codec_id") + and isinstance(obj.codec_id, str) + and hasattr(obj, "encode") + and callable(obj.encode) + and hasattr(obj, "decode") + and callable(obj.decode) + and hasattr(obj, "get_config") + and callable(obj.get_config) + and hasattr(obj, "from_config") + and callable(obj.from_config) + ) + + +def _is_numcodec(obj: object) -> TypeGuard[Numcodec]: + """ + Check if the given object implements the Numcodec protocol. + + The @runtime_checkable decorator does not allow issubclass checks for protocols with non-method + members (i.e., attributes), so we use this function to manually check for the presence of the + required attributes and methods on a given object. + """ + return _is_numcodec_cls(type(obj)) diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index 78b68caf73..a044ba8594 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -52,9 +52,8 @@ if TYPE_CHECKING: from collections.abc import Iterable - import numcodecs.abc - from zarr.abc.codec import Codec + from zarr.abc.numcodec import Numcodec from zarr.core.buffer import NDArrayLikeOrScalar from zarr.core.chunk_key_encodings import ChunkKeyEncoding from zarr.storage import StoreLike @@ -877,7 +876,7 @@ async def create( overwrite: bool = False, path: PathLike | None = None, chunk_store: StoreLike | None = None, - filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None, + filters: Iterable[dict[str, JSON] | Numcodec] | None = None, cache_metadata: bool | None = None, cache_attrs: bool | None = None, read_only: bool | None = None, diff --git a/src/zarr/api/synchronous.py b/src/zarr/api/synchronous.py index ed1ae2cf2a..50a1c0fa20 100644 --- a/src/zarr/api/synchronous.py +++ b/src/zarr/api/synchronous.py @@ -15,11 +15,11 @@ if TYPE_CHECKING: from collections.abc import Iterable - import numcodecs.abc import numpy as np import numpy.typing as npt from zarr.abc.codec import Codec + from 
zarr.abc.numcodec import Numcodec from zarr.api.asynchronous import ArrayLike, PathLike from zarr.core.array import ( CompressorsLike, @@ -610,7 +610,7 @@ def create( overwrite: bool = False, path: PathLike | None = None, chunk_store: StoreLike | None = None, - filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None, + filters: Iterable[dict[str, JSON] | Numcodec] | None = None, cache_metadata: bool | None = None, cache_attrs: bool | None = None, read_only: bool | None = None, diff --git a/src/zarr/codecs/_v2.py b/src/zarr/codecs/_v2.py index 08853f27f1..3c6c99c21c 100644 --- a/src/zarr/codecs/_v2.py +++ b/src/zarr/codecs/_v2.py @@ -4,7 +4,6 @@ from dataclasses import dataclass from typing import TYPE_CHECKING -import numcodecs import numpy as np from numcodecs.compat import ensure_bytes, ensure_ndarray_like @@ -12,16 +11,15 @@ from zarr.registry import get_ndbuffer_class if TYPE_CHECKING: - import numcodecs.abc - + from zarr.abc.numcodec import Numcodec from zarr.core.array_spec import ArraySpec from zarr.core.buffer import Buffer, NDBuffer @dataclass(frozen=True) class V2Codec(ArrayBytesCodec): - filters: tuple[numcodecs.abc.Codec, ...] | None - compressor: numcodecs.abc.Codec | None + filters: tuple[Numcodec, ...] 
| None + compressor: Numcodec | None is_fixed_size = False @@ -86,7 +84,6 @@ async def _encode_single( if self.filters: for f in self.filters: chunk = await asyncio.to_thread(f.encode, chunk) - # check object encoding if ensure_ndarray_like(chunk).dtype == object: raise RuntimeError("cannot write object array without object codec") @@ -96,7 +93,6 @@ async def _encode_single( cdata = await asyncio.to_thread(self.compressor.encode, chunk) else: cdata = chunk - cdata = ensure_bytes(cdata) return chunk_spec.prototype.buffer.from_bytes(cdata) diff --git a/src/zarr/core/_info.py b/src/zarr/core/_info.py index a5b14d573a..fef424346a 100644 --- a/src/zarr/core/_info.py +++ b/src/zarr/core/_info.py @@ -5,9 +5,8 @@ from typing import TYPE_CHECKING, Literal if TYPE_CHECKING: - import numcodecs.abc - from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec + from zarr.abc.numcodec import Numcodec from zarr.core.common import ZarrFormat from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType @@ -88,9 +87,9 @@ class ArrayInfo: _order: Literal["C", "F"] _read_only: bool _store_type: str - _filters: tuple[numcodecs.abc.Codec, ...] | tuple[ArrayArrayCodec, ...] = () + _filters: tuple[Numcodec, ...] | tuple[ArrayArrayCodec, ...] = () _serializer: ArrayBytesCodec | None = None - _compressors: tuple[numcodecs.abc.Codec, ...] | tuple[BytesBytesCodec, ...] = () + _compressors: tuple[Numcodec, ...] | tuple[BytesBytesCodec, ...] 
= () _count_bytes: int | None = None _count_bytes_stored: int | None = None _count_chunks_initialized: int | None = None diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 311a0eb986..2ce33df7ba 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -19,13 +19,12 @@ ) from warnings import warn -import numcodecs -import numcodecs.abc import numpy as np from typing_extensions import deprecated import zarr from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec, Codec +from zarr.abc.numcodec import Numcodec, _is_numcodec from zarr.abc.store import Store, set_or_delete from zarr.codecs._v2 import V2Codec from zarr.codecs.bytes import BytesCodec @@ -607,7 +606,7 @@ async def _create( chunks: ShapeLike | None = None, dimension_separator: Literal[".", "/"] | None = None, order: MemoryOrder | None = None, - filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None, + filters: Iterable[dict[str, JSON] | Numcodec] | None = None, compressor: CompressorLike = "auto", # runtime overwrite: bool = False, @@ -818,7 +817,7 @@ def _create_metadata_v2( order: MemoryOrder, dimension_separator: Literal[".", "/"] | None = None, fill_value: Any | None = DEFAULT_FILL_VALUE, - filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None, + filters: Iterable[dict[str, JSON] | Numcodec] | None = None, compressor: CompressorLikev2 = None, attributes: dict[str, JSON] | None = None, ) -> ArrayV2Metadata: @@ -856,7 +855,7 @@ async def _create_v2( config: ArrayConfig, dimension_separator: Literal[".", "/"] | None = None, fill_value: Any | None = DEFAULT_FILL_VALUE, - filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None, + filters: Iterable[dict[str, JSON] | Numcodec] | None = None, compressor: CompressorLike = "auto", attributes: dict[str, JSON] | None = None, overwrite: bool = False, @@ -1033,7 +1032,7 @@ def size(self) -> int: return np.prod(self.metadata.shape).item() @property - def filters(self) -> 
tuple[numcodecs.abc.Codec, ...] | tuple[ArrayArrayCodec, ...]: + def filters(self) -> tuple[Numcodec, ...] | tuple[ArrayArrayCodec, ...]: """ Filters that are applied to each chunk of the array, in order, before serializing that chunk to bytes. @@ -1062,7 +1061,7 @@ def serializer(self) -> ArrayBytesCodec | None: @property @deprecated("Use AsyncArray.compressors instead.", category=ZarrDeprecationWarning) - def compressor(self) -> numcodecs.abc.Codec | None: + def compressor(self) -> Numcodec | None: """ Compressor that is applied to each chunk of the array. @@ -1075,7 +1074,7 @@ def compressor(self) -> numcodecs.abc.Codec | None: raise TypeError("`compressor` is not available for Zarr format 3 arrays.") @property - def compressors(self) -> tuple[numcodecs.abc.Codec, ...] | tuple[BytesBytesCodec, ...]: + def compressors(self) -> tuple[Numcodec, ...] | tuple[BytesBytesCodec, ...]: """ Compressors that are applied to each chunk of the array. Compressors are applied in order, and after any filters are applied (if any are specified) and the data is serialized into bytes. @@ -2227,7 +2226,7 @@ def fill_value(self) -> Any: return self.metadata.fill_value @property - def filters(self) -> tuple[numcodecs.abc.Codec, ...] | tuple[ArrayArrayCodec, ...]: + def filters(self) -> tuple[Numcodec, ...] | tuple[ArrayArrayCodec, ...]: """ Filters that are applied to each chunk of the array, in order, before serializing that chunk to bytes. @@ -2243,7 +2242,7 @@ def serializer(self) -> None | ArrayBytesCodec: @property @deprecated("Use Array.compressors instead.", category=ZarrDeprecationWarning) - def compressor(self) -> numcodecs.abc.Codec | None: + def compressor(self) -> Numcodec | None: """ Compressor that is applied to each chunk of the array. @@ -2254,7 +2253,7 @@ def compressor(self) -> numcodecs.abc.Codec | None: return self._async_array.compressor @property - def compressors(self) -> tuple[numcodecs.abc.Codec, ...] 
| tuple[BytesBytesCodec, ...]: + def compressors(self) -> tuple[Numcodec, ...] | tuple[BytesBytesCodec, ...]: """ Compressors that are applied to each chunk of the array. Compressors are applied in order, and after any filters are applied (if any are specified) and the data is serialized into bytes. @@ -3898,23 +3897,21 @@ def _build_parents( FiltersLike: TypeAlias = ( - Iterable[dict[str, JSON] | ArrayArrayCodec | numcodecs.abc.Codec] + Iterable[dict[str, JSON] | ArrayArrayCodec | Numcodec] | ArrayArrayCodec - | Iterable[numcodecs.abc.Codec] - | numcodecs.abc.Codec + | Iterable[Numcodec] + | Numcodec | Literal["auto"] | None ) # Union of acceptable types for users to pass in for both v2 and v3 compressors -CompressorLike: TypeAlias = ( - dict[str, JSON] | BytesBytesCodec | numcodecs.abc.Codec | Literal["auto"] | None -) +CompressorLike: TypeAlias = dict[str, JSON] | BytesBytesCodec | Numcodec | Literal["auto"] | None CompressorsLike: TypeAlias = ( - Iterable[dict[str, JSON] | BytesBytesCodec | numcodecs.abc.Codec] + Iterable[dict[str, JSON] | BytesBytesCodec | Numcodec] | dict[str, JSON] | BytesBytesCodec - | numcodecs.abc.Codec + | Numcodec | Literal["auto"] | None ) @@ -4775,7 +4772,7 @@ def default_serializer_v3(dtype: ZDType[Any, Any]) -> ArrayBytesCodec: return serializer -def default_filters_v2(dtype: ZDType[Any, Any]) -> tuple[numcodecs.abc.Codec] | None: +def default_filters_v2(dtype: ZDType[Any, Any]) -> tuple[Numcodec] | None: """ Given a data type, return the default filters for that data type. @@ -4797,7 +4794,7 @@ def default_filters_v2(dtype: ZDType[Any, Any]) -> tuple[numcodecs.abc.Codec] | return None -def default_compressor_v2(dtype: ZDType[Any, Any]) -> numcodecs.abc.Codec: +def default_compressor_v2(dtype: ZDType[Any, Any]) -> Numcodec: """ Given a data type, return the default compressors for that data type. 
@@ -4805,7 +4802,7 @@ def default_compressor_v2(dtype: ZDType[Any, Any]) -> numcodecs.abc.Codec: """ from numcodecs import Zstd - return Zstd(level=0, checksum=False) + return Zstd(level=0, checksum=False) # type: ignore[no-any-return] def _parse_chunk_encoding_v2( @@ -4813,12 +4810,12 @@ def _parse_chunk_encoding_v2( compressor: CompressorsLike, filters: FiltersLike, dtype: ZDType[TBaseDType, TBaseScalar], -) -> tuple[tuple[numcodecs.abc.Codec, ...] | None, numcodecs.abc.Codec | None]: +) -> tuple[tuple[Numcodec, ...] | None, Numcodec | None]: """ Generate chunk encoding classes for Zarr format 2 arrays with optional defaults. """ - _filters: tuple[numcodecs.abc.Codec, ...] | None - _compressor: numcodecs.abc.Codec | None + _filters: tuple[Numcodec, ...] | None + _compressor: Numcodec | None if compressor is None or compressor == (): _compressor = None @@ -4839,7 +4836,7 @@ def _parse_chunk_encoding_v2( else: if isinstance(filters, Iterable): for idx, f in enumerate(filters): - if not isinstance(f, numcodecs.abc.Codec): + if not _is_numcodec(f): msg = ( "For Zarr format 2 arrays, all elements of `filters` must be numcodecs codecs. " f"Element at index {idx} has type {type(f)}, which is not a numcodecs codec." 
diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py index 9ad6b3bc42..efc6bd7949 100644 --- a/src/zarr/core/metadata/v2.py +++ b/src/zarr/core/metadata/v2.py @@ -5,13 +5,13 @@ from functools import cached_property from typing import TYPE_CHECKING, Any, TypeAlias, TypedDict, cast -import numcodecs.abc - from zarr.abc.metadata import Metadata +from zarr.abc.numcodec import Numcodec, _is_numcodec from zarr.core.chunk_grids import RegularChunkGrid from zarr.core.dtype import get_data_type_from_json from zarr.core.dtype.common import OBJECT_CODEC_IDS, DTypeSpec_V2 from zarr.errors import ZarrUserWarning +from zarr.registry import get_numcodec if TYPE_CHECKING: from typing import Literal, Self @@ -31,7 +31,6 @@ import json from dataclasses import dataclass, field, fields, replace -import numcodecs import numpy as np from zarr.core.array_spec import ArrayConfig, ArraySpec @@ -57,7 +56,7 @@ class ArrayV2MetadataDict(TypedDict): # Union of acceptable types for v2 compressors -CompressorLikev2: TypeAlias = dict[str, JSON] | numcodecs.abc.Codec | None +CompressorLikev2: TypeAlias = dict[str, JSON] | Numcodec | None @dataclass(frozen=True, kw_only=True) @@ -67,9 +66,9 @@ class ArrayV2Metadata(Metadata): dtype: ZDType[TBaseDType, TBaseScalar] fill_value: int | float | str | bytes | None = None order: MemoryOrder = "C" - filters: tuple[numcodecs.abc.Codec, ...] | None = None + filters: tuple[Numcodec, ...] | None = None dimension_separator: Literal[".", "/"] = "." 
- compressor: numcodecs.abc.Codec | None + compressor: Numcodec | None attributes: dict[str, JSON] = field(default_factory=dict) zarr_format: Literal[2] = field(init=False, default=2) @@ -83,7 +82,7 @@ def __init__( order: MemoryOrder, dimension_separator: Literal[".", "/"] = ".", compressor: CompressorLikev2 = None, - filters: Iterable[numcodecs.abc.Codec | dict[str, JSON]] | None = None, + filters: Iterable[Numcodec | dict[str, JSON]] | None = None, attributes: dict[str, JSON] | None = None, ) -> None: """ @@ -198,7 +197,7 @@ def from_dict(cls, data: dict[str, Any]) -> ArrayV2Metadata: def to_dict(self) -> dict[str, JSON]: zarray_dict = super().to_dict() - if isinstance(zarray_dict["compressor"], numcodecs.abc.Codec): + if _is_numcodec(zarray_dict["compressor"]): codec_config = zarray_dict["compressor"].get_config() # Hotfix for https://github.com/zarr-developers/zarr-python/issues/2647 if codec_config["id"] == "zstd" and not codec_config.get("checksum", False): @@ -213,7 +212,7 @@ def to_dict(self) -> dict[str, JSON]: raise TypeError("Invalid type for filters. Expected a list or tuple.") new_filters = [] for f in raw_filters: - if isinstance(f, numcodecs.abc.Codec): + if _is_numcodec(f): new_filters.append(f.get_config()) else: new_filters.append(f) @@ -263,20 +262,20 @@ def parse_zarr_format(data: object) -> Literal[2]: raise ValueError(f"Invalid value. Expected 2. Got {data}.") -def parse_filters(data: object) -> tuple[numcodecs.abc.Codec, ...] | None: +def parse_filters(data: object) -> tuple[Numcodec, ...] | None: """ Parse a potential tuple of filters """ - out: list[numcodecs.abc.Codec] = [] + out: list[Numcodec] = [] if data is None: return data if isinstance(data, Iterable): for idx, val in enumerate(data): - if isinstance(val, numcodecs.abc.Codec): + if _is_numcodec(val): out.append(val) elif isinstance(val, dict): - out.append(numcodecs.get_codec(val)) + out.append(get_numcodec(val)) # type: ignore[arg-type] else: msg = f"Invalid filter at index {idx}. 
Expected a numcodecs.abc.Codec or a dict representation of numcodecs.abc.Codec. Got {type(val)} instead." raise TypeError(msg) @@ -286,20 +285,20 @@ def parse_filters(data: object) -> tuple[numcodecs.abc.Codec, ...] | None: else: return tuple(out) # take a single codec instance and wrap it in a tuple - if isinstance(data, numcodecs.abc.Codec): + if _is_numcodec(data): return (data,) msg = f"Invalid filters. Expected None, an iterable of numcodecs.abc.Codec or dict representations of numcodecs.abc.Codec. Got {type(data)} instead." raise TypeError(msg) -def parse_compressor(data: object) -> numcodecs.abc.Codec | None: +def parse_compressor(data: object) -> Numcodec | None: """ Parse a potential compressor. """ - if data is None or isinstance(data, numcodecs.abc.Codec): + if data is None or _is_numcodec(data): return data if isinstance(data, dict): - return numcodecs.get_codec(data) + return get_numcodec(data) # type: ignore[arg-type] msg = f"Invalid compressor. Expected None, a numcodecs.abc.Codec, or a dict representation of a numcodecs.abc.Codec. Got {type(data)} instead." raise ValueError(msg) diff --git a/src/zarr/registry.py b/src/zarr/registry.py index fc3ffd7f7c..46216205f7 100644 --- a/src/zarr/registry.py +++ b/src/zarr/registry.py @@ -17,8 +17,10 @@ ArrayBytesCodec, BytesBytesCodec, Codec, + CodecJSON_V2, CodecPipeline, ) + from zarr.abc.numcodec import Numcodec from zarr.core.buffer import Buffer, NDBuffer from zarr.core.common import JSON @@ -280,3 +282,31 @@ def get_ndbuffer_class(reload_config: bool = False) -> type[NDBuffer]: _collect_entrypoints() + + +def get_numcodec(data: CodecJSON_V2[str]) -> Numcodec: + """ + Resolve a numcodec codec from the numcodecs registry. + + This requires the Numcodecs package to be installed. + + Parameters + ---------- + data : CodecJSON_V2 + The JSON metadata for the codec. 
+ + Returns + ------- + codec : Numcodec + + Examples + -------- + + >>> codec = get_numcodec({'id': 'zlib', 'level': 1}) + >>> codec + Zlib(level=1) + """ + + from numcodecs.registry import get_codec + + return get_codec(data) # type: ignore[no-any-return] diff --git a/tests/test_abc/__init__.py b/tests/test_abc/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/test_abc/test_codec.py b/tests/test_abc/test_codec.py new file mode 100644 index 0000000000..e0f9ddb7bb --- /dev/null +++ b/tests/test_abc/test_codec.py @@ -0,0 +1,12 @@ +from __future__ import annotations + +from zarr.abc.codec import _check_codecjson_v2 + + +def test_check_codecjson_v2_valid() -> None: + """ + Test that the _check_codecjson_v2 function works + """ + assert _check_codecjson_v2({"id": "gzip"}) + assert not _check_codecjson_v2({"id": 10}) + assert not _check_codecjson_v2([10, 11]) diff --git a/tests/test_api.py b/tests/test_api.py index 12acf80589..69fc9b5b16 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -1283,7 +1283,7 @@ def test_gpu_basic(store: Store, zarr_format: ZarrFormat | None) -> None: dtype=src.dtype, overwrite=True, zarr_format=zarr_format, - compressors=compressors, + compressors=compressors, # type: ignore[arg-type] ) z[:10, :10] = src[:10, :10] diff --git a/tests/test_array.py b/tests/test_array.py index 74201a4017..a316ee127f 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -73,6 +73,7 @@ from .test_dtype.conftest import zdtype_examples if TYPE_CHECKING: + from zarr.abc.codec import CodecJSON_V3 from zarr.core.metadata.v3 import ArrayV3Metadata @@ -1346,11 +1347,11 @@ async def test_v2_chunk_encoding( assert arr.metadata.filters == filters_expected # Normalize for property getters - compressor_expected = () if compressor_expected is None else (compressor_expected,) - filters_expected = () if filters_expected is None else filters_expected + arr_compressors_expected = () if compressor_expected is None else 
(compressor_expected,) + arr_filters_expected = () if filters_expected is None else filters_expected - assert arr.compressors == compressor_expected - assert arr.filters == filters_expected + assert arr.compressors == arr_compressors_expected + assert arr.filters == arr_filters_expected @staticmethod @pytest.mark.parametrize("dtype", [UInt8(), Float32(), VariableLengthUTF8()]) @@ -1388,11 +1389,12 @@ async def test_default_filters_compressors( if default_filters is None: expected_filters = () else: - expected_filters = default_filters + expected_filters = default_filters # type: ignore[assignment] + if default_compressors is None: expected_compressors = () else: - expected_compressors = (default_compressors,) + expected_compressors = (default_compressors,) # type: ignore[assignment] expected_serializer = None else: raise ValueError(f"Invalid zarr_format: {zarr_format}") @@ -1696,7 +1698,7 @@ def test_roundtrip_numcodecs() -> None: {"name": "numcodecs.shuffle", "configuration": {"elementsize": 2}}, {"name": "numcodecs.zlib", "configuration": {"level": 4}}, ] - filters = [ + filters: list[CodecJSON_V3] = [ { "name": "numcodecs.fixedscaleoffset", "configuration": { @@ -1717,8 +1719,8 @@ def test_roundtrip_numcodecs() -> None: shape=(720, 1440), chunks=(720, 1440), dtype="float64", - compressors=compressors, - filters=filters, + compressors=compressors, # type: ignore[arg-type] + filters=filters, # type: ignore[arg-type] fill_value=-9.99, dimension_names=["lat", "lon"], ) diff --git a/tests/test_codecs/test_numcodecs.py b/tests/test_codecs/test_numcodecs.py new file mode 100644 index 0000000000..1c4d550587 --- /dev/null +++ b/tests/test_codecs/test_numcodecs.py @@ -0,0 +1,24 @@ +from __future__ import annotations + +from numcodecs import GZip + +from zarr.abc.numcodec import _is_numcodec, _is_numcodec_cls +from zarr.registry import get_numcodec + + +def test_get_numcodec() -> None: + assert get_numcodec({"id": "gzip", "level": 2}) == GZip(level=2) # type: 
ignore[typeddict-unknown-key] + + +def test_is_numcodec() -> None: + """ + Test the _is_numcodec function + """ + assert _is_numcodec(GZip()) + + +def test_is_numcodec_cls() -> None: + """ + Test the _is_numcodec_cls function + """ + assert _is_numcodec_cls(GZip) diff --git a/tests/test_codecs/test_vlen.py b/tests/test_codecs/test_vlen.py index 6fe1863464..cf0905daca 100644 --- a/tests/test_codecs/test_vlen.py +++ b/tests/test_codecs/test_vlen.py @@ -40,7 +40,7 @@ def test_vlen_string( chunks=data.shape, dtype=data.dtype, fill_value="", - compressors=compressor, + compressors=compressor, # type: ignore[arg-type] ) assert isinstance(a.metadata, ArrayV3Metadata) # needed for mypy diff --git a/tests/test_regression/test_v2_dtype_regression.py b/tests/test_regression/test_v2_dtype_regression.py index 9702ca7d23..ffe273490d 100644 --- a/tests/test_regression/test_v2_dtype_regression.py +++ b/tests/test_regression/test_v2_dtype_regression.py @@ -4,7 +4,6 @@ from pathlib import Path from typing import TYPE_CHECKING, Literal -import numcodecs import numpy as np import pytest from numcodecs import LZ4, LZMA, Blosc, GZip, VLenBytes, VLenUTF8, Zstd @@ -13,6 +12,7 @@ import zarr.abc import zarr.abc.codec import zarr.codecs as zarrcodecs +from zarr.abc.numcodec import Numcodec from zarr.core.array import Array from zarr.core.chunk_key_encodings import V2ChunkKeyEncoding from zarr.core.dtype.npy.bytes import VariableLengthBytes @@ -40,12 +40,12 @@ def runner_installed() -> bool: class ArrayParams: values: np.ndarray[tuple[int], np.dtype[np.generic]] fill_value: np.generic | str | int | bytes - filters: tuple[numcodecs.abc.Codec, ...] = () + filters: tuple[Numcodec, ...] = () serializer: str | None = None - compressor: numcodecs.abc.Codec + compressor: Numcodec -basic_codecs = GZip(), Blosc(), LZ4(), LZMA(), Zstd() +basic_codecs: tuple[Numcodec, ...] 
= GZip(), Blosc(), LZ4(), LZMA(), Zstd() basic_dtypes = "|b", ">i2", ">i4", ">f4", ">f8", "c8", "c16", "M8[10us]", "m8[4ps]" string_dtypes = "U4"