zarr-developers · d-v-b · Aug 13, 2025 · Jul 31, 2025 · Jul 31, 2025 · Jul 31, 2025
diff --git a/changes/3318.misc.rst b/changes/3318.misc.rst
@@ -0,0 +1,2 @@
+Define a ``Protocol`` to model the ``numcodecs.abc.Codec`` interface. This is groundwork toward
+making ``numcodecs`` an optional dependency for ``zarr-python``.
diff --git a/src/zarr/abc/codec.py b/src/zarr/abc/codec.py
@@ -1,11 +1,14 @@
 from __future__ import annotations
 
 from abc import abstractmethod
-from typing import TYPE_CHECKING, Generic, TypeVar
+from collections.abc import Mapping
+from typing import TYPE_CHECKING, Generic, TypeGuard, TypeVar
+
+from typing_extensions import ReadOnly, TypedDict
 
 from zarr.abc.metadata import Metadata
 from zarr.core.buffer import Buffer, NDBuffer
-from zarr.core.common import ChunkCoords, concurrent_map
+from zarr.core.common import ChunkCoords, NamedConfig, concurrent_map
 from zarr.core.config import config
 
 if TYPE_CHECKING:
@@ -34,6 +37,27 @@
 CodecInput = TypeVar("CodecInput", bound=NDBuffer | Buffer)
 CodecOutput = TypeVar("CodecOutput", bound=NDBuffer | Buffer)
 
+TName = TypeVar("TName", bound=str, covariant=True)
+
+
+class CodecJSON_V2(TypedDict, Generic[TName]):
+    """The JSON representation of a codec for Zarr V2"""
+
+    id: ReadOnly[TName]
+
+
+def _check_codecjson_v2(data: object) -> TypeGuard[CodecJSON_V2[str]]:
+    return isinstance(data, Mapping) and "id" in data and isinstance(data["id"], str)
+
+
+CodecJSON_V3 = str | NamedConfig[str, Mapping[str, object]]
+"""The JSON representation of a codec for Zarr V3."""
+
+# The widest type we will *accept* for a codec JSON
+# This covers v2 and v3
+CodecJSON = str | Mapping[str, object]
+"""The widest type of JSON-like input that could specify a codec."""
+
 
 class BaseCodec(Metadata, Generic[CodecInput, CodecOutput]):
     """Generic base class for codecs.

diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py
@@ -46,9 +46,8 @@
 if TYPE_CHECKING:
     from collections.abc import Iterable
 
-    import numcodecs.abc
-
     from zarr.abc.codec import Codec
+    from zarr.codecs._v2 import Numcodec
     from zarr.core.buffer import NDArrayLikeOrScalar
     from zarr.core.chunk_key_encodings import ChunkKeyEncoding
     from zarr.storage import StoreLike
@@ -871,7 +870,7 @@ async def create(
     overwrite: bool = False,
     path: PathLike | None = None,
     chunk_store: StoreLike | None = None,
-    filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None,
+    filters: Iterable[dict[str, JSON] | Numcodec] | None = None,
     cache_metadata: bool | None = None,
     cache_attrs: bool | None = None,
     read_only: bool | None = None,

diff --git a/src/zarr/api/synchronous.py b/src/zarr/api/synchronous.py
@@ -14,12 +14,12 @@
 if TYPE_CHECKING:
     from collections.abc import Iterable
 
-    import numcodecs.abc
     import numpy as np
     import numpy.typing as npt
 
     from zarr.abc.codec import Codec
     from zarr.api.asynchronous import ArrayLike, PathLike
+    from zarr.codecs._v2 import Numcodec
     from zarr.core.array import (
         CompressorsLike,
         FiltersLike,
@@ -609,7 +609,7 @@ def create(
     overwrite: bool = False,
     path: PathLike | None = None,
     chunk_store: StoreLike | None = None,
-    filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None,
+    filters: Iterable[dict[str, JSON] | Numcodec] | None = None,
     cache_metadata: bool | None = None,
     cache_attrs: bool | None = None,
     read_only: bool | None = None,

diff --git a/src/zarr/codecs/_numcodecs.py b/src/zarr/codecs/_numcodecs.py
@@ -0,0 +1,30 @@
+from zarr.abc.codec import CodecJSON_V2
+from zarr.codecs._v2 import Numcodec
+
+
+def get_numcodec(data: CodecJSON_V2[str]) -> Numcodec:
+    """
+    Resolve a numcodec codec from the numcodecs registry.
+
+    This requires the Numcodecs package to be installed.
+
+    Parameters
+    ----------
+    data : CodecJSON_V2
+        The JSON metadata for the codec.
+
+    Returns
+    -------
+    codec : Numcodec
+
+    Examples
+    --------
+
+    >>> codec = get_numcodec({'id': 'zlib', 'level': 1})
+    >>> codec
+    Zlib(level=1)
+    """
+
+    from numcodecs.registry import get_codec
+
+    return get_codec(data)  # type: ignore[no-any-return]
diff --git a/src/zarr/codecs/_v2.py b/src/zarr/codecs/_v2.py
@@ -2,26 +2,77 @@
 
 import asyncio
 from dataclasses import dataclass
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, ClassVar, Self, TypeGuard
 
-import numcodecs
 import numpy as np
 from numcodecs.compat import ensure_bytes, ensure_ndarray_like
+from typing_extensions import Protocol
 
-from zarr.abc.codec import ArrayBytesCodec
+from zarr.abc.codec import ArrayBytesCodec, CodecJSON_V2
 from zarr.registry import get_ndbuffer_class
 
 if TYPE_CHECKING:
-    import numcodecs.abc
-
     from zarr.core.array_spec import ArraySpec
     from zarr.core.buffer import Buffer, NDBuffer
 
 
+class Numcodec(Protocol):
+    """
+    A protocol that models the ``numcodecs.abc.Codec`` interface.
+    """
+
+    codec_id: ClassVar[str]
+
+    def encode(self, buf: Buffer | NDBuffer) -> Buffer | NDBuffer: ...
+
+    def decode(
+        self, buf: Buffer | NDBuffer, out: Buffer | NDBuffer | None = None
+    ) -> Buffer | NDBuffer: ...
+
+    def get_config(self) -> CodecJSON_V2[str]: ...
+
+    @classmethod
+    def from_config(cls, config: CodecJSON_V2[str]) -> Self: ...
+
+
+def _is_numcodec(obj: object) -> TypeGuard[Numcodec]:
+    """
+    Check if the given object implements the Numcodec protocol.
+
+    The @runtime_checkable decorator does not allow issubclass checks for protocols with non-method
+    members (i.e., attributes), so we use this function to manually check for the presence of the
+    required attributes and methods on a given object.
+    """
+    return _is_numcodec_cls(type(obj))
+
+
+def _is_numcodec_cls(obj: object) -> TypeGuard[type[Numcodec]]:
+    """
+    Check if the given object is a class that implements the Numcodec protocol.
+
+    The @runtime_checkable decorator does not allow issubclass checks for protocols with non-method
+    members (i.e., attributes), so we use this function to manually check for the presence of the
+    required attributes and methods on a given object.
+    """
+    return (
+        isinstance(obj, type)
+        and hasattr(obj, "codec_id")
+        and isinstance(obj.codec_id, str)
+        and hasattr(obj, "encode")
+        and callable(obj.encode)
+        and hasattr(obj, "decode")
+        and callable(obj.decode)
+        and hasattr(obj, "get_config")
+        and callable(obj.get_config)
+        and hasattr(obj, "from_config")
+        and callable(obj.from_config)
+    )
+
+
 @dataclass(frozen=True)
 class V2Codec(ArrayBytesCodec):
-    filters: tuple[numcodecs.abc.Codec, ...] | None
-    compressor: numcodecs.abc.Codec | None
+    filters: tuple[Numcodec, ...] | None
+    compressor: Numcodec | None
 
     is_fixed_size = False
 
@@ -33,9 +84,9 @@ async def _decode_single(
         cdata = chunk_bytes.as_array_like()
         # decompress
         if self.compressor:
-            chunk = await asyncio.to_thread(self.compressor.decode, cdata)
+            chunk = await asyncio.to_thread(self.compressor.decode, cdata)  # type: ignore[arg-type]
         else:
-            chunk = cdata
+            chunk = cdata  # type: ignore[assignment]
 
         # apply filters
         if self.filters:
@@ -56,7 +107,7 @@ async def _decode_single(
                 # is an object array. In this case, we need to convert the object
                 # array to the correct dtype.
 
-                chunk = np.array(chunk).astype(chunk_spec.dtype.to_native_dtype())
+                chunk = np.array(chunk).astype(chunk_spec.dtype.to_native_dtype())  # type: ignore[assignment]
 
         elif chunk.dtype != object:
             # If we end up here, someone must have hacked around with the filters.
@@ -85,17 +136,17 @@ async def _encode_single(
         # apply filters
         if self.filters:
             for f in self.filters:
-                chunk = await asyncio.to_thread(f.encode, chunk)
+                chunk = await asyncio.to_thread(f.encode, chunk)  # type: ignore[arg-type]
 
         # check object encoding
         if ensure_ndarray_like(chunk).dtype == object:
             raise RuntimeError("cannot write object array without object codec")
 
         # compress
         if self.compressor:
-            cdata = await asyncio.to_thread(self.compressor.encode, chunk)
+            cdata = await asyncio.to_thread(self.compressor.encode, chunk)  # type: ignore[arg-type]
         else:
-            cdata = chunk
+            cdata = chunk  # type: ignore[assignment]
 
         cdata = ensure_bytes(cdata)
         return chunk_spec.prototype.buffer.from_bytes(cdata)

diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py
@@ -27,7 +27,7 @@
 import zarr
 from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec, Codec
 from zarr.abc.store import Store, set_or_delete
-from zarr.codecs._v2 import V2Codec
+from zarr.codecs._v2 import Numcodec, V2Codec
 from zarr.codecs.bytes import BytesCodec
 from zarr.codecs.vlen_utf8 import VLenBytesCodec, VLenUTF8Codec
 from zarr.codecs.zstd import ZstdCodec
@@ -607,7 +607,7 @@ async def _create(
         chunks: ShapeLike | None = None,
         dimension_separator: Literal[".", "/"] | None = None,
         order: MemoryOrder | None = None,
-        filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None,
+        filters: Iterable[dict[str, JSON] | Numcodec] | None = None,
         compressor: CompressorLike = "auto",
         # runtime
         overwrite: bool = False,
@@ -818,7 +818,7 @@ def _create_metadata_v2(
         order: MemoryOrder,
         dimension_separator: Literal[".", "/"] | None = None,
         fill_value: Any | None = DEFAULT_FILL_VALUE,
-        filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None,
+        filters: Iterable[dict[str, JSON] | Numcodec] | None = None,
         compressor: CompressorLikev2 = None,
         attributes: dict[str, JSON] | None = None,
     ) -> ArrayV2Metadata:
@@ -856,7 +856,7 @@ async def _create_v2(
         config: ArrayConfig,
         dimension_separator: Literal[".", "/"] | None = None,
         fill_value: Any | None = DEFAULT_FILL_VALUE,
-        filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None,
+        filters: Iterable[dict[str, JSON] | Numcodec] | None = None,
         compressor: CompressorLike = "auto",
         attributes: dict[str, JSON] | None = None,
         overwrite: bool = False,
@@ -3898,7 +3898,7 @@ def _build_parents(
 
 
 FiltersLike: TypeAlias = (
-    Iterable[dict[str, JSON] | ArrayArrayCodec | numcodecs.abc.Codec]
+    Iterable[dict[str, JSON] | ArrayArrayCodec | Numcodec]
     | ArrayArrayCodec
     | Iterable[numcodecs.abc.Codec]
     | numcodecs.abc.Codec
@@ -3911,10 +3911,10 @@ def _build_parents(
 )
 
 CompressorsLike: TypeAlias = (
-    Iterable[dict[str, JSON] | BytesBytesCodec | numcodecs.abc.Codec]
+    Iterable[dict[str, JSON] | BytesBytesCodec | Numcodec]
     | dict[str, JSON]
     | BytesBytesCodec
-    | numcodecs.abc.Codec
+    | Numcodec
     | Literal["auto"]
     | None
 )
@@ -4944,7 +4944,7 @@ def _parse_deprecated_compressor(
             # "no compression"
             compressors = ()
         else:
-            compressors = (compressor,)
+            compressors = (compressor,)  # type: ignore[assignment]
     elif zarr_format == 2 and compressor == compressors == "auto":
         compressors = ({"id": "blosc"},)
     return compressors

diff --git a/tests/test_api.py b/tests/test_api.py
@@ -1282,7 +1282,7 @@ def test_gpu_basic(store: Store, zarr_format: ZarrFormat | None) -> None:
             dtype=src.dtype,
             overwrite=True,
             zarr_format=zarr_format,
-            compressors=compressors,
+            compressors=compressors,  # type: ignore[arg-type]
         )
         z[:10, :10] = src[:10, :10]
 

diff --git a/tests/test_array.py b/tests/test_array.py
@@ -1684,7 +1684,7 @@ def test_roundtrip_numcodecs() -> None:
         shape=(720, 1440),
         chunks=(720, 1440),
         dtype="float64",
-        compressors=compressors,
+        compressors=compressors,  # type: ignore[arg-type]
         filters=filters,
         fill_value=-9.99,
         dimension_names=["lat", "lon"],

diff --git a/tests/test_codecs/test_numcodecs.py b/tests/test_codecs/test_numcodecs.py
@@ -0,0 +1,24 @@
+from __future__ import annotations
+
+from numcodecs import GZip
+
+from zarr.codecs._numcodecs import get_numcodec
+from zarr.codecs._v2 import _is_numcodec, _is_numcodec_cls
+
+
+def test_get_numcodec() -> None:
+    assert get_numcodec({"id": "gzip", "level": 2}) == GZip(level=2)  # type: ignore[typeddict-unknown-key]
+
+
+def test_is_numcodec() -> None:
+    """
+    Test the _is_numcodec function
+    """
+    assert _is_numcodec(GZip())
+
+
+def test_is_numcodec_cls() -> None:
+    """
+    Test the _is_numcodec_cls function
+    """
+    assert _is_numcodec_cls(GZip)
diff --git a/tests/test_codecs/test_vlen.py b/tests/test_codecs/test_vlen.py
@@ -40,7 +40,7 @@ def test_vlen_string(
         chunks=data.shape,
         dtype=data.dtype,
         fill_value="",
-        compressors=compressor,
+        compressors=compressor,  # type: ignore[arg-type]
     )
     assert isinstance(a.metadata, ArrayV3Metadata)  # needed for mypy
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		Define a ``Protocol`` to model the ``numcodecs.abc.Codec`` interface. This is groundwork toward
		making ``numcodecs`` an optional dependency for ``zarr-python``.