Skip to content

Commit 791201f

Browse files
authored
Merge branch 'main' into mkdocs
2 parents cf33263 + c21d1f9 commit 791201f

File tree

22 files changed

+277
-83
lines changed

22 files changed

+277
-83
lines changed

.github/workflows/releases.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ jobs:
3636
needs: [build_artifacts]
3737
runs-on: ubuntu-latest
3838
steps:
39-
- uses: actions/download-artifact@v4
39+
- uses: actions/download-artifact@v5
4040
with:
4141
name: releases
4242
path: dist
@@ -51,7 +51,7 @@ jobs:
5151
runs-on: ubuntu-latest
5252
if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/v')
5353
steps:
54-
- uses: actions/download-artifact@v4
54+
- uses: actions/download-artifact@v5
5555
with:
5656
name: releases
5757
path: dist

changes/3318.misc.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Define a ``Protocol`` to model the ``numcodecs.abc.Codec`` interface. This is groundwork toward
2+
making ``numcodecs`` an optional dependency for ``zarr-python``.

changes/3371.misc.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Ensure that tests for executable examples are run in a fresh python environment.

changes/3372.misc.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Make certain imports in ``zarr.abc.store`` local to method definitions. This minimizes the risk of
2+
circular imports when adding new classes to ``zarr.abc.store``.

src/zarr/abc/codec.py

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
from __future__ import annotations
22

33
from abc import abstractmethod
4-
from typing import TYPE_CHECKING, Generic, TypeVar
4+
from collections.abc import Mapping
5+
from typing import TYPE_CHECKING, Generic, TypeGuard, TypeVar
6+
7+
from typing_extensions import ReadOnly, TypedDict
58

69
from zarr.abc.metadata import Metadata
710
from zarr.core.buffer import Buffer, NDBuffer
8-
from zarr.core.common import ChunkCoords, concurrent_map
11+
from zarr.core.common import ChunkCoords, NamedConfig, concurrent_map
912
from zarr.core.config import config
1013

1114
if TYPE_CHECKING:
@@ -34,6 +37,27 @@
3437
CodecInput = TypeVar("CodecInput", bound=NDBuffer | Buffer)
3538
CodecOutput = TypeVar("CodecOutput", bound=NDBuffer | Buffer)
3639

40+
TName = TypeVar("TName", bound=str, covariant=True)
41+
42+
43+
class CodecJSON_V2(TypedDict, Generic[TName]):
44+
"""The JSON representation of a codec for Zarr V2"""
45+
46+
id: ReadOnly[TName]
47+
48+
49+
def _check_codecjson_v2(data: object) -> TypeGuard[CodecJSON_V2[str]]:
50+
return isinstance(data, Mapping) and "id" in data and isinstance(data["id"], str)
51+
52+
53+
CodecJSON_V3 = str | NamedConfig[str, Mapping[str, object]]
54+
"""The JSON representation of a codec for Zarr V3."""
55+
56+
# The widest type we will *accept* for a codec JSON
57+
# This covers v2 and v3
58+
CodecJSON = str | Mapping[str, object]
59+
"""The widest type of JSON-like input that could specify a codec."""
60+
3761

3862
class BaseCodec(Metadata, Generic[CodecInput, CodecOutput]):
3963
"""Generic base class for codecs.

src/zarr/abc/numcodec.py

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
from typing import Any, Self, TypeGuard
2+
3+
from typing_extensions import Protocol
4+
5+
6+
class Numcodec(Protocol):
7+
"""
8+
A protocol that models the ``numcodecs.abc.Codec`` interface.
9+
10+
This protocol should be considered experimental. Expect the type annotations for ``buf`` and
11+
``out`` to narrow in the future.
12+
"""
13+
14+
codec_id: str
15+
16+
def encode(self, buf: Any) -> Any:
17+
"""Encode data from ``buf``.
18+
19+
Parameters
20+
----------
21+
buf : Any
22+
Data to be encoded.
23+
24+
Returns
25+
-------
26+
enc : Any
27+
Encoded data.
28+
"""
29+
...
30+
31+
def decode(self, buf: Any, out: Any | None = None) -> Any:
32+
"""
33+
Decode data in ``buf``.
34+
35+
Parameters
36+
----------
37+
buf : Any
38+
Encoded data.
39+
out : Any
40+
Writeable buffer to store decoded data. If provided, this buffer must
41+
be exactly the right size to store the decoded data.
42+
43+
Returns
44+
-------
45+
dec : Any
46+
Decoded data.
47+
"""
48+
...
49+
50+
def get_config(self) -> Any:
51+
"""
52+
Return a JSON-serializable configuration dictionary for this
53+
codec. Must include an ``'id'`` field with the codec identifier.
54+
"""
55+
...
56+
57+
@classmethod
58+
def from_config(cls, config: Any) -> Self:
59+
"""
60+
Instantiate a codec from a configuration dictionary.
61+
62+
Parameters
63+
----------
64+
config : Any
65+
A configuration dictionary for this codec.
66+
"""
67+
...
68+
69+
70+
def _is_numcodec_cls(obj: object) -> TypeGuard[type[Numcodec]]:
71+
"""
72+
Check if the given object is a class that implements the Numcodec protocol.
73+
74+
The @runtime_checkable decorator does not allow issubclass checks for protocols with non-method
75+
members (i.e., attributes), so we use this function to manually check for the presence of the
76+
required attributes and methods on a given object.
77+
"""
78+
return (
79+
isinstance(obj, type)
80+
and hasattr(obj, "codec_id")
81+
and isinstance(obj.codec_id, str)
82+
and hasattr(obj, "encode")
83+
and callable(obj.encode)
84+
and hasattr(obj, "decode")
85+
and callable(obj.decode)
86+
and hasattr(obj, "get_config")
87+
and callable(obj.get_config)
88+
and hasattr(obj, "from_config")
89+
and callable(obj.from_config)
90+
)
91+
92+
93+
def _is_numcodec(obj: object) -> TypeGuard[Numcodec]:
94+
"""
95+
Check if the given object implements the Numcodec protocol.
96+
97+
The @runtime_checkable decorator does not allow issubclass checks for protocols with non-method
98+
members (i.e., attributes), so we use this function to manually check for the presence of the
99+
required attributes and methods on a given object.
100+
"""
101+
return _is_numcodec_cls(type(obj))

src/zarr/abc/store.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,6 @@
66
from itertools import starmap
77
from typing import TYPE_CHECKING, Protocol, runtime_checkable
88

9-
from zarr.core.buffer.core import default_buffer_prototype
10-
from zarr.core.common import concurrent_map
11-
from zarr.core.config import config
12-
139
if TYPE_CHECKING:
1410
from collections.abc import AsyncGenerator, AsyncIterator, Iterable
1511
from types import TracebackType
@@ -438,6 +434,9 @@ async def getsize(self, key: str) -> int:
438434
# Note to implementers: this default implementation is very inefficient since
439435
# it requires reading the entire object. Many systems will have ways to get the
440436
# size of an object without reading it.
437+
# avoid circular import
438+
from zarr.core.buffer.core import default_buffer_prototype
439+
441440
value = await self.get(key, prototype=default_buffer_prototype())
442441
if value is None:
443442
raise FileNotFoundError(key)
@@ -476,6 +475,11 @@ async def getsize_prefix(self, prefix: str) -> int:
476475
# on to getting sizes. Ideally we would overlap those two, which should
477476
# improve tail latency and might reduce memory pressure (since not all keys
478477
# would be in memory at once).
478+
479+
# avoid circular import
480+
from zarr.core.common import concurrent_map
481+
from zarr.core.config import config
482+
479483
keys = [(x,) async for x in self.list_prefix(prefix)]
480484
limit = config.get("async.concurrency")
481485
sizes = await concurrent_map(keys, self.getsize, limit=limit)

src/zarr/api/asynchronous.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,8 @@
5252
if TYPE_CHECKING:
5353
from collections.abc import Iterable
5454

55-
import numcodecs.abc
56-
5755
from zarr.abc.codec import Codec
56+
from zarr.abc.numcodec import Numcodec
5857
from zarr.core.buffer import NDArrayLikeOrScalar
5958
from zarr.core.chunk_key_encodings import ChunkKeyEncoding
6059
from zarr.storage import StoreLike
@@ -877,7 +876,7 @@ async def create(
877876
overwrite: bool = False,
878877
path: PathLike | None = None,
879878
chunk_store: StoreLike | None = None,
880-
filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None,
879+
filters: Iterable[dict[str, JSON] | Numcodec] | None = None,
881880
cache_metadata: bool | None = None,
882881
cache_attrs: bool | None = None,
883882
read_only: bool | None = None,

src/zarr/api/synchronous.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,11 @@
1515
if TYPE_CHECKING:
1616
from collections.abc import Iterable
1717

18-
import numcodecs.abc
1918
import numpy as np
2019
import numpy.typing as npt
2120

2221
from zarr.abc.codec import Codec
22+
from zarr.abc.numcodec import Numcodec
2323
from zarr.api.asynchronous import ArrayLike, PathLike
2424
from zarr.core.array import (
2525
CompressorsLike,
@@ -610,7 +610,7 @@ def create(
610610
overwrite: bool = False,
611611
path: PathLike | None = None,
612612
chunk_store: StoreLike | None = None,
613-
filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None,
613+
filters: Iterable[dict[str, JSON] | Numcodec] | None = None,
614614
cache_metadata: bool | None = None,
615615
cache_attrs: bool | None = None,
616616
read_only: bool | None = None,

src/zarr/codecs/_v2.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,24 +4,22 @@
44
from dataclasses import dataclass
55
from typing import TYPE_CHECKING
66

7-
import numcodecs
87
import numpy as np
98
from numcodecs.compat import ensure_bytes, ensure_ndarray_like
109

1110
from zarr.abc.codec import ArrayBytesCodec
1211
from zarr.registry import get_ndbuffer_class
1312

1413
if TYPE_CHECKING:
15-
import numcodecs.abc
16-
14+
from zarr.abc.numcodec import Numcodec
1715
from zarr.core.array_spec import ArraySpec
1816
from zarr.core.buffer import Buffer, NDBuffer
1917

2018

2119
@dataclass(frozen=True)
2220
class V2Codec(ArrayBytesCodec):
23-
filters: tuple[numcodecs.abc.Codec, ...] | None
24-
compressor: numcodecs.abc.Codec | None
21+
filters: tuple[Numcodec, ...] | None
22+
compressor: Numcodec | None
2523

2624
is_fixed_size = False
2725

@@ -86,7 +84,6 @@ async def _encode_single(
8684
if self.filters:
8785
for f in self.filters:
8886
chunk = await asyncio.to_thread(f.encode, chunk)
89-
9087
# check object encoding
9188
if ensure_ndarray_like(chunk).dtype == object:
9289
raise RuntimeError("cannot write object array without object codec")
@@ -96,7 +93,6 @@ async def _encode_single(
9693
cdata = await asyncio.to_thread(self.compressor.encode, chunk)
9794
else:
9895
cdata = chunk
99-
10096
cdata = ensure_bytes(cdata)
10197
return chunk_spec.prototype.buffer.from_bytes(cdata)
10298

0 commit comments

Comments
 (0)