| 
25 | 25 | 
 
  | 
26 | 26 | from __future__ import annotations  | 
27 | 27 | 
 
  | 
28 |  | -import asyncio  | 
29 |  | -import math  | 
30 |  | -from dataclasses import dataclass, replace  | 
31 |  | -from functools import cached_property  | 
32 | 28 | from importlib.metadata import version  | 
33 |  | -from typing import Any, Self  | 
34 |  | -from warnings import warn  | 
35 | 29 | 
 
  | 
36 |  | -import numpy as np  | 
37 | 30 | from packaging.version import Version  | 
38 | 31 | 
 
  | 
39 |  | -import numcodecs  | 
40 |  | - | 
41 | 32 | try:  | 
42 | 33 |     import zarr  # noqa: F401  | 
43 | 34 | 
 
  | 
44 |  | -    if Version(version('zarr')) < Version("3.0.0"):  # pragma: no cover  | 
45 |  | -        raise ImportError("zarr 3.0.0 or later is required to use the numcodecs zarr integration.")  | 
 | 35 | +    zarr_version = version('zarr')  | 
 | 36 | +    if Version(zarr_version) < Version("3.0.8"):  # pragma: no cover  | 
 | 37 | +        msg = f"zarr 3.0.8 or later is required to use the numcodecs zarr integration. Got {zarr_version}."  | 
 | 38 | +        raise ImportError(msg)  | 
46 | 39 | except ImportError as e:  # pragma: no cover  | 
47 |  | -    raise ImportError(  | 
48 |  | -        "zarr 3.0.0 or later is required to use the numcodecs zarr integration."  | 
49 |  | -    ) from e  | 
50 |  | - | 
51 |  | -from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec  | 
52 |  | -from zarr.abc.metadata import Metadata  | 
53 |  | -from zarr.core.array_spec import ArraySpec  | 
54 |  | -from zarr.core.buffer import Buffer, BufferPrototype, NDBuffer  | 
55 |  | -from zarr.core.buffer.cpu import as_numpy_array_wrapper  | 
56 |  | -from zarr.core.common import JSON, parse_named_configuration, product  | 
57 |  | - | 
58 |  | -CODEC_PREFIX = "numcodecs."  | 
59 |  | - | 
60 |  | - | 
61 |  | -def _from_zarr_dtype(dtype: Any) -> np.dtype:  | 
62 |  | -    """  | 
63 |  | -    Get a numpy data type from an array spec, depending on the zarr version.  | 
64 |  | -    """  | 
65 |  | -    if Version(version('zarr')) >= Version("3.1.0"):  | 
66 |  | -        return dtype.to_native_dtype()  | 
67 |  | -    return dtype  # pragma: no cover  | 
68 |  | - | 
69 |  | - | 
70 |  | -def _to_zarr_dtype(dtype: np.dtype) -> Any:  | 
71 |  | -    if Version(version('zarr')) >= Version("3.1.0"):  | 
72 |  | -        from zarr.dtype import parse_data_type  | 
73 |  | - | 
74 |  | -        return parse_data_type(dtype, zarr_format=3)  | 
75 |  | -    return dtype  # pragma: no cover  | 
76 |  | - | 
77 |  | - | 
78 |  | -def _expect_name_prefix(codec_name: str) -> str:  | 
79 |  | -    if not codec_name.startswith(CODEC_PREFIX):  | 
80 |  | -        raise ValueError(  | 
81 |  | -            f"Expected name to start with '{CODEC_PREFIX}'. Got {codec_name} instead."  | 
82 |  | -        )  # pragma: no cover  | 
83 |  | -    return codec_name.removeprefix(CODEC_PREFIX)  | 
84 |  | - | 
85 |  | - | 
86 |  | -def _parse_codec_configuration(data: dict[str, JSON]) -> dict[str, JSON]:  | 
87 |  | -    parsed_name, parsed_configuration = parse_named_configuration(data)  | 
88 |  | -    if not parsed_name.startswith(CODEC_PREFIX):  | 
89 |  | -        raise ValueError(  | 
90 |  | -            f"Expected name to start with '{CODEC_PREFIX}'. Got {parsed_name} instead."  | 
91 |  | -        )  # pragma: no cover  | 
92 |  | -    id = _expect_name_prefix(parsed_name)  | 
93 |  | -    return {"id": id, **parsed_configuration}  | 
94 |  | - | 
95 |  | - | 
96 |  | -@dataclass(frozen=True)  | 
97 |  | -class _NumcodecsCodec(Metadata):  | 
98 |  | -    codec_name: str  | 
99 |  | -    codec_config: dict[str, JSON]  | 
100 |  | - | 
101 |  | -    def __init_subclass__(cls, *, codec_name: str | None = None, **kwargs):  | 
102 |  | -        """To be used only when creating the actual public-facing codec class."""  | 
103 |  | -        super().__init_subclass__(**kwargs)  | 
104 |  | -        if codec_name is not None:  | 
105 |  | -            namespace = codec_name  | 
106 |  | - | 
107 |  | -            cls_name = f"{CODEC_PREFIX}{namespace}.{cls.__name__}"  | 
108 |  | -            cls.codec_name = f"{CODEC_PREFIX}{namespace}"  | 
109 |  | -            cls.__doc__ = f"""  | 
110 |  | -            See :class:`{cls_name}` for more details and parameters.  | 
111 |  | -            """  | 
112 |  | - | 
113 |  | -    def __init__(self, **codec_config: JSON) -> None:  | 
114 |  | -        if not self.codec_name:  | 
115 |  | -            raise ValueError(  | 
116 |  | -                "The codec name needs to be supplied through the `codec_name` attribute."  | 
117 |  | -            )  # pragma: no cover  | 
118 |  | -        unprefixed_codec_name = _expect_name_prefix(self.codec_name)  | 
119 |  | - | 
120 |  | -        if "id" not in codec_config:  | 
121 |  | -            codec_config = {"id": unprefixed_codec_name, **codec_config}  | 
122 |  | -        elif codec_config["id"] != unprefixed_codec_name:  | 
123 |  | -            raise ValueError(  | 
124 |  | -                f"Codec id does not match {unprefixed_codec_name}. Got: {codec_config['id']}."  | 
125 |  | -            )  # pragma: no cover  | 
126 |  | - | 
127 |  | -        object.__setattr__(self, "codec_config", codec_config)  | 
128 |  | -        warn(  | 
129 |  | -            "Numcodecs codecs are not in the Zarr version 3 specification and "  | 
130 |  | -            "may not be supported by other zarr implementations.",  | 
131 |  | -            category=UserWarning,  | 
132 |  | -            stacklevel=2,  | 
133 |  | -        )  | 
134 |  | - | 
135 |  | -    @cached_property  | 
136 |  | -    def _codec(self) -> numcodecs.abc.Codec:  | 
137 |  | -        return numcodecs.get_codec(self.codec_config)  | 
138 |  | - | 
139 |  | -    @classmethod  | 
140 |  | -    def from_dict(cls, data: dict[str, JSON]) -> Self:  | 
141 |  | -        codec_config = _parse_codec_configuration(data)  | 
142 |  | -        return cls(**codec_config)  | 
143 |  | - | 
144 |  | -    def to_dict(self) -> dict[str, JSON]:  | 
145 |  | -        codec_config = self.codec_config.copy()  | 
146 |  | -        codec_config.pop("id", None)  | 
147 |  | -        return {  | 
148 |  | -            "name": self.codec_name,  | 
149 |  | -            "configuration": codec_config,  | 
150 |  | -        }  | 
151 |  | - | 
152 |  | -    def compute_encoded_size(self, input_byte_length: int, chunk_spec: ArraySpec) -> int:  | 
153 |  | -        raise NotImplementedError  # pragma: no cover  | 
154 |  | - | 
155 |  | -    # Override __repr__ because dynamically constructed classes don't seem to work otherwise  | 
156 |  | -    def __repr__(self) -> str:  | 
157 |  | -        codec_config = self.codec_config.copy()  | 
158 |  | -        codec_config.pop("id", None)  | 
159 |  | -        return f"{self.__class__.__name__}(codec_name={self.codec_name!r}, codec_config={codec_config!r})"  | 
160 |  | - | 
161 |  | - | 
162 |  | -class _NumcodecsBytesBytesCodec(_NumcodecsCodec, BytesBytesCodec):  | 
163 |  | -    def __init__(self, **codec_config: JSON) -> None:  | 
164 |  | -        super().__init__(**codec_config)  | 
165 |  | - | 
166 |  | -    async def _decode_single(self, chunk_bytes: Buffer, chunk_spec: ArraySpec) -> Buffer:  | 
167 |  | -        return await asyncio.to_thread(  | 
168 |  | -            as_numpy_array_wrapper,  | 
169 |  | -            self._codec.decode,  | 
170 |  | -            chunk_bytes,  | 
171 |  | -            chunk_spec.prototype,  | 
172 |  | -        )  | 
173 |  | - | 
174 |  | -    def _encode(self, chunk_bytes: Buffer, prototype: BufferPrototype) -> Buffer:  | 
175 |  | -        encoded = self._codec.encode(chunk_bytes.as_array_like())  | 
176 |  | -        if isinstance(encoded, np.ndarray):  # Required for checksum codecs  | 
177 |  | -            return prototype.buffer.from_bytes(encoded.tobytes())  | 
178 |  | -        return prototype.buffer.from_bytes(encoded)  | 
179 |  | - | 
180 |  | -    async def _encode_single(self, chunk_bytes: Buffer, chunk_spec: ArraySpec) -> Buffer:  | 
181 |  | -        return await asyncio.to_thread(self._encode, chunk_bytes, chunk_spec.prototype)  | 
182 |  | - | 
183 |  | - | 
184 |  | -class _NumcodecsArrayArrayCodec(_NumcodecsCodec, ArrayArrayCodec):  | 
185 |  | -    def __init__(self, **codec_config: JSON) -> None:  | 
186 |  | -        super().__init__(**codec_config)  | 
187 |  | - | 
188 |  | -    async def _decode_single(self, chunk_array: NDBuffer, chunk_spec: ArraySpec) -> NDBuffer:  | 
189 |  | -        chunk_ndarray = chunk_array.as_ndarray_like()  | 
190 |  | -        out = await asyncio.to_thread(self._codec.decode, chunk_ndarray)  | 
191 |  | -        return chunk_spec.prototype.nd_buffer.from_ndarray_like(out.reshape(chunk_spec.shape))  | 
192 |  | - | 
193 |  | -    async def _encode_single(self, chunk_array: NDBuffer, chunk_spec: ArraySpec) -> NDBuffer:  | 
194 |  | -        chunk_ndarray = chunk_array.as_ndarray_like()  | 
195 |  | -        out = await asyncio.to_thread(self._codec.encode, chunk_ndarray)  | 
196 |  | -        return chunk_spec.prototype.nd_buffer.from_ndarray_like(out)  | 
197 |  | - | 
198 |  | - | 
199 |  | -class _NumcodecsArrayBytesCodec(_NumcodecsCodec, ArrayBytesCodec):  | 
200 |  | -    def __init__(self, **codec_config: JSON) -> None:  | 
201 |  | -        super().__init__(**codec_config)  | 
202 |  | - | 
203 |  | -    async def _decode_single(self, chunk_buffer: Buffer, chunk_spec: ArraySpec) -> NDBuffer:  | 
204 |  | -        chunk_bytes = chunk_buffer.to_bytes()  | 
205 |  | -        out = await asyncio.to_thread(self._codec.decode, chunk_bytes)  | 
206 |  | -        return chunk_spec.prototype.nd_buffer.from_ndarray_like(out.reshape(chunk_spec.shape))  | 
207 |  | - | 
208 |  | -    async def _encode_single(self, chunk_ndbuffer: NDBuffer, chunk_spec: ArraySpec) -> Buffer:  | 
209 |  | -        chunk_ndarray = chunk_ndbuffer.as_ndarray_like()  | 
210 |  | -        out = await asyncio.to_thread(self._codec.encode, chunk_ndarray)  | 
211 |  | -        return chunk_spec.prototype.buffer.from_bytes(out)  | 
212 |  | - | 
213 |  | - | 
214 |  | -# bytes-to-bytes codecs  | 
215 |  | -class Blosc(_NumcodecsBytesBytesCodec, codec_name="blosc"):  | 
216 |  | -    pass  | 
217 |  | - | 
218 |  | - | 
219 |  | -class LZ4(_NumcodecsBytesBytesCodec, codec_name="lz4"):  | 
220 |  | -    pass  | 
221 |  | - | 
222 |  | - | 
223 |  | -class Zstd(_NumcodecsBytesBytesCodec, codec_name="zstd"):  | 
224 |  | -    pass  | 
225 |  | - | 
226 |  | - | 
227 |  | -class Zlib(_NumcodecsBytesBytesCodec, codec_name="zlib"):  | 
228 |  | -    pass  | 
229 |  | - | 
230 |  | - | 
231 |  | -class GZip(_NumcodecsBytesBytesCodec, codec_name="gzip"):  | 
232 |  | -    pass  | 
233 |  | - | 
234 |  | - | 
235 |  | -class BZ2(_NumcodecsBytesBytesCodec, codec_name="bz2"):  | 
236 |  | -    pass  | 
237 |  | - | 
238 |  | - | 
239 |  | -class LZMA(_NumcodecsBytesBytesCodec, codec_name="lzma"):  | 
240 |  | -    pass  | 
241 |  | - | 
242 |  | - | 
243 |  | -class Shuffle(_NumcodecsBytesBytesCodec, codec_name="shuffle"):  | 
244 |  | -    def evolve_from_array_spec(self, array_spec: ArraySpec) -> Shuffle:  | 
245 |  | -        if self.codec_config.get("elementsize") is None:  | 
246 |  | -            dtype = _from_zarr_dtype(array_spec.dtype)  | 
247 |  | -            return Shuffle(**{**self.codec_config, "elementsize": dtype.itemsize})  | 
248 |  | -        return self  # pragma: no cover  | 
249 |  | - | 
250 |  | - | 
251 |  | -# array-to-array codecs ("filters")  | 
252 |  | -class Delta(_NumcodecsArrayArrayCodec, codec_name="delta"):  | 
253 |  | -    def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec:  | 
254 |  | -        if astype := self.codec_config.get("astype"):  | 
255 |  | -            dtype = _to_zarr_dtype(np.dtype(astype))  # type: ignore[call-overload]  | 
256 |  | -            return replace(chunk_spec, dtype=dtype)  | 
257 |  | -        return chunk_spec  | 
258 |  | - | 
259 |  | - | 
260 |  | -class BitRound(_NumcodecsArrayArrayCodec, codec_name="bitround"):  | 
261 |  | -    pass  | 
262 |  | - | 
263 |  | - | 
264 |  | -class FixedScaleOffset(_NumcodecsArrayArrayCodec, codec_name="fixedscaleoffset"):  | 
265 |  | -    def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec:  | 
266 |  | -        if astype := self.codec_config.get("astype"):  | 
267 |  | -            dtype = _to_zarr_dtype(np.dtype(astype))  # type: ignore[call-overload]  | 
268 |  | -            return replace(chunk_spec, dtype=dtype)  | 
269 |  | -        return chunk_spec  | 
270 |  | - | 
271 |  | -    def evolve_from_array_spec(self, array_spec: ArraySpec) -> FixedScaleOffset:  | 
272 |  | -        if self.codec_config.get("dtype") is None:  | 
273 |  | -            dtype = _from_zarr_dtype(array_spec.dtype)  | 
274 |  | -            return FixedScaleOffset(**{**self.codec_config, "dtype": str(dtype)})  | 
275 |  | -        return self  | 
276 |  | - | 
277 |  | - | 
278 |  | -class Quantize(_NumcodecsArrayArrayCodec, codec_name="quantize"):  | 
279 |  | -    def __init__(self, **codec_config: JSON) -> None:  | 
280 |  | -        super().__init__(**codec_config)  | 
281 |  | - | 
282 |  | -    def evolve_from_array_spec(self, array_spec: ArraySpec) -> Quantize:  | 
283 |  | -        if self.codec_config.get("dtype") is None:  | 
284 |  | -            dtype = _from_zarr_dtype(array_spec.dtype)  | 
285 |  | -            return Quantize(**{**self.codec_config, "dtype": str(dtype)})  | 
286 |  | -        return self  | 
287 |  | - | 
288 |  | - | 
289 |  | -class PackBits(_NumcodecsArrayArrayCodec, codec_name="packbits"):  | 
290 |  | -    def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec:  | 
291 |  | -        return replace(  | 
292 |  | -            chunk_spec,  | 
293 |  | -            shape=(1 + math.ceil(product(chunk_spec.shape) / 8),),  | 
294 |  | -            dtype=_to_zarr_dtype(np.dtype("uint8")),  | 
295 |  | -        )  | 
296 |  | - | 
297 |  | -    # todo: remove this type: ignore when this class can be defined w.r.t.  | 
298 |  | -    # a single zarr dtype API  | 
299 |  | -    def validate(self, *, dtype: np.dtype[Any], **_kwargs) -> None:  # type: ignore[override]  | 
300 |  | -        _dtype = _from_zarr_dtype(dtype)  | 
301 |  | -        if _dtype != np.dtype("bool"):  | 
302 |  | -            raise ValueError(f"Packbits filter requires bool dtype. Got {dtype}.")  | 
303 |  | - | 
304 |  | - | 
305 |  | -class AsType(_NumcodecsArrayArrayCodec, codec_name="astype"):  | 
306 |  | -    def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec:  | 
307 |  | -        dtype = _to_zarr_dtype(np.dtype(self.codec_config["encode_dtype"]))  # type: ignore[arg-type]  | 
308 |  | -        return replace(chunk_spec, dtype=dtype)  | 
309 |  | - | 
310 |  | -    def evolve_from_array_spec(self, array_spec: ArraySpec) -> AsType:  | 
311 |  | -        if self.codec_config.get("decode_dtype") is None:  | 
312 |  | -            # TODO: remove these coverage exemptions the correct way, i.e. with tests  | 
313 |  | -            dtype = _from_zarr_dtype(array_spec.dtype)  # pragma: no cover  | 
314 |  | -            return AsType(**{**self.codec_config, "decode_dtype": str(dtype)})  # pragma: no cover  | 
315 |  | -        return self  | 
316 |  | - | 
317 |  | - | 
318 |  | -# bytes-to-bytes checksum codecs  | 
319 |  | -class _NumcodecsChecksumCodec(_NumcodecsBytesBytesCodec):  | 
320 |  | -    def compute_encoded_size(self, input_byte_length: int, chunk_spec: ArraySpec) -> int:  | 
321 |  | -        return input_byte_length + 4  # pragma: no cover  | 
322 |  | - | 
323 |  | - | 
324 |  | -class CRC32(_NumcodecsChecksumCodec, codec_name="crc32"):  | 
325 |  | -    pass  | 
326 |  | - | 
327 |  | - | 
328 |  | -class CRC32C(_NumcodecsChecksumCodec, codec_name="crc32c"):  | 
329 |  | -    pass  | 
330 |  | - | 
331 |  | - | 
332 |  | -class Adler32(_NumcodecsChecksumCodec, codec_name="adler32"):  | 
333 |  | -    pass  | 
334 |  | - | 
335 |  | - | 
336 |  | -class Fletcher32(_NumcodecsChecksumCodec, codec_name="fletcher32"):  | 
337 |  | -    pass  | 
338 |  | - | 
339 |  | - | 
340 |  | -class JenkinsLookup3(_NumcodecsChecksumCodec, codec_name="jenkins_lookup3"):  | 
341 |  | -    pass  | 
342 |  | - | 
343 |  | - | 
344 |  | -# array-to-bytes codecs  | 
345 |  | -class PCodec(_NumcodecsArrayBytesCodec, codec_name="pcodec"):  | 
346 |  | -    pass  | 
347 |  | - | 
348 |  | - | 
349 |  | -class ZFPY(_NumcodecsArrayBytesCodec, codec_name="zfpy"):  | 
350 |  | -    pass  | 
351 |  | - | 
 | 40 | +    msg = "zarr could not be imported. Zarr 3.1.0 or later is required to use the numcodecs zarr integration."  | 
 | 41 | +    raise ImportError(msg) from e  | 
 | 42 | + | 
 | 43 | +from zarr.codecs._numcodecs import (  | 
 | 44 | +    BZ2,  | 
 | 45 | +    CRC32,  | 
 | 46 | +    CRC32C,  | 
 | 47 | +    LZ4,  | 
 | 48 | +    LZMA,  | 
 | 49 | +    ZFPY,  | 
 | 50 | +    Adler32,  | 
 | 51 | +    AsType,  | 
 | 52 | +    BitRound,  | 
 | 53 | +    Blosc,  | 
 | 54 | +    Delta,  | 
 | 55 | +    FixedScaleOffset,  | 
 | 56 | +    Fletcher32,  | 
 | 57 | +    GZip,  | 
 | 58 | +    JenkinsLookup3,  | 
 | 59 | +    PackBits,  | 
 | 60 | +    PCodec,  | 
 | 61 | +    Quantize,  | 
 | 62 | +    Shuffle,  | 
 | 63 | +    Zlib,  | 
 | 64 | +    Zstd,  | 
 | 65 | +)  | 
352 | 66 | 
 
  | 
353 | 67 | __all__ = [  | 
354 | 68 |     "BZ2",  | 
 | 
0 commit comments