Skip to content

Commit 8cc1fda

Browse files
committed
test against zarr-with-numcodecs branch
1 parent e994840 commit 8cc1fda

File tree

2 files changed

+35
-319
lines changed

2 files changed

+35
-319
lines changed

numcodecs/zarr3.py

Lines changed: 30 additions & 316 deletions
Original file line numberDiff line numberDiff line change
@@ -25,330 +25,44 @@
2525

2626
from __future__ import annotations
2727

28-
import asyncio
29-
import math
30-
from dataclasses import dataclass, replace
31-
from functools import cached_property
3228
from importlib.metadata import version
33-
from typing import Any, Self
34-
from warnings import warn
3529

36-
import numpy as np
3730
from packaging.version import Version
3831

39-
import numcodecs
40-
4132
try:
4233
import zarr # noqa: F401
4334

44-
if Version(version('zarr')) < Version("3.0.0"): # pragma: no cover
45-
raise ImportError("zarr 3.0.0 or later is required to use the numcodecs zarr integration.")
35+
zarr_version = version('zarr')
36+
if Version(zarr_version) < Version("3.0.8"): # pragma: no cover
37+
msg = f"zarr 3.0.8 or later is required to use the numcodecs zarr integration. Got {zarr_version}."
38+
raise ImportError(msg)
4639
except ImportError as e: # pragma: no cover
47-
raise ImportError(
48-
"zarr 3.0.0 or later is required to use the numcodecs zarr integration."
49-
) from e
50-
51-
from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec
52-
from zarr.abc.metadata import Metadata
53-
from zarr.core.array_spec import ArraySpec
54-
from zarr.core.buffer import Buffer, BufferPrototype, NDBuffer
55-
from zarr.core.buffer.cpu import as_numpy_array_wrapper
56-
from zarr.core.common import JSON, parse_named_configuration, product
57-
58-
CODEC_PREFIX = "numcodecs."
59-
60-
61-
def _from_zarr_dtype(dtype: Any) -> np.dtype:
62-
"""
63-
Get a numpy data type from an array spec, depending on the zarr version.
64-
"""
65-
if Version(version('zarr')) >= Version("3.1.0"):
66-
return dtype.to_native_dtype()
67-
return dtype # pragma: no cover
68-
69-
70-
def _to_zarr_dtype(dtype: np.dtype) -> Any:
71-
if Version(version('zarr')) >= Version("3.1.0"):
72-
from zarr.dtype import parse_data_type
73-
74-
return parse_data_type(dtype, zarr_format=3)
75-
return dtype # pragma: no cover
76-
77-
78-
def _expect_name_prefix(codec_name: str) -> str:
79-
if not codec_name.startswith(CODEC_PREFIX):
80-
raise ValueError(
81-
f"Expected name to start with '{CODEC_PREFIX}'. Got {codec_name} instead."
82-
) # pragma: no cover
83-
return codec_name.removeprefix(CODEC_PREFIX)
84-
85-
86-
def _parse_codec_configuration(data: dict[str, JSON]) -> dict[str, JSON]:
87-
parsed_name, parsed_configuration = parse_named_configuration(data)
88-
if not parsed_name.startswith(CODEC_PREFIX):
89-
raise ValueError(
90-
f"Expected name to start with '{CODEC_PREFIX}'. Got {parsed_name} instead."
91-
) # pragma: no cover
92-
id = _expect_name_prefix(parsed_name)
93-
return {"id": id, **parsed_configuration}
94-
95-
96-
@dataclass(frozen=True)
97-
class _NumcodecsCodec(Metadata):
98-
codec_name: str
99-
codec_config: dict[str, JSON]
100-
101-
def __init_subclass__(cls, *, codec_name: str | None = None, **kwargs):
102-
"""To be used only when creating the actual public-facing codec class."""
103-
super().__init_subclass__(**kwargs)
104-
if codec_name is not None:
105-
namespace = codec_name
106-
107-
cls_name = f"{CODEC_PREFIX}{namespace}.{cls.__name__}"
108-
cls.codec_name = f"{CODEC_PREFIX}{namespace}"
109-
cls.__doc__ = f"""
110-
See :class:`{cls_name}` for more details and parameters.
111-
"""
112-
113-
def __init__(self, **codec_config: JSON) -> None:
114-
if not self.codec_name:
115-
raise ValueError(
116-
"The codec name needs to be supplied through the `codec_name` attribute."
117-
) # pragma: no cover
118-
unprefixed_codec_name = _expect_name_prefix(self.codec_name)
119-
120-
if "id" not in codec_config:
121-
codec_config = {"id": unprefixed_codec_name, **codec_config}
122-
elif codec_config["id"] != unprefixed_codec_name:
123-
raise ValueError(
124-
f"Codec id does not match {unprefixed_codec_name}. Got: {codec_config['id']}."
125-
) # pragma: no cover
126-
127-
object.__setattr__(self, "codec_config", codec_config)
128-
warn(
129-
"Numcodecs codecs are not in the Zarr version 3 specification and "
130-
"may not be supported by other zarr implementations.",
131-
category=UserWarning,
132-
stacklevel=2,
133-
)
134-
135-
@cached_property
136-
def _codec(self) -> numcodecs.abc.Codec:
137-
return numcodecs.get_codec(self.codec_config)
138-
139-
@classmethod
140-
def from_dict(cls, data: dict[str, JSON]) -> Self:
141-
codec_config = _parse_codec_configuration(data)
142-
return cls(**codec_config)
143-
144-
def to_dict(self) -> dict[str, JSON]:
145-
codec_config = self.codec_config.copy()
146-
codec_config.pop("id", None)
147-
return {
148-
"name": self.codec_name,
149-
"configuration": codec_config,
150-
}
151-
152-
def compute_encoded_size(self, input_byte_length: int, chunk_spec: ArraySpec) -> int:
153-
raise NotImplementedError # pragma: no cover
154-
155-
# Override __repr__ because dynamically constructed classes don't seem to work otherwise
156-
def __repr__(self) -> str:
157-
codec_config = self.codec_config.copy()
158-
codec_config.pop("id", None)
159-
return f"{self.__class__.__name__}(codec_name={self.codec_name!r}, codec_config={codec_config!r})"
160-
161-
162-
class _NumcodecsBytesBytesCodec(_NumcodecsCodec, BytesBytesCodec):
163-
def __init__(self, **codec_config: JSON) -> None:
164-
super().__init__(**codec_config)
165-
166-
async def _decode_single(self, chunk_bytes: Buffer, chunk_spec: ArraySpec) -> Buffer:
167-
return await asyncio.to_thread(
168-
as_numpy_array_wrapper,
169-
self._codec.decode,
170-
chunk_bytes,
171-
chunk_spec.prototype,
172-
)
173-
174-
def _encode(self, chunk_bytes: Buffer, prototype: BufferPrototype) -> Buffer:
175-
encoded = self._codec.encode(chunk_bytes.as_array_like())
176-
if isinstance(encoded, np.ndarray): # Required for checksum codecs
177-
return prototype.buffer.from_bytes(encoded.tobytes())
178-
return prototype.buffer.from_bytes(encoded)
179-
180-
async def _encode_single(self, chunk_bytes: Buffer, chunk_spec: ArraySpec) -> Buffer:
181-
return await asyncio.to_thread(self._encode, chunk_bytes, chunk_spec.prototype)
182-
183-
184-
class _NumcodecsArrayArrayCodec(_NumcodecsCodec, ArrayArrayCodec):
185-
def __init__(self, **codec_config: JSON) -> None:
186-
super().__init__(**codec_config)
187-
188-
async def _decode_single(self, chunk_array: NDBuffer, chunk_spec: ArraySpec) -> NDBuffer:
189-
chunk_ndarray = chunk_array.as_ndarray_like()
190-
out = await asyncio.to_thread(self._codec.decode, chunk_ndarray)
191-
return chunk_spec.prototype.nd_buffer.from_ndarray_like(out.reshape(chunk_spec.shape))
192-
193-
async def _encode_single(self, chunk_array: NDBuffer, chunk_spec: ArraySpec) -> NDBuffer:
194-
chunk_ndarray = chunk_array.as_ndarray_like()
195-
out = await asyncio.to_thread(self._codec.encode, chunk_ndarray)
196-
return chunk_spec.prototype.nd_buffer.from_ndarray_like(out)
197-
198-
199-
class _NumcodecsArrayBytesCodec(_NumcodecsCodec, ArrayBytesCodec):
200-
def __init__(self, **codec_config: JSON) -> None:
201-
super().__init__(**codec_config)
202-
203-
async def _decode_single(self, chunk_buffer: Buffer, chunk_spec: ArraySpec) -> NDBuffer:
204-
chunk_bytes = chunk_buffer.to_bytes()
205-
out = await asyncio.to_thread(self._codec.decode, chunk_bytes)
206-
return chunk_spec.prototype.nd_buffer.from_ndarray_like(out.reshape(chunk_spec.shape))
207-
208-
async def _encode_single(self, chunk_ndbuffer: NDBuffer, chunk_spec: ArraySpec) -> Buffer:
209-
chunk_ndarray = chunk_ndbuffer.as_ndarray_like()
210-
out = await asyncio.to_thread(self._codec.encode, chunk_ndarray)
211-
return chunk_spec.prototype.buffer.from_bytes(out)
212-
213-
214-
# bytes-to-bytes codecs
215-
class Blosc(_NumcodecsBytesBytesCodec, codec_name="blosc"):
216-
pass
217-
218-
219-
class LZ4(_NumcodecsBytesBytesCodec, codec_name="lz4"):
220-
pass
221-
222-
223-
class Zstd(_NumcodecsBytesBytesCodec, codec_name="zstd"):
224-
pass
225-
226-
227-
class Zlib(_NumcodecsBytesBytesCodec, codec_name="zlib"):
228-
pass
229-
230-
231-
class GZip(_NumcodecsBytesBytesCodec, codec_name="gzip"):
232-
pass
233-
234-
235-
class BZ2(_NumcodecsBytesBytesCodec, codec_name="bz2"):
236-
pass
237-
238-
239-
class LZMA(_NumcodecsBytesBytesCodec, codec_name="lzma"):
240-
pass
241-
242-
243-
class Shuffle(_NumcodecsBytesBytesCodec, codec_name="shuffle"):
244-
def evolve_from_array_spec(self, array_spec: ArraySpec) -> Shuffle:
245-
if self.codec_config.get("elementsize") is None:
246-
dtype = _from_zarr_dtype(array_spec.dtype)
247-
return Shuffle(**{**self.codec_config, "elementsize": dtype.itemsize})
248-
return self # pragma: no cover
249-
250-
251-
# array-to-array codecs ("filters")
252-
class Delta(_NumcodecsArrayArrayCodec, codec_name="delta"):
253-
def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec:
254-
if astype := self.codec_config.get("astype"):
255-
dtype = _to_zarr_dtype(np.dtype(astype)) # type: ignore[call-overload]
256-
return replace(chunk_spec, dtype=dtype)
257-
return chunk_spec
258-
259-
260-
class BitRound(_NumcodecsArrayArrayCodec, codec_name="bitround"):
261-
pass
262-
263-
264-
class FixedScaleOffset(_NumcodecsArrayArrayCodec, codec_name="fixedscaleoffset"):
265-
def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec:
266-
if astype := self.codec_config.get("astype"):
267-
dtype = _to_zarr_dtype(np.dtype(astype)) # type: ignore[call-overload]
268-
return replace(chunk_spec, dtype=dtype)
269-
return chunk_spec
270-
271-
def evolve_from_array_spec(self, array_spec: ArraySpec) -> FixedScaleOffset:
272-
if self.codec_config.get("dtype") is None:
273-
dtype = _from_zarr_dtype(array_spec.dtype)
274-
return FixedScaleOffset(**{**self.codec_config, "dtype": str(dtype)})
275-
return self
276-
277-
278-
class Quantize(_NumcodecsArrayArrayCodec, codec_name="quantize"):
279-
def __init__(self, **codec_config: JSON) -> None:
280-
super().__init__(**codec_config)
281-
282-
def evolve_from_array_spec(self, array_spec: ArraySpec) -> Quantize:
283-
if self.codec_config.get("dtype") is None:
284-
dtype = _from_zarr_dtype(array_spec.dtype)
285-
return Quantize(**{**self.codec_config, "dtype": str(dtype)})
286-
return self
287-
288-
289-
class PackBits(_NumcodecsArrayArrayCodec, codec_name="packbits"):
290-
def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec:
291-
return replace(
292-
chunk_spec,
293-
shape=(1 + math.ceil(product(chunk_spec.shape) / 8),),
294-
dtype=_to_zarr_dtype(np.dtype("uint8")),
295-
)
296-
297-
# todo: remove this type: ignore when this class can be defined w.r.t.
298-
# a single zarr dtype API
299-
def validate(self, *, dtype: np.dtype[Any], **_kwargs) -> None: # type: ignore[override]
300-
_dtype = _from_zarr_dtype(dtype)
301-
if _dtype != np.dtype("bool"):
302-
raise ValueError(f"Packbits filter requires bool dtype. Got {dtype}.")
303-
304-
305-
class AsType(_NumcodecsArrayArrayCodec, codec_name="astype"):
306-
def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec:
307-
dtype = _to_zarr_dtype(np.dtype(self.codec_config["encode_dtype"])) # type: ignore[arg-type]
308-
return replace(chunk_spec, dtype=dtype)
309-
310-
def evolve_from_array_spec(self, array_spec: ArraySpec) -> AsType:
311-
if self.codec_config.get("decode_dtype") is None:
312-
# TODO: remove these coverage exemptions the correct way, i.e. with tests
313-
dtype = _from_zarr_dtype(array_spec.dtype) # pragma: no cover
314-
return AsType(**{**self.codec_config, "decode_dtype": str(dtype)}) # pragma: no cover
315-
return self
316-
317-
318-
# bytes-to-bytes checksum codecs
319-
class _NumcodecsChecksumCodec(_NumcodecsBytesBytesCodec):
320-
def compute_encoded_size(self, input_byte_length: int, chunk_spec: ArraySpec) -> int:
321-
return input_byte_length + 4 # pragma: no cover
322-
323-
324-
class CRC32(_NumcodecsChecksumCodec, codec_name="crc32"):
325-
pass
326-
327-
328-
class CRC32C(_NumcodecsChecksumCodec, codec_name="crc32c"):
329-
pass
330-
331-
332-
class Adler32(_NumcodecsChecksumCodec, codec_name="adler32"):
333-
pass
334-
335-
336-
class Fletcher32(_NumcodecsChecksumCodec, codec_name="fletcher32"):
337-
pass
338-
339-
340-
class JenkinsLookup3(_NumcodecsChecksumCodec, codec_name="jenkins_lookup3"):
341-
pass
342-
343-
344-
# array-to-bytes codecs
345-
class PCodec(_NumcodecsArrayBytesCodec, codec_name="pcodec"):
346-
pass
347-
348-
349-
class ZFPY(_NumcodecsArrayBytesCodec, codec_name="zfpy"):
350-
pass
351-
40+
msg = "zarr could not be imported. Zarr 3.0.8 or later is required to use the numcodecs zarr integration."
41+
raise ImportError(msg) from e
42+
43+
from zarr.codecs._numcodecs import (
44+
BZ2,
45+
CRC32,
46+
CRC32C,
47+
LZ4,
48+
LZMA,
49+
ZFPY,
50+
Adler32,
51+
AsType,
52+
BitRound,
53+
Blosc,
54+
Delta,
55+
FixedScaleOffset,
56+
Fletcher32,
57+
GZip,
58+
JenkinsLookup3,
59+
PackBits,
60+
PCodec,
61+
Quantize,
62+
Shuffle,
63+
Zlib,
64+
Zstd,
65+
)
35266

35367
__all__ = [
35468
"BZ2",

pyproject.toml

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ classifiers = [
3333
maintainers = [
3434
{ name = "Alistair Miles", email = "[email protected]" },
3535
]
36-
license = {text = "MIT License"}
36+
license = "MIT"
3737
license-files = [
3838
"LICENSE.txt",
3939
"c-blosc/LICENSE.txt",
@@ -252,13 +252,15 @@ c-compiler = ">=1.9.0,<2"
252252
cxx-compiler = ">=1.9.0,<2"
253253
hatch = '==1.14.1'
254254

255+
[tool.hatch.metadata]
256+
allow-direct-references = true
257+
255258
[[tool.hatch.envs.test.matrix]]
256259
python = ["3.11"]
257-
zarr = ["3.0.10", "3.1.0"]
258260

259261
[tool.hatch.envs.test]
260262
dependencies = [
261-
"zarr=={matrix:zarr}"
263+
"zarr @ git+https://github.com/d-v-b/zarr-python.git@b6b2260a953e38bd4e432508b2063c5beda11703"
262264
]
263265
numpy="==2.2"
264266
features = ["test"]

0 commit comments

Comments
 (0)