diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index b1a1a7d9..3989946b 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -13,8 +13,16 @@ jobs:
       fail-fast: false
       matrix:
         python-version: ["3.11", "3.12", "3.13"]
+        # Run full test matrix on latest version of zarr
+        zarr-version: ["3.1.*"]
         # macos-13 is an intel runner, macos-14 is an arm64 runner
-        platform: [ubuntu-latest, ubuntu-22.04-arm, windows-latest, macos-13, macos-14]
+        platform:
+          [ubuntu-latest, ubuntu-22.04-arm, windows-latest, macos-13, macos-14]
+        include:
+          # Add one test run for zarr 3.0.x
+          - python-version: "3.12"
+            zarr-version: "3.0.*"
+            platform: "ubuntu-latest"
 
     defaults:
       run:
@@ -26,6 +34,7 @@ jobs:
         with:
           submodules: recursive
           fetch-depth: 0 # required for version resolution
+          fetch-tags: true
 
       - name: Set up Conda
         uses: conda-incubator/setup-miniconda@v3.1.1
@@ -56,14 +65,15 @@ jobs:
         # Since zarr v3 requires numpy >= 1.25, on Python 3.11 leave it out
         # so we can have some tests of our minimum version of numpy (1.24)
         if: matrix.python-version != '3.11'
-        run: python -m pip install zarr>=3
+        # Quoted so the shell passes the "3.x.*" specifier through verbatim
+        run: python -m pip install "zarr==${{ matrix.zarr-version }}"
 
       - name: List installed packages
         run: python -m pip list
 
       - name: Run tests
         shell: "bash -l {0}"
-        run: pytest -v
+        run: pytest -v --pyargs numcodecs.tests
 
       - uses: codecov/codecov-action@v5
         with:
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index c08b8f8a..c6b0ab6c 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -30,4 +30,4 @@ repos:
     hooks:
       - id: mypy
         args: [--config-file, pyproject.toml]
-        additional_dependencies: [numpy, pytest, crc32c, zfpy, 'zarr>=3']
+        additional_dependencies: [numpy, pytest, crc32c, zfpy, 'zarr>=3.1']
diff --git a/numcodecs/zarr3.py b/numcodecs/zarr3.py
index 3ace9814..3384c3e6 100644
--- a/numcodecs/zarr3.py
+++ b/numcodecs/zarr3.py
@@ -29,19 +29,21 @@
 import math
 from dataclasses import dataclass, replace
 from functools import cached_property
-from typing import Any, Self
+from importlib.metadata import version
+from typing import TYPE_CHECKING, Any, Self
 from warnings import warn
 
 import numpy as np
+from packaging.version import Version
 
 import numcodecs
 
 try:
-    import zarr
+    import zarr  # noqa: F401
 
-    if zarr.__version__ < "3.0.0":  # pragma: no cover
+    if Version(version('zarr')) < Version("3.0.0"):
         raise ImportError("zarr 3.0.0 or later is required to use the numcodecs zarr integration.")
-except ImportError as e:  # pragma: no cover
+except ImportError as e:
     raise ImportError(
         "zarr 3.0.0 or later is required to use the numcodecs zarr integration."
     ) from e
@@ -53,14 +55,39 @@
 from zarr.core.buffer.cpu import as_numpy_array_wrapper
 from zarr.core.common import JSON, parse_named_configuration, product
 
+if TYPE_CHECKING:
+    from zarr.dtype import ZDType
+
 CODEC_PREFIX = "numcodecs."
+
+# Resolve the installed zarr version once at import time rather than on every
+# dtype conversion; zarr >= 3.1 wraps array dtypes in its own ZDType classes.
+_ZARR_HAS_ZDTYPE = Version(version('zarr')) >= Version("3.1.0")
 
 
+def _from_zarr_dtype(dtype: Any) -> np.dtype:
+    """
+    Get a numpy data type from an array spec, depending on the zarr version.
+    """
+    if _ZARR_HAS_ZDTYPE:
+        return dtype.to_native_dtype()
+    return dtype
+
+
+def _to_zarr_dtype(dtype: np.dtype) -> Any:
+    """
+    Get the array spec data type for a numpy data type, depending on the zarr version.
+    """
+    if _ZARR_HAS_ZDTYPE:
+        from zarr.dtype import parse_data_type
+
+        return parse_data_type(dtype, zarr_format=3)
+    return dtype
+
+
 def _expect_name_prefix(codec_name: str) -> str:
     if not codec_name.startswith(CODEC_PREFIX):
-        raise ValueError(
-            f"Expected name to start with '{CODEC_PREFIX}'. Got {codec_name} instead."
-        )  # pragma: no cover
+        raise ValueError(f"Expected name to start with '{CODEC_PREFIX}'. Got {codec_name} instead.")
     return codec_name.removeprefix(CODEC_PREFIX)
 
 
@@ -69,7 +96,7 @@ def _parse_codec_configuration(data: dict[str, JSON]) -> dict[str, JSON]:
     if not parsed_name.startswith(CODEC_PREFIX):
         raise ValueError(
             f"Expected name to start with '{CODEC_PREFIX}'. Got {parsed_name} instead."
-        )  # pragma: no cover
+        )
     id = _expect_name_prefix(parsed_name)
     return {"id": id, **parsed_configuration}
 
@@ -95,7 +122,7 @@ def __init__(self, **codec_config: JSON) -> None:
         if not self.codec_name:
             raise ValueError(
                 "The codec name needs to be supplied through the `codec_name` attribute."
-            )  # pragma: no cover
+            )
         unprefixed_codec_name = _expect_name_prefix(self.codec_name)
 
         if "id" not in codec_config:
@@ -103,7 +130,7 @@ def __init__(self, **codec_config: JSON) -> None:
         elif codec_config["id"] != unprefixed_codec_name:
             raise ValueError(
                 f"Codec id does not match {unprefixed_codec_name}. Got: {codec_config['id']}."
-            )  # pragma: no cover
+            )
 
         object.__setattr__(self, "codec_config", codec_config)
         warn(
@@ -224,7 +251,8 @@ class LZMA(_NumcodecsBytesBytesCodec, codec_name="lzma"):
 class Shuffle(_NumcodecsBytesBytesCodec, codec_name="shuffle"):
     def evolve_from_array_spec(self, array_spec: ArraySpec) -> Shuffle:
         if self.codec_config.get("elementsize") is None:
-            return Shuffle(**{**self.codec_config, "elementsize": array_spec.dtype.itemsize})
+            dtype = _from_zarr_dtype(array_spec.dtype)
+            return Shuffle(**{**self.codec_config, "elementsize": dtype.itemsize})
         return self  # pragma: no cover
 
 
@@ -232,7 +260,8 @@ def evolve_from_array_spec(self, array_spec: ArraySpec) -> Shuffle:
 class Delta(_NumcodecsArrayArrayCodec, codec_name="delta"):
     def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec:
         if astype := self.codec_config.get("astype"):
-            return replace(chunk_spec, dtype=np.dtype(astype))  # type: ignore[call-overload]
+            dtype = _to_zarr_dtype(np.dtype(astype))  # type: ignore[call-overload]
+            return replace(chunk_spec, dtype=dtype)
         return chunk_spec
 
 
@@ -243,12 +272,14 @@ class BitRound(_NumcodecsArrayArrayCodec, codec_name="bitround"):
 class FixedScaleOffset(_NumcodecsArrayArrayCodec, codec_name="fixedscaleoffset"):
     def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec:
         if astype := self.codec_config.get("astype"):
-            return replace(chunk_spec, dtype=np.dtype(astype))  # type: ignore[call-overload]
+            dtype = _to_zarr_dtype(np.dtype(astype))  # type: ignore[call-overload]
+            return replace(chunk_spec, dtype=dtype)
         return chunk_spec
 
     def evolve_from_array_spec(self, array_spec: ArraySpec) -> FixedScaleOffset:
         if self.codec_config.get("dtype") is None:
-            return FixedScaleOffset(**{**self.codec_config, "dtype": str(array_spec.dtype)})
+            dtype = _from_zarr_dtype(array_spec.dtype)
+            return FixedScaleOffset(**{**self.codec_config, "dtype": str(dtype)})
         return self
 
 
@@ -258,7 +289,8 @@ def __init__(self, **codec_config: JSON) -> None:
 
     def evolve_from_array_spec(self, array_spec: ArraySpec) -> Quantize:
         if self.codec_config.get("dtype") is None:
-            return Quantize(**{**self.codec_config, "dtype": str(array_spec.dtype)})
+            dtype = _from_zarr_dtype(array_spec.dtype)
+            return Quantize(**{**self.codec_config, "dtype": str(dtype)})
         return self
 
 
@@ -267,28 +299,31 @@ def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec:
         return replace(
             chunk_spec,
             shape=(1 + math.ceil(product(chunk_spec.shape) / 8),),
-            dtype=np.dtype("uint8"),
+            dtype=_to_zarr_dtype(np.dtype("uint8")),
         )
 
-    def validate(self, *, dtype: np.dtype[Any], **_kwargs) -> None:
-        if dtype != np.dtype("bool"):
+    def validate(self, *, shape: tuple[int, ...], dtype: "ZDType[Any, Any]", **_kwargs) -> None:  # noqa: UP037
+        _dtype = _from_zarr_dtype(dtype)
+        if _dtype != np.dtype("bool"):
             raise ValueError(f"Packbits filter requires bool dtype. Got {dtype}.")
 
 
 class AsType(_NumcodecsArrayArrayCodec, codec_name="astype"):
     def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec:
-        return replace(chunk_spec, dtype=np.dtype(self.codec_config["encode_dtype"]))  # type: ignore[arg-type]
+        dtype = _to_zarr_dtype(np.dtype(self.codec_config["encode_dtype"]))  # type: ignore[arg-type]
+        return replace(chunk_spec, dtype=dtype)
 
     def evolve_from_array_spec(self, array_spec: ArraySpec) -> AsType:
         if self.codec_config.get("decode_dtype") is None:
-            return AsType(**{**self.codec_config, "decode_dtype": str(array_spec.dtype)})
+            dtype = _from_zarr_dtype(array_spec.dtype)
+            return AsType(**{**self.codec_config, "decode_dtype": str(dtype)})
         return self
 
 
 # bytes-to-bytes checksum codecs
 class _NumcodecsChecksumCodec(_NumcodecsBytesBytesCodec):
     def compute_encoded_size(self, input_byte_length: int, chunk_spec: ArraySpec) -> int:
-        return input_byte_length + 4  # pragma: no cover
+        return input_byte_length + 4
 
 
 class CRC32(_NumcodecsChecksumCodec, codec_name="crc32"):
diff --git a/pyproject.toml b/pyproject.toml
index 387603f3..7ed4aefe 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -14,7 +14,7 @@ description = """
 A Python package providing buffer compression and transformation codecs \
 for use in data storage and communication applications."""
 readme = "README.rst"
-dependencies = ["numpy>=1.24", "typing_extensions"]
+dependencies = ["numpy>=1.24", "typing_extensions", "packaging"]
 requires-python = ">=3.11"
 dynamic = [
   "version",