diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 00000000..07fe41c5
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,2 @@
+# GitHub syntax highlighting
+pixi.lock linguist-language=YAML linguist-generated=true
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index b1a1a7d9..2a142520 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -70,3 +70,33 @@ jobs:
           fail_ci_if_error: true
           token: ${{ secrets.CODECOV_TOKEN }}
           verbose: true
+
+  test-zarr:
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+
+    defaults:
+      run:
+        shell: bash -el {0}
+
+    steps:
+      - name: Checkout source
+        uses: actions/checkout@v4
+        with:
+          submodules: recursive
+          fetch-depth: 0 # required for version resolution
+
+      - name: Set up Pixi
+        uses: prefix-dev/setup-pixi@v0.8.14
+        with:
+          pixi-version: v0.49.0
+          cache: false
+
+      - name: List deps
+        shell: "bash -l {0}"
+        run: pixi run -e default hatch run test:list-deps
+
+      - name: Run tests
+        shell: "bash -l {0}"
+        run: pixi run -e default hatch run test:test-zarr
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 6b7d99f1..5e08553b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -104,3 +104,7 @@ numcodecs/version.py
 
 # Cython generated
 numcodecs/*.c
+# pixi environments
+.pixi/*
+*.egg-info
+pixi.lock
\ No newline at end of file
diff --git a/numcodecs/zarr3.py b/numcodecs/zarr3.py
index 3ace9814..7ed0ac57 100644
--- a/numcodecs/zarr3.py
+++ b/numcodecs/zarr3.py
@@ -29,17 +29,19 @@
 import math
 from dataclasses import dataclass, replace
 from functools import cached_property
+from importlib.metadata import version
 from typing import Any, Self
 from warnings import warn
 
 import numpy as np
+from packaging.version import Version
 
 import numcodecs
 
 try:
-    import zarr
+    import zarr  # noqa: F401
 
-    if zarr.__version__ < "3.0.0":  # pragma: no cover
+    if Version(version('zarr')) < Version("3.0.0"):  # pragma: no cover
         raise ImportError("zarr 3.0.0 or later is required to use the numcodecs zarr integration.")
 except ImportError as e:  # pragma: no cover
     raise ImportError(
@@ -56,6 +58,23 @@
 CODEC_PREFIX = "numcodecs."
 
 
+def _from_zarr_dtype(dtype: Any) -> np.dtype:
+    """
+    Get a numpy data type from an array spec, depending on the zarr version.
+    """
+    if Version(version('zarr')) >= Version("3.1.0"):
+        return dtype.to_native_dtype()
+    return dtype  # pragma: no cover
+
+
+def _to_zarr_dtype(dtype: np.dtype) -> Any:
+    if Version(version('zarr')) >= Version("3.1.0"):
+        from zarr.dtype import parse_data_type
+
+        return parse_data_type(dtype, zarr_format=3)
+    return dtype  # pragma: no cover
+
+
 def _expect_name_prefix(codec_name: str) -> str:
     if not codec_name.startswith(CODEC_PREFIX):
         raise ValueError(
@@ -224,7 +243,8 @@ class LZMA(_NumcodecsBytesBytesCodec, codec_name="lzma"):
 class Shuffle(_NumcodecsBytesBytesCodec, codec_name="shuffle"):
     def evolve_from_array_spec(self, array_spec: ArraySpec) -> Shuffle:
         if self.codec_config.get("elementsize") is None:
-            return Shuffle(**{**self.codec_config, "elementsize": array_spec.dtype.itemsize})
+            dtype = _from_zarr_dtype(array_spec.dtype)
+            return Shuffle(**{**self.codec_config, "elementsize": dtype.itemsize})
         return self  # pragma: no cover
 
 
@@ -232,7 +252,8 @@ def evolve_from_array_spec(self, array_spec: ArraySpec) -> Shuffle:
 class Delta(_NumcodecsArrayArrayCodec, codec_name="delta"):
     def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec:
         if astype := self.codec_config.get("astype"):
-            return replace(chunk_spec, dtype=np.dtype(astype))  # type: ignore[call-overload]
+            dtype = _to_zarr_dtype(np.dtype(astype))  # type: ignore[call-overload]
+            return replace(chunk_spec, dtype=dtype)
         return chunk_spec
 
 
@@ -243,12 +264,14 @@ class BitRound(_NumcodecsArrayArrayCodec, codec_name="bitround"):
 class FixedScaleOffset(_NumcodecsArrayArrayCodec, codec_name="fixedscaleoffset"):
     def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec:
         if astype := self.codec_config.get("astype"):
-            return replace(chunk_spec, dtype=np.dtype(astype))  # type: ignore[call-overload]
+            dtype = _to_zarr_dtype(np.dtype(astype))  # type: ignore[call-overload]
+            return replace(chunk_spec, dtype=dtype)
         return chunk_spec
 
     def evolve_from_array_spec(self, array_spec: ArraySpec) -> FixedScaleOffset:
         if self.codec_config.get("dtype") is None:
-            return FixedScaleOffset(**{**self.codec_config, "dtype": str(array_spec.dtype)})
+            dtype = _from_zarr_dtype(array_spec.dtype)
+            return FixedScaleOffset(**{**self.codec_config, "dtype": str(dtype)})
         return self
 
 
@@ -258,7 +281,8 @@ def __init__(self, **codec_config: JSON) -> None:
 
     def evolve_from_array_spec(self, array_spec: ArraySpec) -> Quantize:
         if self.codec_config.get("dtype") is None:
-            return Quantize(**{**self.codec_config, "dtype": str(array_spec.dtype)})
+            dtype = _from_zarr_dtype(array_spec.dtype)
+            return Quantize(**{**self.codec_config, "dtype": str(dtype)})
         return self
 
 
@@ -267,21 +291,27 @@ def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec:
         return replace(
             chunk_spec,
             shape=(1 + math.ceil(product(chunk_spec.shape) / 8),),
-            dtype=np.dtype("uint8"),
+            dtype=_to_zarr_dtype(np.dtype("uint8")),
         )
 
-    def validate(self, *, dtype: np.dtype[Any], **_kwargs) -> None:
-        if dtype != np.dtype("bool"):
+    # todo: remove this type: ignore when this class can be defined w.r.t.
+    # a single zarr dtype API
+    def validate(self, *, dtype: np.dtype[Any], **_kwargs) -> None:  # type: ignore[override]
+        _dtype = _from_zarr_dtype(dtype)
+        if _dtype != np.dtype("bool"):
             raise ValueError(f"Packbits filter requires bool dtype. Got {dtype}.")
 
 
 class AsType(_NumcodecsArrayArrayCodec, codec_name="astype"):
     def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec:
-        return replace(chunk_spec, dtype=np.dtype(self.codec_config["encode_dtype"]))  # type: ignore[arg-type]
+        dtype = _to_zarr_dtype(np.dtype(self.codec_config["encode_dtype"]))  # type: ignore[arg-type]
+        return replace(chunk_spec, dtype=dtype)
 
     def evolve_from_array_spec(self, array_spec: ArraySpec) -> AsType:
         if self.codec_config.get("decode_dtype") is None:
-            return AsType(**{**self.codec_config, "decode_dtype": str(array_spec.dtype)})
+            # TODO: remove these coverage exemptions the correct way, i.e. with tests
+            dtype = _from_zarr_dtype(array_spec.dtype)  # pragma: no cover
+            return AsType(**{**self.codec_config, "decode_dtype": str(dtype)})  # pragma: no cover
         return self
 
 
diff --git a/pyproject.toml b/pyproject.toml
index 387603f3..c925cd17 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -241,3 +241,29 @@ warn_unreachable = false
 warn_redundant_casts = true
 warn_unused_ignores = true
 warn_unused_configs = true
+
+[tool.pixi.project]
+channels = ["conda-forge"]
+platforms = ["linux-64", "osx-arm64", "osx-64", "win-64"]
+
+[tool.pixi.dependencies]
+clang = ">=19.1.7,<20"
+c-compiler = ">=1.9.0,<2"
+cxx-compiler = ">=1.9.0,<2"
+hatch = '==1.14.1'
+
+[[tool.hatch.envs.test.matrix]]
+python = ["3.11"]
+zarr = ["3.0.10", "3.1.0"]
+
+[tool.hatch.envs.test]
+dependencies = [
+    "zarr=={matrix:zarr}"
+]
+numpy="==2.2"
+features = ["test"]
+
+
+[tool.hatch.envs.test.scripts]
+list-deps = "pip list"
+test-zarr = "pytest numcodecs/tests/test_zarr3.py numcodecs/tests/test_zarr3_import.py"
\ No newline at end of file