Skip to content

Fix to work with Zarr 3.1.0 #777

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 9 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,16 @@ jobs:
fail-fast: false
matrix:
python-version: ["3.11", "3.12", "3.13"]
# Run full test matrix on latest version of zarr
zarr-version: ["3.1.*"]
# macos-13 is an intel runner, macos-14 is an arm64 runner
platform: [ubuntu-latest, ubuntu-22.04-arm, windows-latest, macos-13, macos-14]
platform:
[ubuntu-latest, ubuntu-22.04-arm, windows-latest, macos-13, macos-14]
include:
# Add one test run for zarr 3.0.x
- python-version: "3.12"
zarr-version: "3.0.*"
platform: "ubuntu-latest"

defaults:
run:
Expand All @@ -26,6 +34,7 @@ jobs:
with:
submodules: recursive
fetch-depth: 0 # required for version resolution
fetch-tags: true

- name: Set up Conda
uses: conda-incubator/setup-miniconda@v3
Expand Down Expand Up @@ -56,14 +65,14 @@ jobs:
# Since zarr v3 requires numpy >= 1.25, on Python 3.11 leave it out
# so we can have some tests of our minimum version of numpy (1.24)
if: matrix.python-version != '3.11'
run: python -m pip install zarr>=3
run: python -m pip install zarr==${{ matrix.zarr-version }}

- name: List installed packages
run: python -m pip list

- name: Run tests
shell: "bash -l {0}"
run: pytest -v
run: pytest -v --pyargs numcodecs.tests

- uses: codecov/codecov-action@v5
with:
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,4 @@ repos:
hooks:
- id: mypy
args: [--config-file, pyproject.toml]
additional_dependencies: [numpy, pytest, crc32c, zfpy, 'zarr>=3']
additional_dependencies: [numpy, pytest, crc32c, zfpy, 'zarr>=3.1']
70 changes: 49 additions & 21 deletions numcodecs/zarr3.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,19 +29,21 @@
import math
from dataclasses import dataclass, replace
from functools import cached_property
from typing import Any, Self
from importlib.metadata import version
from typing import TYPE_CHECKING, Any, Self
from warnings import warn

import numpy as np
from packaging.version import Version

import numcodecs

try:
import zarr
import zarr # noqa: F401

if zarr.__version__ < "3.0.0": # pragma: no cover
if Version(version('zarr')) < Version("3.0.0"):
raise ImportError("zarr 3.0.0 or later is required to use the numcodecs zarr integration.")
except ImportError as e: # pragma: no cover
except ImportError as e:
raise ImportError(
"zarr 3.0.0 or later is required to use the numcodecs zarr integration."
) from e
Expand All @@ -53,14 +55,32 @@
from zarr.core.buffer.cpu import as_numpy_array_wrapper
from zarr.core.common import JSON, parse_named_configuration, product

if TYPE_CHECKING:
from zarr.dtype import ZDType

CODEC_PREFIX = "numcodecs."


def _from_zarr_dtype(dtype: Any) -> np.dtype:
"""
Get a numpy data type from an array spec, depending on the zarr version.
"""
if Version(version('zarr')) >= Version("3.1.0"):
return dtype.to_native_dtype()
return dtype


def _to_zarr_dtype(dtype: np.dtype) -> Any:
    """
    Convert a numpy data type into the dtype representation used by the installed zarr.

    With zarr older than 3.1.0 the numpy dtype is returned unchanged. From
    zarr 3.1.0 onwards it is passed through ``zarr.dtype.parse_data_type``
    with ``zarr_format=3``.
    """
    installed = Version(version('zarr'))
    if installed < Version("3.1.0"):
        return dtype

    # Imported lazily: this import is only attempted on zarr >= 3.1.0.
    from zarr.dtype import parse_data_type

    return parse_data_type(dtype, zarr_format=3)


def _expect_name_prefix(codec_name: str) -> str:
if not codec_name.startswith(CODEC_PREFIX):
raise ValueError(
f"Expected name to start with '{CODEC_PREFIX}'. Got {codec_name} instead."
) # pragma: no cover
raise ValueError(f"Expected name to start with '{CODEC_PREFIX}'. Got {codec_name} instead.")
return codec_name.removeprefix(CODEC_PREFIX)


Expand All @@ -69,7 +89,7 @@ def _parse_codec_configuration(data: dict[str, JSON]) -> dict[str, JSON]:
if not parsed_name.startswith(CODEC_PREFIX):
raise ValueError(
f"Expected name to start with '{CODEC_PREFIX}'. Got {parsed_name} instead."
) # pragma: no cover
)
id = _expect_name_prefix(parsed_name)
return {"id": id, **parsed_configuration}

Expand All @@ -95,15 +115,15 @@ def __init__(self, **codec_config: JSON) -> None:
if not self.codec_name:
raise ValueError(
"The codec name needs to be supplied through the `codec_name` attribute."
) # pragma: no cover
)
unprefixed_codec_name = _expect_name_prefix(self.codec_name)

if "id" not in codec_config:
codec_config = {"id": unprefixed_codec_name, **codec_config}
elif codec_config["id"] != unprefixed_codec_name:
raise ValueError(
f"Codec id does not match {unprefixed_codec_name}. Got: {codec_config['id']}."
) # pragma: no cover
)

object.__setattr__(self, "codec_config", codec_config)
warn(
Expand Down Expand Up @@ -224,15 +244,17 @@ class LZMA(_NumcodecsBytesBytesCodec, codec_name="lzma"):
class Shuffle(_NumcodecsBytesBytesCodec, codec_name="shuffle"):
def evolve_from_array_spec(self, array_spec: ArraySpec) -> Shuffle:
if self.codec_config.get("elementsize") is None:
return Shuffle(**{**self.codec_config, "elementsize": array_spec.dtype.itemsize})
dtype = _from_zarr_dtype(array_spec.dtype)
return Shuffle(**{**self.codec_config, "elementsize": dtype.itemsize})
return self # pragma: no cover


# array-to-array codecs ("filters")
class Delta(_NumcodecsArrayArrayCodec, codec_name="delta"):
def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec:
if astype := self.codec_config.get("astype"):
return replace(chunk_spec, dtype=np.dtype(astype)) # type: ignore[call-overload]
dtype = _to_zarr_dtype(np.dtype(astype)) # type: ignore[call-overload]
return replace(chunk_spec, dtype=dtype)
return chunk_spec


Expand All @@ -243,12 +265,14 @@ class BitRound(_NumcodecsArrayArrayCodec, codec_name="bitround"):
class FixedScaleOffset(_NumcodecsArrayArrayCodec, codec_name="fixedscaleoffset"):
def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec:
if astype := self.codec_config.get("astype"):
return replace(chunk_spec, dtype=np.dtype(astype)) # type: ignore[call-overload]
dtype = _to_zarr_dtype(np.dtype(astype)) # type: ignore[call-overload]
return replace(chunk_spec, dtype=dtype)
return chunk_spec

def evolve_from_array_spec(self, array_spec: ArraySpec) -> FixedScaleOffset:
if self.codec_config.get("dtype") is None:
return FixedScaleOffset(**{**self.codec_config, "dtype": str(array_spec.dtype)})
dtype = _from_zarr_dtype(array_spec.dtype)
return FixedScaleOffset(**{**self.codec_config, "dtype": str(dtype)})
return self


Expand All @@ -258,7 +282,8 @@ def __init__(self, **codec_config: JSON) -> None:

def evolve_from_array_spec(self, array_spec: ArraySpec) -> Quantize:
if self.codec_config.get("dtype") is None:
return Quantize(**{**self.codec_config, "dtype": str(array_spec.dtype)})
dtype = _from_zarr_dtype(array_spec.dtype)
return Quantize(**{**self.codec_config, "dtype": str(dtype)})
return self


Expand All @@ -267,28 +292,31 @@ def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec:
return replace(
chunk_spec,
shape=(1 + math.ceil(product(chunk_spec.shape) / 8),),
dtype=np.dtype("uint8"),
dtype=_to_zarr_dtype(np.dtype("uint8")),
)

def validate(self, *, dtype: np.dtype[Any], **_kwargs) -> None:
if dtype != np.dtype("bool"):
def validate(self, *, shape: tuple[int, ...], dtype: "ZDType[Any, Any]", **_kwargs) -> None: # noqa: UP037
_dtype = _from_zarr_dtype(dtype)
if _dtype != np.dtype("bool"):
raise ValueError(f"Packbits filter requires bool dtype. Got {dtype}.")


class AsType(_NumcodecsArrayArrayCodec, codec_name="astype"):
def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec:
return replace(chunk_spec, dtype=np.dtype(self.codec_config["encode_dtype"])) # type: ignore[arg-type]
dtype = _to_zarr_dtype(np.dtype(self.codec_config["encode_dtype"])) # type: ignore[arg-type]
return replace(chunk_spec, dtype=dtype)

def evolve_from_array_spec(self, array_spec: ArraySpec) -> AsType:
if self.codec_config.get("decode_dtype") is None:
return AsType(**{**self.codec_config, "decode_dtype": str(array_spec.dtype)})
dtype = _from_zarr_dtype(array_spec.dtype)
return AsType(**{**self.codec_config, "decode_dtype": str(dtype)})
return self


# bytes-to-bytes checksum codecs
class _NumcodecsChecksumCodec(_NumcodecsBytesBytesCodec):
def compute_encoded_size(self, input_byte_length: int, chunk_spec: ArraySpec) -> int:
return input_byte_length + 4 # pragma: no cover
return input_byte_length + 4


class CRC32(_NumcodecsChecksumCodec, codec_name="crc32"):
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ description = """
A Python package providing buffer compression and transformation codecs \
for use in data storage and communication applications."""
readme = "README.rst"
dependencies = ["numpy>=1.24", "typing_extensions"]
dependencies = ["numpy>=1.24", "typing_extensions", "packaging"]
requires-python = ">=3.11"
dynamic = [
"version",
Expand Down
Loading