Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions changes/3039.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
It is now possible to specify no compressor when creating a zarr format 2 array.
This can be done by passing ``compressor=None`` to the various array creation routines.

If the ``compressor`` argument is not given, a suitable default compressor is still chosen automatically, as before.
To reproduce the behaviour in previous zarr-python versions when ``compressor=None`` was passed, pass ``compressor='auto'`` instead.
13 changes: 10 additions & 3 deletions src/zarr/api/asynchronous.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,14 @@
import numpy.typing as npt
from typing_extensions import deprecated

from zarr.core.array import Array, AsyncArray, create_array, from_array, get_array_metadata
from zarr.core.array import (
Array,
AsyncArray,
CompressorLike,
create_array,
from_array,
get_array_metadata,
)
from zarr.core.array_spec import ArrayConfig, ArrayConfigLike, ArrayConfigParams
from zarr.core.buffer import NDArrayLike
from zarr.core.common import (
Expand Down Expand Up @@ -838,7 +845,7 @@
*, # Note: this is a change from v2
chunks: ChunkCoords | int | None = None, # TODO: v2 allowed chunks=True
dtype: npt.DTypeLike | None = None,
compressor: dict[str, JSON] | None = None, # TODO: default and type change
compressor: CompressorLike = "auto",
fill_value: Any | None = 0, # TODO: need type
order: MemoryOrder | None = None,
store: str | StoreLike | None = None,
Expand Down Expand Up @@ -991,7 +998,7 @@
dtype = parse_dtype(dtype, zarr_format)
if not filters:
filters = _default_filters(dtype)
if not compressor:
if compressor == "auto":

Check warning on line 1001 in src/zarr/api/asynchronous.py

View check run for this annotation

Codecov / codecov/patch

src/zarr/api/asynchronous.py#L1001

Added line #L1001 was not covered by tests
compressor = _default_compressor(dtype)
elif zarr_format == 3 and chunk_shape is None: # type: ignore[redundant-expr]
if chunks is not None:
Expand Down
4 changes: 2 additions & 2 deletions src/zarr/api/synchronous.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import zarr.api.asynchronous as async_api
import zarr.core.array
from zarr._compat import _deprecate_positional_args
from zarr.core.array import Array, AsyncArray
from zarr.core.array import Array, AsyncArray, CompressorLike
from zarr.core.group import Group
from zarr.core.sync import sync
from zarr.core.sync_group import create_hierarchy
Expand Down Expand Up @@ -599,7 +599,7 @@ def create(
*, # Note: this is a change from v2
chunks: ChunkCoords | int | bool | None = None,
dtype: npt.DTypeLike | None = None,
compressor: dict[str, JSON] | None = None, # TODO: default and type change
compressor: CompressorLike = "auto",
fill_value: Any | None = 0, # TODO: need type
order: MemoryOrder | None = None,
store: str | StoreLike | None = None,
Expand Down
38 changes: 27 additions & 11 deletions src/zarr/core/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@
T_ArrayMetadata,
)
from zarr.core.metadata.v2 import (
CompressorLikev2,
_default_compressor,
_default_filters,
parse_compressor,
Expand Down Expand Up @@ -303,7 +304,7 @@
dimension_separator: Literal[".", "/"] | None = None,
order: MemoryOrder | None = None,
filters: list[dict[str, JSON]] | None = None,
compressor: dict[str, JSON] | None = None,
compressor: CompressorLikev2 | Literal["auto"] = "auto",
# runtime
overwrite: bool = False,
data: npt.ArrayLike | None = None,
Expand Down Expand Up @@ -394,7 +395,7 @@
dimension_separator: Literal[".", "/"] | None = None,
order: MemoryOrder | None = None,
filters: list[dict[str, JSON]] | None = None,
compressor: dict[str, JSON] | None = None,
compressor: CompressorLike = "auto",
# runtime
overwrite: bool = False,
data: npt.ArrayLike | None = None,
Expand Down Expand Up @@ -429,7 +430,7 @@
dimension_separator: Literal[".", "/"] | None = None,
order: MemoryOrder | None = None,
filters: list[dict[str, JSON]] | None = None,
compressor: dict[str, JSON] | None = None,
compressor: CompressorLike = "auto",
# runtime
overwrite: bool = False,
data: npt.ArrayLike | None = None,
Expand Down Expand Up @@ -570,7 +571,7 @@
dimension_separator: Literal[".", "/"] | None = None,
order: MemoryOrder | None = None,
filters: list[dict[str, JSON]] | None = None,
compressor: dict[str, JSON] | None = None,
compressor: CompressorLike = "auto",
# runtime
overwrite: bool = False,
data: npt.ArrayLike | None = None,
Expand Down Expand Up @@ -604,7 +605,7 @@
raise ValueError(
"filters cannot be used for arrays with zarr_format 3. Use array-to-array codecs instead."
)
if compressor is not None:
if compressor != "auto":
raise ValueError(
"compressor cannot be used for arrays with zarr_format 3. Use bytes-to-bytes codecs instead."
)
Expand Down Expand Up @@ -768,7 +769,7 @@
dimension_separator: Literal[".", "/"] | None = None,
fill_value: float | None = None,
filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None,
compressor: dict[str, JSON] | numcodecs.abc.Codec | None = None,
compressor: CompressorLikev2 = None,
attributes: dict[str, JSON] | None = None,
) -> ArrayV2Metadata:
if dimension_separator is None:
Expand Down Expand Up @@ -809,7 +810,7 @@
dimension_separator: Literal[".", "/"] | None = None,
fill_value: float | None = None,
filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None,
compressor: dict[str, JSON] | numcodecs.abc.Codec | None = None,
compressor: CompressorLike = "auto",
attributes: dict[str, JSON] | None = None,
overwrite: bool = False,
) -> AsyncArray[ArrayV2Metadata]:
Expand All @@ -821,6 +822,17 @@
else:
await ensure_no_existing_node(store_path, zarr_format=2)

compressor_parsed: CompressorLikev2
if compressor == "auto":
compressor_parsed = _default_compressor(dtype)
elif isinstance(compressor, BytesBytesCodec):
raise ValueError(

Check warning on line 829 in src/zarr/core/array.py

View check run for this annotation

Codecov / codecov/patch

src/zarr/core/array.py#L826-L829

Added lines #L826 - L829 were not covered by tests
"Cannot use a BytesBytesCodec as a compressor for zarr v2 arrays. "
"Use a numcodecs codec directly instead."
)
else:
compressor_parsed = compressor

Check warning on line 834 in src/zarr/core/array.py

View check run for this annotation

Codecov / codecov/patch

src/zarr/core/array.py#L834

Added line #L834 was not covered by tests

metadata = cls._create_metadata_v2(
shape=shape,
dtype=dtype,
Expand All @@ -829,7 +841,7 @@
dimension_separator=dimension_separator,
fill_value=fill_value,
filters=filters,
compressor=compressor,
compressor=compressor_parsed,
attributes=attributes,
)

Expand Down Expand Up @@ -1751,7 +1763,7 @@
dimension_separator: Literal[".", "/"] | None = None,
order: MemoryOrder | None = None,
filters: list[dict[str, JSON]] | None = None,
compressor: dict[str, JSON] | None = None,
compressor: CompressorLike = "auto",
# runtime
overwrite: bool = False,
config: ArrayConfigLike | None = None,
Expand Down Expand Up @@ -1880,7 +1892,7 @@
dimension_separator: Literal[".", "/"] | None = None,
order: MemoryOrder | None = None,
filters: list[dict[str, JSON]] | None = None,
compressor: dict[str, JSON] | None = None,
compressor: CompressorLike = "auto",
# runtime
overwrite: bool = False,
config: ArrayConfigLike | None = None,
Expand Down Expand Up @@ -3792,7 +3804,11 @@
| Literal["auto"]
| None
)
CompressorLike: TypeAlias = dict[str, JSON] | BytesBytesCodec | numcodecs.abc.Codec | None
# Union of acceptable types for users to pass in for both v2 and v3 compressors
CompressorLike: TypeAlias = (
dict[str, JSON] | BytesBytesCodec | numcodecs.abc.Codec | Literal["auto"] | None
)

CompressorsLike: TypeAlias = (
Iterable[dict[str, JSON] | BytesBytesCodec | numcodecs.abc.Codec]
| dict[str, JSON]
Expand Down
10 changes: 7 additions & 3 deletions src/zarr/core/metadata/v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from collections.abc import Iterable, Sequence
from enum import Enum
from functools import cached_property
from typing import TYPE_CHECKING, Any, TypedDict, cast
from typing import TYPE_CHECKING, Any, TypeAlias, TypedDict, cast

import numcodecs.abc

Expand Down Expand Up @@ -43,6 +43,10 @@ class ArrayV2MetadataDict(TypedDict):
attributes: dict[str, JSON]


# Union of acceptable types for v2 compressors
CompressorLikev2: TypeAlias = dict[str, JSON] | numcodecs.abc.Codec | None


@dataclass(frozen=True, kw_only=True)
class ArrayV2Metadata(Metadata):
shape: ChunkCoords
Expand All @@ -52,7 +56,7 @@ class ArrayV2Metadata(Metadata):
order: MemoryOrder = "C"
filters: tuple[numcodecs.abc.Codec, ...] | None = None
dimension_separator: Literal[".", "/"] = "."
compressor: numcodecs.abc.Codec | None = None
compressor: CompressorLikev2
attributes: dict[str, JSON] = field(default_factory=dict)
zarr_format: Literal[2] = field(init=False, default=2)

Expand All @@ -65,7 +69,7 @@ def __init__(
fill_value: Any,
order: MemoryOrder,
dimension_separator: Literal[".", "/"] = ".",
compressor: numcodecs.abc.Codec | dict[str, JSON] | None = None,
compressor: CompressorLikev2 = None,
filters: Iterable[numcodecs.abc.Codec | dict[str, JSON]] | None = None,
attributes: dict[str, JSON] | None = None,
) -> None:
Expand Down
19 changes: 19 additions & 0 deletions tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

from typing import TYPE_CHECKING

import zarr.codecs

if TYPE_CHECKING:
import pathlib

Expand Down Expand Up @@ -1190,3 +1192,20 @@ def test_gpu_basic(store: Store, zarr_format: ZarrFormat | None) -> None:
# assert_array_equal doesn't check the type
assert isinstance(result, type(src))
cp.testing.assert_array_equal(result, src[:10, :10])


def test_v2_without_copmpressor() -> None:
    """Check that ``compressor=None`` creates a zarr format 2 array with no compressor."""
    # ``(1,)`` is a one-element tuple; a bare ``(1)`` is just the integer 1.
    arr = zarr.create(store={}, shape=(1,), dtype="uint8", zarr_format=2, compressor=None)
    # With no compressor configured, the array reports an empty compressors tuple.
    assert arr.compressors == ()


def test_v2_with_v3_copmpressor() -> None:
    """Check that a v3 codec passed as ``compressor`` for a zarr format 2 array raises ``ValueError``."""
    # Imported locally so this test does not depend on a module-level ``import re``.
    import re

    expected_message = (
        "Cannot use a BytesBytesCodec as a compressor for zarr v2 arrays. "
        "Use a numcodecs codec directly instead."
    )
    # ``match`` is applied as a regular expression, so the message is escaped
    # to make each "." match a literal period rather than any character.
    with pytest.raises(ValueError, match=re.escape(expected_message)):
        zarr.create(
            store={},
            shape=(1,),  # one-element tuple; a bare ``(1)`` is just the integer 1
            dtype="uint8",
            zarr_format=2,
            compressor=zarr.codecs.BloscCodec(),
        )