diff --git a/changes/3039.bugfix.rst b/changes/3039.bugfix.rst new file mode 100644 index 0000000000..be2b424cf5 --- /dev/null +++ b/changes/3039.bugfix.rst @@ -0,0 +1,5 @@ +It is now possible to specify no compressor when creating a zarr format 2 array. +This can be done by passing ``compressor=None`` to the various array creation routines. + +The default behaviour of automatically choosing a suitable default compressor remains if the compressor argument is not given. +To reproduce the behaviour in previous zarr-python versions when ``compressor=None`` was passed, pass ``compressor='auto'`` instead. diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index ac143f6dea..59261cca8a 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -9,7 +9,14 @@ import numpy.typing as npt from typing_extensions import deprecated -from zarr.core.array import Array, AsyncArray, create_array, from_array, get_array_metadata +from zarr.core.array import ( + Array, + AsyncArray, + CompressorLike, + create_array, + from_array, + get_array_metadata, +) from zarr.core.array_spec import ArrayConfig, ArrayConfigLike, ArrayConfigParams from zarr.core.buffer import NDArrayLike from zarr.core.common import ( @@ -838,7 +845,7 @@ async def create( *, # Note: this is a change from v2 chunks: ChunkCoords | int | None = None, # TODO: v2 allowed chunks=True dtype: npt.DTypeLike | None = None, - compressor: dict[str, JSON] | None = None, # TODO: default and type change + compressor: CompressorLike = "auto", fill_value: Any | None = 0, # TODO: need type order: MemoryOrder | None = None, store: str | StoreLike | None = None, @@ -991,7 +998,7 @@ async def create( dtype = parse_dtype(dtype, zarr_format) if not filters: filters = _default_filters(dtype) - if not compressor: + if compressor == "auto": compressor = _default_compressor(dtype) elif zarr_format == 3 and chunk_shape is None: # type: ignore[redundant-expr] if chunks is not None: diff --git a/src/zarr/api/synchronous.py b/src/zarr/api/synchronous.py index 5662f5c247..24ab937db5 100644 --- a/src/zarr/api/synchronous.py +++ b/src/zarr/api/synchronous.py @@ -7,7 +7,7 @@ import zarr.api.asynchronous as async_api import zarr.core.array from zarr._compat import _deprecate_positional_args -from zarr.core.array import Array, AsyncArray +from zarr.core.array import Array, AsyncArray, CompressorLike from zarr.core.group import Group from zarr.core.sync import sync from zarr.core.sync_group import create_hierarchy @@ -599,7 +599,7 @@ def create( *, # Note: this is a change from v2 chunks: ChunkCoords | int | bool | None = None, dtype: npt.DTypeLike | None = None, - compressor: dict[str, JSON] | None = None, # TODO: default and type change + compressor: CompressorLike = "auto", fill_value: Any | None = 0, # TODO: need type order: MemoryOrder | None = None, store: str | StoreLike | None = None, diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 9852bf8d5f..cf4c36cc22 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -102,6 +102,7 @@ T_ArrayMetadata, ) from zarr.core.metadata.v2 import ( + CompressorLikev2, _default_compressor, _default_filters, parse_compressor, @@ -303,7 +304,7 @@ async def create( dimension_separator: Literal[".", "/"] | None = None, order: MemoryOrder | None = None, filters: list[dict[str, JSON]] | None = None, - compressor: dict[str, JSON] | None = None, + compressor: CompressorLikev2 | Literal["auto"] = "auto", # runtime overwrite: bool = False, data: npt.ArrayLike | None = None, @@ -394,7 +395,7 @@ async def create( dimension_separator: Literal[".", "/"] | None = None, order: MemoryOrder | None = None, filters: list[dict[str, JSON]] | None = None, - compressor: dict[str, JSON] | None = None, + compressor: CompressorLike = "auto", # runtime overwrite: bool = False, data: npt.ArrayLike | None = None, @@ -429,7 +430,7 @@ async def create( dimension_separator: Literal[".", "/"] | None = None, order: MemoryOrder | None = None, filters: list[dict[str, JSON]] | None = None, - compressor: dict[str, JSON] | None = None, + compressor: CompressorLike = "auto", # runtime overwrite: bool = False, data: npt.ArrayLike | None = None, @@ -570,7 +571,7 @@ async def _create( dimension_separator: Literal[".", "/"] | None = None, order: MemoryOrder | None = None, filters: list[dict[str, JSON]] | None = None, - compressor: dict[str, JSON] | None = None, + compressor: CompressorLike = "auto", # runtime overwrite: bool = False, data: npt.ArrayLike | None = None, @@ -604,7 +605,7 @@ async def _create( raise ValueError( "filters cannot be used for arrays with zarr_format 3. Use array-to-array codecs instead." ) - if compressor is not None: + if compressor != "auto": raise ValueError( "compressor cannot be used for arrays with zarr_format 3. Use bytes-to-bytes codecs instead." ) @@ -768,7 +769,7 @@ def _create_metadata_v2( dimension_separator: Literal[".", "/"] | None = None, fill_value: float | None = None, filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None, - compressor: dict[str, JSON] | numcodecs.abc.Codec | None = None, + compressor: CompressorLikev2 = None, attributes: dict[str, JSON] | None = None, ) -> ArrayV2Metadata: if dimension_separator is None: @@ -809,7 +810,7 @@ async def _create_v2( dimension_separator: Literal[".", "/"] | None = None, fill_value: float | None = None, filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None, - compressor: dict[str, JSON] | numcodecs.abc.Codec | None = None, + compressor: CompressorLike = "auto", attributes: dict[str, JSON] | None = None, overwrite: bool = False, ) -> AsyncArray[ArrayV2Metadata]: @@ -821,6 +822,17 @@ async def _create_v2( else: await ensure_no_existing_node(store_path, zarr_format=2) + compressor_parsed: CompressorLikev2 + if compressor == "auto": + compressor_parsed = _default_compressor(dtype) + elif isinstance(compressor, BytesBytesCodec): + raise ValueError( + "Cannot use a BytesBytesCodec as a compressor for zarr v2 arrays. " + "Use a numcodecs codec directly instead." + ) + else: + compressor_parsed = compressor + metadata = cls._create_metadata_v2( shape=shape, dtype=dtype, @@ -829,7 +841,7 @@ async def _create_v2( dimension_separator=dimension_separator, fill_value=fill_value, filters=filters, - compressor=compressor, + compressor=compressor_parsed, attributes=attributes, ) @@ -1751,7 +1763,7 @@ def create( dimension_separator: Literal[".", "/"] | None = None, order: MemoryOrder | None = None, filters: list[dict[str, JSON]] | None = None, - compressor: dict[str, JSON] | None = None, + compressor: CompressorLike = "auto", # runtime overwrite: bool = False, config: ArrayConfigLike | None = None, @@ -1880,7 +1892,7 @@ def _create( dimension_separator: Literal[".", "/"] | None = None, order: MemoryOrder | None = None, filters: list[dict[str, JSON]] | None = None, - compressor: dict[str, JSON] | None = None, + compressor: CompressorLike = "auto", # runtime overwrite: bool = False, config: ArrayConfigLike | None = None, @@ -3792,7 +3804,11 @@ def _get_default_codecs( | Literal["auto"] | None ) -CompressorLike: TypeAlias = dict[str, JSON] | BytesBytesCodec | numcodecs.abc.Codec | None +# Union of acceptable types for users to pass in for both v2 and v3 compressors +CompressorLike: TypeAlias = ( + dict[str, JSON] | BytesBytesCodec | numcodecs.abc.Codec | Literal["auto"] | None +) + CompressorsLike: TypeAlias = ( Iterable[dict[str, JSON] | BytesBytesCodec | numcodecs.abc.Codec] | dict[str, JSON] diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py index d19193963f..029a3e09a7 100644 --- a/src/zarr/core/metadata/v2.py +++ b/src/zarr/core/metadata/v2.py @@ -5,7 +5,7 @@ from collections.abc import Iterable, Sequence from enum import Enum from functools import cached_property -from typing import TYPE_CHECKING, Any, TypedDict, cast +from typing import TYPE_CHECKING, Any, TypeAlias, TypedDict, cast import numcodecs.abc @@ -43,6 +43,10 @@ class ArrayV2MetadataDict(TypedDict): attributes: dict[str, JSON] +# Union of acceptable types for v2 compressors +CompressorLikev2: TypeAlias = dict[str, JSON] | numcodecs.abc.Codec | None + + @dataclass(frozen=True, kw_only=True) class ArrayV2Metadata(Metadata): shape: ChunkCoords @@ -52,7 +56,7 @@ class ArrayV2Metadata(Metadata): order: MemoryOrder = "C" filters: tuple[numcodecs.abc.Codec, ...] | None = None dimension_separator: Literal[".", "/"] = "." - compressor: numcodecs.abc.Codec | None = None + compressor: CompressorLikev2 attributes: dict[str, JSON] = field(default_factory=dict) zarr_format: Literal[2] = field(init=False, default=2) @@ -65,7 +69,7 @@ def __init__( fill_value: Any, order: MemoryOrder, dimension_separator: Literal[".", "/"] = ".", - compressor: numcodecs.abc.Codec | dict[str, JSON] | None = None, + compressor: CompressorLikev2 = None, filters: Iterable[numcodecs.abc.Codec | dict[str, JSON]] | None = None, attributes: dict[str, JSON] | None = None, ) -> None: diff --git a/tests/test_api.py b/tests/test_api.py index 9f03a1067a..d1912f7238 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -2,6 +2,8 @@ from typing import TYPE_CHECKING +import zarr.codecs + if TYPE_CHECKING: import pathlib @@ -1190,3 +1192,20 @@ def test_gpu_basic(store: Store, zarr_format: ZarrFormat | None) -> None: # assert_array_equal doesn't check the type assert isinstance(result, type(src)) cp.testing.assert_array_equal(result, src[:10, :10]) + + +def test_v2_without_compressor() -> None: + # Make sure it's possible to set no compressor for v2 arrays + arr = zarr.create(store={}, shape=(1), dtype="uint8", zarr_format=2, compressor=None) + assert arr.compressors == () + + +def test_v2_with_v3_compressor() -> None: + # Check trying to create a v2 array with a v3 compressor fails + with pytest.raises( + ValueError, + match="Cannot use a BytesBytesCodec as a compressor for zarr v2 arrays. Use a numcodecs codec directly instead.", + ): + zarr.create( + store={}, shape=(1), dtype="uint8", zarr_format=2, compressor=zarr.codecs.BloscCodec() + )