Skip to content

Commit 6388203

Browse files
committed
Merge branch 'main' of https://github.com/zarr-developers/zarr-python into feat/fixed-length-strings
2 parents 3af98aa + d615783 commit 6388203

File tree

10 files changed

+82
-23
lines changed

10 files changed

+82
-23
lines changed

changes/2972.misc.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Avoid an unnecessary memory copy when writing Zarr with obstore

changes/3039.bugfix.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
It is now possible to specify no compressor when creating a zarr format 2 array.
2+
This can be done by passing ``compressor=None`` to the various array creation routines.
3+
4+
If the ``compressor`` argument is not given, a suitable default compressor is still chosen automatically.
5+
In previous zarr-python versions, passing ``compressor=None`` selected the default compressor; to reproduce that behaviour, pass ``compressor='auto'`` instead.

src/zarr/api/asynchronous.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from zarr.core.array import (
1313
Array,
1414
AsyncArray,
15+
CompressorLike,
1516
_get_default_chunk_encoding_v2,
1617
create_array,
1718
from_array,
@@ -844,7 +845,7 @@ async def create(
844845
*, # Note: this is a change from v2
845846
chunks: ChunkCoords | int | None = None, # TODO: v2 allowed chunks=True
846847
dtype: npt.DTypeLike | None = None,
847-
compressor: dict[str, JSON] | None = None, # TODO: default and type change
848+
compressor: CompressorLike = "auto",
848849
fill_value: Any | None = 0, # TODO: need type
849850
order: MemoryOrder | None = None,
850851
store: str | StoreLike | None = None,
@@ -995,9 +996,9 @@ async def create(
995996
if chunks is None:
996997
chunks = shape
997998
default_filters, default_compressor = _get_default_chunk_encoding_v2(dtype_wrapped)
998-
if filters is None:
999+
if not filters:
9991000
filters = default_filters # type: ignore[assignment]
1000-
if compressor is None:
1001+
if compressor == "auto":
10011002
compressor = default_compressor
10021003
elif zarr_format == 3 and chunk_shape is None: # type: ignore[redundant-expr]
10031004
if chunks is not None:

src/zarr/api/synchronous.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import zarr.api.asynchronous as async_api
88
import zarr.core.array
99
from zarr._compat import _deprecate_positional_args
10-
from zarr.core.array import Array, AsyncArray
10+
from zarr.core.array import Array, AsyncArray, CompressorLike
1111
from zarr.core.group import Group
1212
from zarr.core.sync import sync
1313
from zarr.core.sync_group import create_hierarchy
@@ -600,7 +600,7 @@ def create(
600600
*, # Note: this is a change from v2
601601
chunks: ChunkCoords | int | bool | None = None,
602602
dtype: npt.DTypeLike | None = None,
603-
compressor: dict[str, JSON] | None = None, # TODO: default and type change
603+
compressor: CompressorLike = "auto",
604604
fill_value: Any | None = 0, # TODO: need type
605605
order: MemoryOrder | None = None,
606606
store: str | StoreLike | None = None,

src/zarr/core/array.py

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@
108108
T_ArrayMetadata,
109109
)
110110
from zarr.core.metadata.v2 import (
111+
CompressorLikev2,
111112
parse_compressor,
112113
parse_filters,
113114
)
@@ -310,7 +311,7 @@ async def create(
310311
dimension_separator: Literal[".", "/"] | None = None,
311312
order: MemoryOrder | None = None,
312313
filters: list[dict[str, JSON]] | None = None,
313-
compressor: dict[str, JSON] | None = None,
314+
compressor: CompressorLikev2 | Literal["auto"] = "auto",
314315
# runtime
315316
overwrite: bool = False,
316317
data: npt.ArrayLike | None = None,
@@ -401,7 +402,7 @@ async def create(
401402
dimension_separator: Literal[".", "/"] | None = None,
402403
order: MemoryOrder | None = None,
403404
filters: list[dict[str, JSON]] | None = None,
404-
compressor: dict[str, JSON] | None = None,
405+
compressor: CompressorLike = "auto",
405406
# runtime
406407
overwrite: bool = False,
407408
data: npt.ArrayLike | None = None,
@@ -436,7 +437,7 @@ async def create(
436437
dimension_separator: Literal[".", "/"] | None = None,
437438
order: MemoryOrder | None = None,
438439
filters: list[dict[str, JSON]] | None = None,
439-
compressor: dict[str, JSON] | None = None,
440+
compressor: CompressorLike = "auto",
440441
# runtime
441442
overwrite: bool = False,
442443
data: npt.ArrayLike | None = None,
@@ -577,7 +578,7 @@ async def _create(
577578
dimension_separator: Literal[".", "/"] | None = None,
578579
order: MemoryOrder | None = None,
579580
filters: list[dict[str, JSON]] | None = None,
580-
compressor: dict[str, JSON] | None = None,
581+
compressor: CompressorLike = "auto",
581582
# runtime
582583
overwrite: bool = False,
583584
data: npt.ArrayLike | None = None,
@@ -612,7 +613,7 @@ async def _create(
612613
raise ValueError(
613614
"filters cannot be used for arrays with zarr_format 3. Use array-to-array codecs instead."
614615
)
615-
if compressor is not None:
616+
if compressor != "auto":
616617
raise ValueError(
617618
"compressor cannot be used for arrays with zarr_format 3. Use bytes-to-bytes codecs instead."
618619
)
@@ -782,7 +783,7 @@ def _create_metadata_v2(
782783
dimension_separator: Literal[".", "/"] | None = None,
783784
fill_value: float | None = None,
784785
filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None,
785-
compressor: dict[str, JSON] | numcodecs.abc.Codec | None = None,
786+
compressor: CompressorLikev2 = None,
786787
attributes: dict[str, JSON] | None = None,
787788
) -> ArrayV2Metadata:
788789
if dimension_separator is None:
@@ -813,7 +814,7 @@ async def _create_v2(
813814
dimension_separator: Literal[".", "/"] | None = None,
814815
fill_value: float | None = None,
815816
filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None,
816-
compressor: dict[str, JSON] | numcodecs.abc.Codec | None = None,
817+
compressor: CompressorLike = "auto",
817818
attributes: dict[str, JSON] | None = None,
818819
overwrite: bool = False,
819820
) -> AsyncArray[ArrayV2Metadata]:
@@ -825,6 +826,17 @@ async def _create_v2(
825826
else:
826827
await ensure_no_existing_node(store_path, zarr_format=2)
827828

829+
compressor_parsed: CompressorLikev2
830+
if compressor == "auto":
831+
_, compressor_parsed = _get_default_chunk_encoding_v2(dtype)
832+
elif isinstance(compressor, BytesBytesCodec):
833+
raise ValueError(
834+
"Cannot use a BytesBytesCodec as a compressor for zarr v2 arrays. "
835+
"Use a numcodecs codec directly instead."
836+
)
837+
else:
838+
compressor_parsed = compressor
839+
828840
metadata = cls._create_metadata_v2(
829841
shape=shape,
830842
dtype=dtype,
@@ -833,7 +845,7 @@ async def _create_v2(
833845
dimension_separator=dimension_separator,
834846
fill_value=fill_value,
835847
filters=filters,
836-
compressor=compressor,
848+
compressor=compressor_parsed,
837849
attributes=attributes,
838850
)
839851

@@ -1780,7 +1792,7 @@ def create(
17801792
dimension_separator: Literal[".", "/"] | None = None,
17811793
order: MemoryOrder | None = None,
17821794
filters: list[dict[str, JSON]] | None = None,
1783-
compressor: dict[str, JSON] | None = None,
1795+
compressor: CompressorLike = "auto",
17841796
# runtime
17851797
overwrite: bool = False,
17861798
config: ArrayConfigLike | None = None,
@@ -1909,7 +1921,7 @@ def _create(
19091921
dimension_separator: Literal[".", "/"] | None = None,
19101922
order: MemoryOrder | None = None,
19111923
filters: list[dict[str, JSON]] | None = None,
1912-
compressor: dict[str, JSON] | None = None,
1924+
compressor: CompressorLike = "auto",
19131925
# runtime
19141926
overwrite: bool = False,
19151927
config: ArrayConfigLike | None = None,
@@ -3814,7 +3826,11 @@ def _build_parents(
38143826
| Literal["auto"]
38153827
| None
38163828
)
3817-
CompressorLike: TypeAlias = dict[str, JSON] | BytesBytesCodec | numcodecs.abc.Codec | None
3829+
# Union of acceptable types for users to pass in for both v2 and v3 compressors
3830+
CompressorLike: TypeAlias = (
3831+
dict[str, JSON] | BytesBytesCodec | numcodecs.abc.Codec | Literal["auto"] | None
3832+
)
3833+
38183834
CompressorsLike: TypeAlias = (
38193835
Iterable[dict[str, JSON] | BytesBytesCodec | numcodecs.abc.Codec]
38203836
| dict[str, JSON]

src/zarr/core/buffer/core.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,19 @@ def as_numpy_array(self) -> npt.NDArray[Any]:
255255
"""
256256
...
257257

258+
def as_buffer_like(self) -> BytesLike:
259+
"""Returns the buffer as an object that implements the Python buffer protocol.
260+
261+
Notes
262+
-----
263+
Might have to copy data, since the implementation uses `.as_numpy_array()`.
264+
265+
Returns
266+
-------
267+
An object that implements the Python buffer protocol
268+
"""
269+
return memoryview(self.as_numpy_array()) # type: ignore[arg-type]
270+
258271
def to_bytes(self) -> bytes:
259272
"""Returns the buffer as `bytes` (host memory).
260273

src/zarr/core/metadata/v2.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import base64
44
import warnings
55
from collections.abc import Iterable, Sequence
6-
from typing import TYPE_CHECKING, Any, TypedDict
6+
from typing import TYPE_CHECKING, Any, TypeAlias, TypedDict
77

88
import numcodecs.abc
99

@@ -41,6 +41,10 @@ class ArrayV2MetadataDict(TypedDict):
4141
attributes: dict[str, JSON]
4242

4343

44+
# Union of acceptable types for v2 compressors
45+
CompressorLikev2: TypeAlias = dict[str, JSON] | numcodecs.abc.Codec | None
46+
47+
4448
@dataclass(frozen=True, kw_only=True)
4549
class ArrayV2Metadata(Metadata):
4650
shape: ChunkCoords
@@ -50,7 +54,7 @@ class ArrayV2Metadata(Metadata):
5054
order: MemoryOrder = "C"
5155
filters: tuple[numcodecs.abc.Codec, ...] | None = None
5256
dimension_separator: Literal[".", "/"] = "."
53-
compressor: numcodecs.abc.Codec | None = None
57+
compressor: CompressorLikev2
5458
attributes: dict[str, JSON] = field(default_factory=dict)
5559
zarr_format: Literal[2] = field(init=False, default=2)
5660

@@ -63,7 +67,7 @@ def __init__(
6367
fill_value: Any,
6468
order: MemoryOrder,
6569
dimension_separator: Literal[".", "/"] = ".",
66-
compressor: numcodecs.abc.Codec | dict[str, JSON] | None = None,
70+
compressor: CompressorLikev2 = None,
6771
filters: Iterable[numcodecs.abc.Codec | dict[str, JSON]] | None = None,
6872
attributes: dict[str, JSON] | None = None,
6973
) -> None:

src/zarr/storage/_local.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,10 +52,10 @@ def _put(
5252
with path.open("r+b") as f:
5353
f.seek(start)
5454
# write takes any object supporting the buffer protocol
55-
f.write(value.as_numpy_array()) # type: ignore[arg-type]
55+
f.write(value.as_buffer_like())
5656
return None
5757
else:
58-
view = memoryview(value.as_numpy_array()) # type: ignore[arg-type]
58+
view = value.as_buffer_like()
5959
if exclusive:
6060
mode = "xb"
6161
else:

src/zarr/storage/_obstore.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -161,15 +161,15 @@ async def set(self, key: str, value: Buffer) -> None:
161161

162162
self._check_writable()
163163

164-
buf = value.to_bytes()
164+
buf = value.as_buffer_like()
165165
await obs.put_async(self.store, key, buf)
166166

167167
async def set_if_not_exists(self, key: str, value: Buffer) -> None:
168168
# docstring inherited
169169
import obstore as obs
170170

171171
self._check_writable()
172-
buf = value.to_bytes()
172+
buf = value.as_buffer_like()
173173
with contextlib.suppress(obs.exceptions.AlreadyExistsError):
174174
await obs.put_async(self.store, key, buf, mode="create")
175175

tests/test_api.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
from typing import TYPE_CHECKING
44

5+
import zarr.codecs
6+
57
if TYPE_CHECKING:
68
import pathlib
79

@@ -1190,3 +1192,20 @@ def test_gpu_basic(store: Store, zarr_format: ZarrFormat | None) -> None:
11901192
# assert_array_equal doesn't check the type
11911193
assert isinstance(result, type(src))
11921194
cp.testing.assert_array_equal(result, src[:10, :10])
1195+
1196+
1197+
def test_v2_without_compressor() -> None:
1198+
# Make sure it's possible to set no compressor for v2 arrays
1199+
arr = zarr.create(store={}, shape=(1), dtype="uint8", zarr_format=2, compressor=None)
1200+
assert arr.compressors == ()
1201+
1202+
1203+
def test_v2_with_v3_compressor() -> None:
1204+
# Check that trying to create a v2 array with a v3 compressor fails
1205+
with pytest.raises(
1206+
ValueError,
1207+
match="Cannot use a BytesBytesCodec as a compressor for zarr v2 arrays. Use a numcodecs codec directly instead.",
1208+
):
1209+
zarr.create(
1210+
store={}, shape=(1), dtype="uint8", zarr_format=2, compressor=zarr.codecs.BloscCodec()
1211+
)

0 commit comments

Comments
 (0)