Skip to content

Commit fe49f5f

Browse files
committed
fix: zarr v2 compatibility fixes
- port normalize_chunks from v2 - add array.store property - default to append in create
1 parent b1ecdd5 commit fe49f5f

File tree

5 files changed

+75
-21
lines changed

5 files changed

+75
-21
lines changed

src/zarr/api/asynchronous.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -724,7 +724,7 @@ async def create(
724724
if meta_array is not None:
725725
warnings.warn("meta_array is not yet implemented", RuntimeWarning, stacklevel=2)
726726

727-
mode = kwargs.pop("mode", cast(AccessModeLiteral, "r" if read_only else "w"))
727+
mode = kwargs.pop("mode", cast(AccessModeLiteral, "r" if read_only else "a"))
728728
store_path = await make_store_path(store, mode=mode)
729729
if path is not None:
730730
store_path = store_path / path

src/zarr/core/array.py

Lines changed: 21 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,12 @@
88
import numpy as np
99
import numpy.typing as npt
1010

11-
from zarr.abc.store import set_or_delete
11+
from zarr.abc.store import Store, set_or_delete
1212
from zarr.codecs import BytesCodec
1313
from zarr.codecs._v2 import V2Compressor, V2Filters
1414
from zarr.core.attributes import Attributes
1515
from zarr.core.buffer import BufferPrototype, NDArrayLike, NDBuffer, default_buffer_prototype
16-
from zarr.core.chunk_grids import RegularChunkGrid, _guess_chunks
16+
from zarr.core.chunk_grids import RegularChunkGrid, normalize_chunks
1717
from zarr.core.chunk_key_encodings import (
1818
ChunkKeyEncoding,
1919
DefaultChunkKeyEncoding,
@@ -129,7 +129,7 @@ async def create(
129129
fill_value: Any | None = None,
130130
attributes: dict[str, JSON] | None = None,
131131
# v3 only
132-
chunk_shape: ChunkCoords | None = None,
132+
chunk_shape: ChunkCoords | None = None, # TODO: handle bool and iterable of iterable types
133133
chunk_key_encoding: (
134134
ChunkKeyEncoding
135135
| tuple[Literal["default"], Literal[".", "/"]]
@@ -139,7 +139,7 @@ async def create(
139139
codecs: Iterable[Codec | dict[str, JSON]] | None = None,
140140
dimension_names: Iterable[str] | None = None,
141141
# v2 only
142-
chunks: ShapeLike | None = None,
142+
chunks: ShapeLike | None = None, # TODO: handle bool and iterable of iterable types
143143
dimension_separator: Literal[".", "/"] | None = None,
144144
order: Literal["C", "F"] | None = None,
145145
filters: list[dict[str, JSON]] | None = None,
@@ -152,15 +152,14 @@ async def create(
152152

153153
shape = parse_shapelike(shape)
154154

155-
if chunk_shape is None:
156-
if chunks is None:
157-
chunk_shape = chunks = _guess_chunks(shape=shape, typesize=np.dtype(dtype).itemsize)
158-
else:
159-
chunks = parse_shapelike(chunks)
155+
if chunks is not None and chunk_shape is not None:
156+
raise ValueError("Only one of chunk_shape or chunks can be provided.")
160157

161-
chunk_shape = chunks
162-
elif chunks is not None:
163-
raise ValueError("Only one of chunk_shape or chunks must be provided.")
158+
dtype = np.dtype(dtype)
159+
if chunks:
160+
_chunks = normalize_chunks(chunks, shape, dtype.itemsize)
161+
if chunk_shape:
162+
_chunks = normalize_chunks(chunk_shape, shape, dtype.itemsize)
164163

165164
if zarr_format == 3:
166165
if dimension_separator is not None:
@@ -183,7 +182,7 @@ async def create(
183182
store_path,
184183
shape=shape,
185184
dtype=dtype,
186-
chunk_shape=chunk_shape,
185+
chunk_shape=_chunks,
187186
fill_value=fill_value,
188187
chunk_key_encoding=chunk_key_encoding,
189188
codecs=codecs,
@@ -206,7 +205,7 @@ async def create(
206205
store_path,
207206
shape=shape,
208207
dtype=dtype,
209-
chunks=chunk_shape,
208+
chunks=_chunks,
210209
dimension_separator=dimension_separator,
211210
fill_value=fill_value,
212211
order=order,
@@ -393,6 +392,10 @@ async def open(
393392
metadata=ArrayV3Metadata.from_dict(json.loads(zarr_json_bytes.to_bytes())),
394393
)
395394

395+
@property
def store(self) -> Store:
    """Return the ``store`` attribute of this array's ``store_path``."""
    location = self.store_path
    return location.store
398+
396399
@property
397400
def ndim(self) -> int:
398401
return len(self.metadata.shape)
@@ -697,6 +700,10 @@ def open(
697700
async_array = sync(AsyncArray.open(store))
698701
return cls(async_array)
699702

703+
@property
def store(self) -> Store:
    """Return the ``store`` exposed by the wrapped ``_async_array``."""
    wrapped = self._async_array
    return wrapped.store
706+
700707
@property
701708
def ndim(self) -> int:
702709
return self._async_array.ndim

src/zarr/core/chunk_grids.py

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,12 @@
22

33
import itertools
44
import math
5+
import numbers
56
import operator
67
from abc import abstractmethod
78
from dataclasses import dataclass
89
from functools import reduce
9-
from typing import TYPE_CHECKING
10+
from typing import TYPE_CHECKING, Any
1011

1112
import numpy as np
1213

@@ -98,6 +99,50 @@ def _guess_chunks(
9899
return tuple(int(x) for x in chunks)
99100

100101

102+
def normalize_chunks(chunks: Any, shape: tuple[int, ...], typesize: int) -> tuple[int, ...]:
103+
"""Convenience function to normalize the `chunks` argument for an array
104+
with the given `shape`."""
105+
106+
# N.B., expect shape already normalized
107+
108+
# handle auto-chunking
109+
if chunks is None or chunks is True:
110+
return _guess_chunks(shape, typesize)
111+
112+
# handle no chunking
113+
if chunks is False:
114+
return shape
115+
116+
# handle 1D convenience form
117+
if isinstance(chunks, numbers.Integral):
118+
chunks = tuple(int(chunks) for _ in shape)
119+
120+
# handle dask-style chunks (iterable of iterables)
121+
if all(isinstance(c, (tuple | list)) for c in chunks):
122+
# take first chunk size for each dimension
123+
chunks = (
124+
c[0] for c in chunks
125+
) # TODO: check/error/warn for irregular chunks (e.g. if c[0] != c[1:-1])
126+
127+
# handle bad dimensionality
128+
if len(chunks) > len(shape):
129+
raise ValueError("too many dimensions in chunks")
130+
131+
# handle underspecified chunks
132+
if len(chunks) < len(shape):
133+
# assume chunks across remaining dimensions
134+
chunks += shape[len(chunks) :]
135+
136+
# handle None or -1 in chunks
137+
if -1 in chunks or None in chunks:
138+
chunks = tuple(
139+
s if c == -1 or c is None else int(c) for s, c in zip(shape, chunks, strict=False)
140+
)
141+
142+
out = tuple(int(c) for c in chunks)
143+
return out
144+
145+
101146
@dataclass(frozen=True)
102147
class ChunkGrid(Metadata):
103148
@classmethod

src/zarr/core/group.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,9 @@ async def open(
173173
# alternatively, we could warn and favor v3
174174
raise ValueError("Both zarr.json and .zgroup objects exist")
175175
if zarr_json_bytes is None and zgroup_bytes is None:
176-
raise FileNotFoundError(store_path)
176+
raise FileNotFoundError(
177+
f"could not find zarr.json or .zgroup objects in {store_path}"
178+
)
177179
# set zarr_format based on which keys were found
178180
if zarr_json_bytes is not None:
179181
zarr_format = 3

src/zarr/store/common.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -78,12 +78,12 @@ async def make_store_path(
7878
store_like: StoreLike | None, *, mode: AccessModeLiteral | None = None
7979
) -> StorePath:
8080
if isinstance(store_like, StorePath):
81-
if mode is not None:
82-
assert AccessMode.from_literal(mode) == store_like.store.mode
81+
if (mode is not None) and (AccessMode.from_literal(mode) != store_like.store.mode):
82+
raise ValueError(f"mode mismatch (mode={mode} != store.mode={store_like.store.mode})")
8383
return store_like
8484
elif isinstance(store_like, Store):
85-
if mode is not None:
86-
assert AccessMode.from_literal(mode) == store_like.mode
85+
if (mode is not None) and (AccessMode.from_literal(mode) != store_like.mode):
86+
raise ValueError(f"mode mismatch (mode={mode} != store.mode={store_like.mode})")
8787
await store_like._ensure_open()
8888
return StorePath(store_like)
8989
elif store_like is None:

0 commit comments

Comments
 (0)