Skip to content

Commit 1560d21

Browse files
agoodm and jhamman authored
Default zarr.open to open_group if shape is not provided (#2158)
* Default zarr.open to open_group if shape is not provided
* linting
* Address failing tests
* Add check if store_path contains array to open()
* Allow AsyncArray constructor to accept dictionary metadata
* Explicitly construct array from metadata in open()
* Check if metadata input is dict rather than ArrayMetadata
* fixup

---------

Co-authored-by: Joe Hamman <[email protected]>
Co-authored-by: Joe Hamman <[email protected]>
1 parent 5ca080d commit 1560d21

File tree

3 files changed

+76
-48
lines changed

3 files changed

+76
-48
lines changed

src/zarr/api/asynchronous.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import numpy as np
88
import numpy.typing as npt
99

10-
from zarr.core.array import Array, AsyncArray
10+
from zarr.core.array import Array, AsyncArray, get_array_metadata
1111
from zarr.core.common import JSON, AccessModeLiteral, ChunkCoords, MemoryOrder, ZarrFormat
1212
from zarr.core.config import config
1313
from zarr.core.group import AsyncGroup
@@ -230,6 +230,18 @@ async def open(
230230
if path is not None:
231231
store_path = store_path / path
232232

233+
if "shape" not in kwargs and mode in {"a", "w", "w-"}:
234+
try:
235+
metadata_dict = await get_array_metadata(store_path, zarr_format=zarr_format)
236+
# for v2, the above would already have raised an exception if not an array
237+
zarr_format = metadata_dict["zarr_format"]
238+
is_v3_array = zarr_format == 3 and metadata_dict.get("node_type") == "array"
239+
if is_v3_array or zarr_format == 2:
240+
return AsyncArray(store_path=store_path, metadata=metadata_dict)
241+
except (AssertionError, FileNotFoundError):
242+
pass
243+
return await open_group(store=store_path, zarr_format=zarr_format, mode=mode, **kwargs)
244+
233245
try:
234246
return await open_array(store=store_path, zarr_format=zarr_format, mode=mode, **kwargs)
235247
except KeyError:

src/zarr/core/array.py

Lines changed: 57 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,53 @@ def create_codec_pipeline(metadata: ArrayV2Metadata | ArrayV3Metadata) -> CodecP
106106
raise TypeError
107107

108108

109+
async def get_array_metadata(
    store_path: StorePath, zarr_format: ZarrFormat | None = 3
) -> dict[str, Any]:
    """Fetch and decode the array metadata document(s) found at ``store_path``.

    Parameters
    ----------
    store_path
        Location whose ``ZARR_JSON`` / ``ZARRAY_JSON`` / ``ZATTRS_JSON``
        children are read.
    zarr_format
        ``2`` reads the v2 pair, ``3`` reads the v3 document, and ``None``
        reads all three and picks the format from whichever one is present.

    Returns
    -------
    The parsed JSON metadata. For format 2 the parsed ``ZATTRS_JSON`` content
    (or ``{}`` when that object is absent) is stored under ``"attributes"``.

    Raises
    ------
    FileNotFoundError
        The metadata object required for the requested format is missing, or
        (for ``None``) neither format's metadata object exists.
    ValueError
        ``zarr_format`` is ``None`` and both v2 and v3 metadata exist, or
        ``zarr_format`` is not one of ``2``, ``3``, ``None``.
    """
    # Raw buffers as returned by the store; ``None`` means "not fetched / absent".
    v3_doc = None
    v2_doc = None
    v2_attrs = None

    if zarr_format == 2:
        v2_doc, v2_attrs = await gather(
            (store_path / ZARRAY_JSON).get(), (store_path / ZATTRS_JSON).get()
        )
        if v2_doc is None:
            raise FileNotFoundError(store_path)
    elif zarr_format == 3:
        v3_doc = await (store_path / ZARR_JSON).get()
        if v3_doc is None:
            raise FileNotFoundError(store_path)
    elif zarr_format is None:
        v3_doc, v2_doc, v2_attrs = await gather(
            (store_path / ZARR_JSON).get(),
            (store_path / ZARRAY_JSON).get(),
            (store_path / ZATTRS_JSON).get(),
        )
        has_v3 = v3_doc is not None
        has_v2 = v2_doc is not None
        if has_v3 and has_v2:
            # TODO: revisit this exception type
            # alternatively, we could warn and favor v3
            raise ValueError("Both zarr.json and .zarray objects exist")
        if not (has_v3 or has_v2):
            raise FileNotFoundError(store_path)
        # the format is whichever metadata key was actually found
        zarr_format = 3 if has_v3 else 2
    else:
        raise ValueError(f"unexpected zarr_format: {zarr_format}")

    if zarr_format != 2:
        # V3 arrays are described by a single zarr.json object
        assert v3_doc is not None
        return json.loads(v3_doc.to_bytes())

    # V2 arrays are described by a .zarray object plus an optional .zattrs object
    assert v2_doc is not None
    document: dict[str, Any] = json.loads(v2_doc.to_bytes())
    document["attributes"] = {} if v2_attrs is None else json.loads(v2_attrs.to_bytes())
    return document
154+
155+
109156
@dataclass(frozen=True)
110157
class AsyncArray:
111158
metadata: ArrayMetadata
@@ -115,10 +162,17 @@ class AsyncArray:
115162

116163
def __init__(
117164
self,
118-
metadata: ArrayMetadata,
165+
metadata: ArrayMetadata | dict[str, Any],
119166
store_path: StorePath,
120167
order: Literal["C", "F"] | None = None,
121168
) -> None:
169+
if isinstance(metadata, dict):
170+
zarr_format = metadata["zarr_format"]
171+
if zarr_format == 2:
172+
metadata = ArrayV2Metadata.from_dict(metadata)
173+
else:
174+
metadata = ArrayV3Metadata.from_dict(metadata)
175+
122176
metadata_parsed = parse_array_metadata(metadata)
123177
order_parsed = parse_indexing_order(order or config.get("array.order"))
124178

@@ -341,51 +395,8 @@ async def open(
341395
zarr_format: ZarrFormat | None = 3,
342396
) -> AsyncArray:
343397
store_path = await make_store_path(store)
344-
345-
if zarr_format == 2:
346-
zarray_bytes, zattrs_bytes = await gather(
347-
(store_path / ZARRAY_JSON).get(), (store_path / ZATTRS_JSON).get()
348-
)
349-
if zarray_bytes is None:
350-
raise FileNotFoundError(store_path)
351-
elif zarr_format == 3:
352-
zarr_json_bytes = await (store_path / ZARR_JSON).get()
353-
if zarr_json_bytes is None:
354-
raise FileNotFoundError(store_path)
355-
elif zarr_format is None:
356-
zarr_json_bytes, zarray_bytes, zattrs_bytes = await gather(
357-
(store_path / ZARR_JSON).get(),
358-
(store_path / ZARRAY_JSON).get(),
359-
(store_path / ZATTRS_JSON).get(),
360-
)
361-
if zarr_json_bytes is not None and zarray_bytes is not None:
362-
# TODO: revisit this exception type
363-
# alternatively, we could warn and favor v3
364-
raise ValueError("Both zarr.json and .zarray objects exist")
365-
if zarr_json_bytes is None and zarray_bytes is None:
366-
raise FileNotFoundError(store_path)
367-
# set zarr_format based on which keys were found
368-
if zarr_json_bytes is not None:
369-
zarr_format = 3
370-
else:
371-
zarr_format = 2
372-
else:
373-
raise ValueError(f"unexpected zarr_format: {zarr_format}")
374-
375-
if zarr_format == 2:
376-
# V2 arrays are comprised of a .zarray and .zattrs objects
377-
assert zarray_bytes is not None
378-
zarray_dict = json.loads(zarray_bytes.to_bytes())
379-
zattrs_dict = json.loads(zattrs_bytes.to_bytes()) if zattrs_bytes is not None else {}
380-
zarray_dict["attributes"] = zattrs_dict
381-
return cls(store_path=store_path, metadata=ArrayV2Metadata.from_dict(zarray_dict))
382-
else:
383-
# V3 arrays are comprised of a zarr.json object
384-
assert zarr_json_bytes is not None
385-
return cls(
386-
store_path=store_path,
387-
metadata=ArrayV3Metadata.from_dict(json.loads(zarr_json_bytes.to_bytes())),
388-
)
398+
metadata_dict = await get_array_metadata(store_path, zarr_format=zarr_format)
399+
return cls(store_path=store_path, metadata=metadata_dict)
389400

390401
@property
391402
def ndim(self) -> int:

tests/v3/test_api.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,12 @@ def test_open_with_mode_r_plus(tmp_path: pathlib.Path) -> None:
140140
z2[:] = 3
141141

142142

143-
def test_open_with_mode_a(tmp_path: pathlib.Path) -> None:
143+
async def test_open_with_mode_a(tmp_path: pathlib.Path) -> None:
144+
# Open without shape argument should default to group
145+
g = zarr.open(store=tmp_path, mode="a")
146+
assert isinstance(g, Group)
147+
await g.store_path.delete()
148+
144149
# 'a' means read/write (create if doesn't exist)
145150
arr = zarr.open(store=tmp_path, mode="a", shape=(3, 3))
146151
assert isinstance(arr, Array)

0 commit comments

Comments
 (0)