Skip to content

Commit bb12c76

Browse files
committed
Refactor group metadata and metadata IO routines
1 parent 07c56b7 commit bb12c76

File tree

13 files changed

+739
-571
lines changed

13 files changed

+739
-571
lines changed

src/zarr/api/asynchronous.py

Lines changed: 43 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,13 @@
33
import asyncio
44
import dataclasses
55
import warnings
6-
from typing import TYPE_CHECKING, Any, Literal, cast
6+
from typing import TYPE_CHECKING, Any, Literal
77

88
import numpy as np
99
import numpy.typing as npt
1010
from typing_extensions import deprecated
1111

12-
from zarr.core.array import Array, AsyncArray, create_array, from_array, get_array_metadata
12+
from zarr.core.array import Array, AsyncArray, create_array, from_array
1313
from zarr.core.array_spec import ArrayConfig, ArrayConfigLike, ArrayConfigParams
1414
from zarr.core.buffer import NDArrayLike
1515
from zarr.core.common import (
@@ -25,17 +25,18 @@
2525
)
2626
from zarr.core.group import (
2727
AsyncGroup,
28-
ConsolidatedMetadata,
29-
GroupMetadata,
3028
create_hierarchy,
29+
get_node,
3130
)
32-
from zarr.core.metadata import ArrayMetadataDict, ArrayV2Metadata, ArrayV3Metadata
31+
from zarr.core.metadata import ArrayV2Metadata, ArrayV3Metadata
32+
from zarr.core.metadata.group import ConsolidatedMetadata, GroupMetadata
3333
from zarr.core.metadata.v2 import _default_compressor, _default_filters
3434
from zarr.errors import (
3535
ArrayNotFoundError,
36+
ContainsArrayError,
37+
ContainsGroupError,
3638
GroupNotFoundError,
3739
NodeNotFoundError,
38-
NodeTypeValidationError,
3940
)
4041
from zarr.storage._common import make_store_path
4142

@@ -320,27 +321,46 @@ async def open(
320321
zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format)
321322

322323
store_path = await make_store_path(store, mode=mode, path=path, storage_options=storage_options)
323-
# TODO: the mode check below seems wrong!
324-
if "shape" not in kwargs and mode in {"a", "r", "r+", "w"}:
324+
325+
extant_node: AsyncGroup | AsyncArray[Any] = None
326+
# All of these modes will defer to an existing node
327+
if mode in {"a", "r", "r+", "w-"}:
325328
try:
326-
metadata_dict = await get_array_metadata(store_path, zarr_format=zarr_format)
327-
# TODO: remove this cast when we fix typing for array metadata dicts
328-
_metadata_dict = cast(ArrayMetadataDict, metadata_dict)
329-
# for v2, the above would already have raised an exception if not an array
330-
zarr_format = _metadata_dict["zarr_format"]
331-
is_v3_array = zarr_format == 3 and _metadata_dict.get("node_type") == "array"
332-
if is_v3_array or zarr_format == 2:
333-
return AsyncArray(store_path=store_path, metadata=_metadata_dict)
334-
except (AssertionError, ArrayNotFoundError, NodeTypeValidationError):
329+
extant_node = await get_node(
330+
store=store_path.store, path=store_path.path, zarr_format=zarr_format
331+
)
332+
except NodeNotFoundError:
335333
pass
336-
try:
337-
return await open_group(store=store_path, zarr_format=zarr_format, mode=mode, **kwargs)
338-
except GroupNotFoundError as e:
334+
# we successfully found an existing node
335+
if extant_node is not None:
336+
# an existing node is an error if mode == w-
337+
if mode == "w-":
338+
if isinstance(extant_node, AsyncArray):
339+
node_type = "array"
340+
exc = ContainsArrayError
341+
else:
342+
node_type = "group"
343+
exc = ContainsGroupError
344+
msg = (
345+
f"A Zarr V{extant_node.zarr_format} {node_type} exists in store "
346+
f"{store_path.store!r} at path {store_path.path!r}. "
347+
f"Attempting to open a pre-existing {node_type} with access mode {mode} is an error. "
348+
f"Remove the {node_type} from storage, or use an access mode that is compatible with "
349+
"a pre-existing array, such as one of ('r','r+','a','w')."
350+
)
351+
raise exc(msg)
352+
else:
353+
# otherwise, return the existing node
354+
return extant_node
355+
else:
356+
if mode in ("r", "r+"):
339357
msg = (
340-
"'Neither array nor group metadata were found in '"
341-
f"store {store_path.store} at path {store_path.path!r}"
358+
f"Neither array nor group metadata were found in store {store_path.store!r} at "
359+
f"path {store_path.path!r}. Attempting to open an non-existent node with access mode "
360+
f"{mode} is an error. Create an array or group first, or use an access mode that "
361+
"create an array or group, such as one of ('a', 'w', 'w-')."
342362
)
343-
raise NodeNotFoundError(msg) from e
363+
raise NodeNotFoundError(msg)
344364
if "shape" in kwargs:
345365
return await open_array(store=store_path, zarr_format=zarr_format, mode=mode, **kwargs)
346366
else:

src/zarr/core/array.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,8 @@
100100
ArrayV3MetadataDict,
101101
T_ArrayMetadata,
102102
)
103+
from zarr.core.metadata._io import _read_array_metadata
104+
from zarr.core.metadata.group import GroupMetadata
103105
from zarr.core.metadata.v2 import (
104106
_default_compressor,
105107
_default_filters,
@@ -161,7 +163,7 @@ def create_codec_pipeline(metadata: ArrayMetadata) -> CodecPipeline:
161163
raise TypeError
162164

163165

164-
async def get_array_metadata(
166+
async def xget_array_metadata(
165167
store_path: StorePath, zarr_format: ZarrFormat | None = 3
166168
) -> dict[str, JSON]:
167169
if zarr_format == 2:
@@ -913,10 +915,10 @@ async def open(
913915
<AsyncArray memory://... shape=(100, 100) dtype=int32>
914916
"""
915917
store_path = await make_store_path(store)
916-
metadata_dict = await get_array_metadata(store_path, zarr_format=zarr_format)
917-
# TODO: remove this cast when we have better type hints
918-
_metadata_dict = cast(ArrayV3MetadataDict, metadata_dict)
919-
return cls(store_path=store_path, metadata=_metadata_dict)
918+
metadata_dict = await _read_array_metadata(
919+
store_path.store, store_path.path, zarr_format=zarr_format
920+
)
921+
return cls(store_path=store_path, metadata=metadata_dict)
920922

921923
@property
922924
def store(self) -> Store:
@@ -3756,7 +3758,7 @@ async def chunks_initialized(
37563758
def _build_parents(
37573759
node: AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata] | AsyncGroup,
37583760
) -> list[AsyncGroup]:
3759-
from zarr.core.group import AsyncGroup, GroupMetadata
3761+
from zarr.core.group import AsyncGroup
37603762

37613763
store = node.store_path.store
37623764
path = node.store_path.path

0 commit comments

Comments
 (0)