Skip to content

Commit afdc320

Browse files
committed
put sync wrappers in sync_group module, move utils to utils
1 parent d7d0070 commit afdc320

File tree

8 files changed

+267
-175
lines changed

8 files changed

+267
-175
lines changed

src/zarr/api/asynchronous.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,6 @@
2828
ConsolidatedMetadata,
2929
GroupMetadata,
3030
create_hierarchy,
31-
create_nodes,
32-
create_rooted_hierarchy,
33-
get_node,
3431
)
3532
from zarr.core.metadata import ArrayMetadataDict, ArrayV2Metadata, ArrayV3Metadata
3633
from zarr.core.metadata.v2 import _default_compressor, _default_filters
@@ -57,13 +54,10 @@
5754
"create",
5855
"create_array",
5956
"create_hierarchy",
60-
"create_nodes",
61-
"create_rooted_hierarchy",
6257
"empty",
6358
"empty_like",
6459
"full",
6560
"full_like",
66-
"get_node",
6761
"group",
6862
"load",
6963
"ones",

src/zarr/api/synchronous.py

Lines changed: 4 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -7,19 +7,18 @@
77
import zarr.api.asynchronous as async_api
88
import zarr.core.array
99
from zarr._compat import _deprecate_positional_args
10-
from zarr.abc.store import Store
1110
from zarr.core.array import Array, AsyncArray
12-
from zarr.core.group import Group, GroupMetadata, _parse_async_node
13-
from zarr.core.sync import _collect_aiterator, sync
11+
from zarr.core.group import Group
12+
from zarr.core.sync import sync
13+
from zarr.core.sync_group import create_hierarchy
1414

1515
if TYPE_CHECKING:
16-
from collections.abc import Iterable, Iterator
16+
from collections.abc import Iterable
1717

1818
import numpy as np
1919
import numpy.typing as npt
2020

2121
from zarr.abc.codec import Codec
22-
from zarr.abc.store import Store
2322
from zarr.api.asynchronous import ArrayLike, PathLike
2423
from zarr.core.array import (
2524
CompressorsLike,
@@ -38,7 +37,6 @@
3837
ShapeLike,
3938
ZarrFormat,
4039
)
41-
from zarr.core.metadata import ArrayV2Metadata, ArrayV3Metadata
4240
from zarr.storage import StoreLike
4341

4442
__all__ = [
@@ -1136,42 +1134,3 @@ def zeros_like(a: ArrayLike, **kwargs: Any) -> Array:
11361134
The new array.
11371135
"""
11381136
return Array(sync(async_api.zeros_like(a, **kwargs)))
1139-
1140-
1141-
def create_hierarchy(
1142-
*,
1143-
store: Store,
1144-
nodes: dict[str, GroupMetadata | ArrayV2Metadata | ArrayV3Metadata],
1145-
overwrite: bool = False,
1146-
) -> Iterator[tuple[str, Group | Array]]:
1147-
"""
1148-
Create a complete zarr hierarchy from a collection of metadata objects.
1149-
1150-
Groups that are implicitly defined by the input will be created as needed.
1151-
1152-
This function takes a parsed hierarchy dictionary and creates all the nodes in the hierarchy
1153-
concurrently. Arrays and Groups are yielded in the order they are created. This order is not
1154-
deterministic.
1155-
1156-
Parameters
1157-
----------
1158-
store : Store
1159-
The storage backend to use.
1160-
nodes : dict[str, GroupMetadata | ArrayV3Metadata | ArrayV2Metadata]
1161-
A dictionary defining the hierarchy. The keys are the paths of the nodes
1162-
in the hierarchy, and the values are the metadata of the nodes. The
1163-
metadata must be either an instance of GroupMetadata, ArrayV3Metadata
1164-
or ArrayV2Metadata.
1165-
overwrite : bool
1166-
Whether to overwrite existing nodes. Defaults to ``False``, in which case an error will be
1167-
raised instead of overwriting an existing array or group.
1168-
1169-
Yields
1170-
------
1171-
tuple[str, Group | Array]
1172-
(key, node) pairs the order they are created.
1173-
"""
1174-
coro = async_api.create_hierarchy(store=store, nodes=nodes, overwrite=overwrite)
1175-
1176-
for key, value in sync(_collect_aiterator(coro)):
1177-
yield key, _parse_async_node(value)

src/zarr/core/group.py

Lines changed: 1 addition & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@
5454
from zarr.errors import ContainsArrayError, ContainsGroupError, MetadataValidationError
5555
from zarr.storage import StoreLike, StorePath
5656
from zarr.storage._common import ensure_no_existing_node, make_store_path
57-
from zarr.storage._utils import normalize_path
57+
from zarr.storage._utils import _join_paths, _normalize_path_keys, normalize_path
5858

5959
if TYPE_CHECKING:
6060
from collections.abc import (
@@ -3063,9 +3063,6 @@ async def create_nodes(
30633063
continue
30643064

30653065

3066-
T = TypeVar("T")
3067-
3068-
30693066
def _get_roots(
30703067
data: Iterable[str],
30713068
) -> tuple[str, ...]:
@@ -3082,15 +3079,6 @@ def _get_roots(
30823079
return tuple(groups[min(groups.keys())])
30833080

30843081

3085-
def _join_paths(paths: Iterable[str]) -> str:
3086-
"""
3087-
Filter out instances of '' and join the remaining strings with '/'.
3088-
3089-
Because the root node of a zarr hierarchy is represented by an empty string,
3090-
"""
3091-
return "/".join(filter(lambda v: v != "", paths))
3092-
3093-
30943082
def _parse_hierarchy_dict(
30953083
*,
30963084
data: Mapping[str, ImplicitGroupMarker | GroupMetadata | ArrayV2Metadata | ArrayV3Metadata],
@@ -3180,36 +3168,6 @@ def _ensure_consistent_zarr_format(
31803168
)
31813169

31823170

3183-
def _normalize_paths(paths: Iterable[str]) -> tuple[str, ...]:
3184-
"""
3185-
Normalize the input paths according to the normalization scheme used for zarr node paths.
3186-
If any two paths normalize to the same value, raise a ValueError.
3187-
"""
3188-
path_map: dict[str, str] = {}
3189-
for path in paths:
3190-
parsed = normalize_path(path)
3191-
if parsed in path_map:
3192-
msg = (
3193-
f"After normalization, the value '{path}' collides with '{path_map[parsed]}'. "
3194-
f"Both '{path}' and '{path_map[parsed]}' normalize to the same value: '{parsed}'. "
3195-
f"You should use either '{path}' or '{path_map[parsed]}', but not both."
3196-
)
3197-
raise ValueError(msg)
3198-
path_map[parsed] = path
3199-
return tuple(path_map.keys())
3200-
3201-
3202-
def _normalize_path_keys(data: Mapping[str, T]) -> dict[str, T]:
3203-
"""
3204-
Normalize the keys of the input dict according to the normalization scheme used for zarr node
3205-
paths. If any two keys in the input normalize to the same value, raise a ValueError.
3206-
Returns a dict where the keys are the elements of the input and the values are the
3207-
normalized form of each key.
3208-
"""
3209-
parsed_keys = _normalize_paths(data.keys())
3210-
return dict(zip(parsed_keys, data.values(), strict=True))
3211-
3212-
32133171
async def _getitem_semaphore(
32143172
node: AsyncGroup, key: str, semaphore: asyncio.Semaphore | None
32153173
) -> AsyncArray[ArrayV3Metadata] | AsyncArray[ArrayV2Metadata] | AsyncGroup:

src/zarr/core/sync_group.py

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
from __future__ import annotations
2+
3+
from typing import TYPE_CHECKING
4+
5+
from zarr.core.group import Group, GroupMetadata, _parse_async_node
6+
from zarr.core.group import create_hierarchy as create_hierarchy_async
7+
from zarr.core.group import create_nodes as create_nodes_async
8+
from zarr.core.group import create_rooted_hierarchy as create_rooted_hierarchy_async
9+
from zarr.core.group import get_node as get_node_async
10+
from zarr.core.sync import _collect_aiterator, sync
11+
12+
if TYPE_CHECKING:
13+
from collections.abc import Iterator
14+
15+
from zarr.abc.store import Store
16+
from zarr.core.array import Array
17+
from zarr.core.common import ZarrFormat
18+
from zarr.core.metadata import ArrayV2Metadata, ArrayV3Metadata
19+
20+
21+
def create_nodes(
    *, store: Store, nodes: dict[str, GroupMetadata | ArrayV2Metadata | ArrayV3Metadata]
) -> Iterator[tuple[str, Group | Array]]:
    """Create a collection of arrays and / or groups concurrently.

    Synchronous wrapper around the asynchronous ``create_nodes`` from ``zarr.core.group``. Each
    node produced by the asynchronous function is converted with ``_parse_async_node`` before it
    is yielded.

    Note: no attempt is made to validate that these arrays and / or groups collectively form a
    valid Zarr hierarchy. It is the responsibility of the caller of this function to ensure that
    the ``nodes`` parameter satisfies any correctness constraints.

    This is a generator function: nothing is executed until the returned iterator is first
    advanced.

    Parameters
    ----------
    store : Store
        The storage backend to use.
    nodes : dict[str, GroupMetadata | ArrayV3Metadata | ArrayV2Metadata]
        A dictionary defining the hierarchy. The keys are the paths of the nodes
        in the hierarchy, and the values are the metadata of the nodes. The
        metadata must be either an instance of GroupMetadata, ArrayV3Metadata
        or ArrayV2Metadata.

    Yields
    ------
    tuple[str, Group | Array]
        (key, node) pairs for the created nodes.
    """
    # The async function returns an async iterator (it is consumed, not awaited), so it is
    # drained through ``_collect_aiterator`` under ``sync`` and then re-yielded synchronously.
    async_nodes = create_nodes_async(store=store, nodes=nodes)

    for key, value in sync(_collect_aiterator(async_nodes)):
        yield key, _parse_async_node(value)
49+
50+
51+
def create_hierarchy(
    *,
    store: Store,
    nodes: dict[str, GroupMetadata | ArrayV2Metadata | ArrayV3Metadata],
    overwrite: bool = False,
) -> Iterator[tuple[str, Group | Array]]:
    """
    Create a complete zarr hierarchy from a collection of metadata objects.

    Groups that are implicitly defined by the input will be created as needed.

    This function takes a parsed hierarchy dictionary and creates all the nodes in the hierarchy
    concurrently. Arrays and Groups are yielded in the order they are created. This order is not
    deterministic.

    This is a generator function: nothing is executed until the returned iterator is first
    advanced.

    Parameters
    ----------
    store : Store
        The storage backend to use.
    nodes : dict[str, GroupMetadata | ArrayV3Metadata | ArrayV2Metadata]
        A dictionary defining the hierarchy. The keys are the paths of the nodes
        in the hierarchy, and the values are the metadata of the nodes. The
        metadata must be either an instance of GroupMetadata, ArrayV3Metadata
        or ArrayV2Metadata.
    overwrite : bool
        Whether to overwrite existing nodes. Defaults to ``False``, in which case an error will be
        raised instead of overwriting an existing array or group.

    Yields
    ------
    tuple[str, Group | Array]
        (key, node) pairs in the order they are created.
    """
    # The async function returns an async iterator (it is consumed, not awaited), so it is
    # drained through ``_collect_aiterator`` under ``sync`` and then re-yielded synchronously.
    async_nodes = create_hierarchy_async(store=store, nodes=nodes, overwrite=overwrite)

    for key, value in sync(_collect_aiterator(async_nodes)):
        yield key, _parse_async_node(value)
84+
85+
86+
def create_rooted_hierarchy(
    *,
    store: Store,
    nodes: dict[str, GroupMetadata | ArrayV2Metadata | ArrayV3Metadata],
    overwrite: bool = False,
) -> Group | Array:
    """
    Build a rooted Zarr hierarchy and hand back its root node.

    Synchronous wrapper: the asynchronous ``create_rooted_hierarchy`` from ``zarr.core.group``
    is run to completion with ``sync`` and its result is converted with ``_parse_async_node``.

    Parameters
    ----------
    store : Store
        The storage backend to use.
    nodes : dict[str, GroupMetadata | ArrayV3Metadata | ArrayV2Metadata]
        A dictionary defining the hierarchy. The keys are the paths of the nodes
        in the hierarchy, and the values are the metadata of the nodes. The
        metadata must be either an instance of GroupMetadata, ArrayV3Metadata
        or ArrayV2Metadata.
    overwrite : bool
        Whether to overwrite existing nodes. Default is ``False``.

    Returns
    -------
    Group | Array
        The root node of the hierarchy.
    """
    pending_root = create_rooted_hierarchy_async(store=store, nodes=nodes, overwrite=overwrite)
    return _parse_async_node(sync(pending_root))
114+
115+
116+
def get_node(store: Store, path: str, zarr_format: ZarrFormat) -> Array | Group:
    """
    Read the Array or Group stored at a path in a Store.

    Synchronous wrapper: the asynchronous ``get_node`` from ``zarr.core.group`` is run to
    completion with ``sync`` and its result is converted with ``_parse_async_node``.

    Parameters
    ----------
    store : Store
        The store-like object to read from.
    path : str
        The path to the node to read.
    zarr_format : {2, 3}
        The zarr format of the node to read.

    Returns
    -------
    Array | Group
        The node found at ``path``.
    """
    async_node = sync(get_node_async(store=store, path=path, zarr_format=zarr_format))
    return _parse_async_node(async_node)

src/zarr/storage/_utils.py

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,13 @@
22

33
import re
44
from pathlib import Path
5-
from typing import TYPE_CHECKING
5+
from typing import TYPE_CHECKING, TypeVar
66

77
from zarr.abc.store import OffsetByteRequest, RangeByteRequest, SuffixByteRequest
88

99
if TYPE_CHECKING:
10+
from collections.abc import Iterable, Mapping
11+
1012
from zarr.abc.store import ByteRequest
1113
from zarr.core.buffer import Buffer
1214

@@ -66,3 +68,45 @@ def _normalize_byte_range_index(data: Buffer, byte_range: ByteRequest | None) ->
6668
else:
6769
raise ValueError(f"Unexpected byte_range, got {byte_range}.")
6870
return (start, stop)
71+
72+
73+
def _join_paths(paths: Iterable[str]) -> str:
74+
"""
75+
Filter out instances of '' and join the remaining strings with '/'.
76+
77+
Because the root node of a zarr hierarchy is represented by an empty string,
78+
"""
79+
return "/".join(filter(lambda v: v != "", paths))
80+
81+
82+
def _normalize_paths(paths: Iterable[str]) -> tuple[str, ...]:
    """
    Apply ``normalize_path`` to each input path and return the normalized paths as a tuple,
    in the order in which they were first seen.

    Raises
    ------
    ValueError
        If any two of the input paths normalize to the same value.
    """
    # Maps each normalized path to the input that produced it, so a collision can be
    # reported using the caller's own strings.
    path_map: dict[str, str] = {}
    for path in paths:
        parsed = normalize_path(path)
        if parsed not in path_map:
            path_map[parsed] = path
            continue
        msg = (
            f"After normalization, the value '{path}' collides with '{path_map[parsed]}'. "
            f"Both '{path}' and '{path_map[parsed]}' normalize to the same value: '{parsed}'. "
            f"You should use either '{path}' or '{path_map[parsed]}', but not both."
        )
        raise ValueError(msg)
    return tuple(path_map)
99+
100+
101+
T = TypeVar("T")


def _normalize_path_keys(data: Mapping[str, T]) -> dict[str, T]:
    """
    Normalize the keys of the input dict according to the normalization scheme used for zarr node
    paths. If any two keys in the input normalize to the same value, raise a ValueError.

    Parameters
    ----------
    data : Mapping[str, T]
        A mapping whose keys are node paths.

    Returns
    -------
    dict[str, T]
        A dict whose keys are the normalized forms of the input keys and whose values are the
        corresponding values of the input, unchanged and in the input's iteration order.

    Raises
    ------
    ValueError
        If two keys of ``data`` normalize to the same value (raised by ``_normalize_paths``).
    """
    parsed_keys = _normalize_paths(data.keys())
    # strict=True guards the pairing: the normalized keys and the values must line up one-to-one.
    return dict(zip(parsed_keys, data.values(), strict=True))

0 commit comments

Comments
 (0)