Skip to content

Commit cdfd5de

Browse files
committed
remove implicit group metadata, and add some key name normalization
1 parent 4562e86 commit cdfd5de

File tree

2 files changed

+36
-83
lines changed

2 files changed

+36
-83
lines changed

src/zarr/core/group.py

Lines changed: 25 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -2881,7 +2881,8 @@ async def create_hierarchy(
28812881
The created nodes in the order they are created.
28822882
"""
28832883
nodes_parsed = _parse_hierarchy_dict(nodes)
2884-
2884+
if overwrite:
2885+
await store_path.delete_dir()
28852886
async for node in create_nodes(store_path=store_path, nodes=nodes_parsed, semaphore=semaphore):
28862887
yield node
28872888

@@ -2904,17 +2905,24 @@ async def create_nodes(
29042905

29052906
create_tasks: list[Coroutine[None, None, str]] = []
29062907
for key, value in nodes.items():
2907-
write_key = str(PurePosixPath(store_path.path) / key)
2908+
write_key = f"{store_path.path}/{key}".lstrip("/")
29082909
create_tasks.extend(_persist_metadata(store_path.store, write_key, value))
29092910

29102911
created_keys = []
29112912
async with ctx:
29122913
for coro in asyncio.as_completed(create_tasks):
29132914
created_key = await coro
2914-
relative_path = PurePosixPath(created_key).relative_to(store_path.path)
2915-
created_keys.append(str(relative_path))
2916-
# convert foo/bar/baz/.zattrs to bar/baz
2917-
node_name = str(relative_path.parent)
2915+
# the created key will be in the store key space. we have to remove the store_path.path
2916+
# component of that path to bring it back to the relative key space of store_path
2917+
2918+
relative_path = created_key.removeprefix(store_path.path).lstrip("/")
2919+
created_keys.append(relative_path)
2920+
2921+
if len(relative_path.split("/")) == 1:
2922+
node_name = ""
2923+
else:
2924+
node_name = "/".join(["", *relative_path.split("/")[:-1]])
2925+
29182926
meta_out = nodes[node_name]
29192927

29202928
if meta_out.zarr_format == 3:
@@ -2928,18 +2936,19 @@ async def create_nodes(
29282936
# so we track which keys have been created, and wait for both the meta key and
29292937
# the attrs key to be created before yielding back the AsyncArray / AsyncGroup
29302938

2931-
attrs_done = f"{node_name}/.zattrs" in created_keys
2939+
attrs_done = f"{node_name}/.zattrs".lstrip("/") in created_keys
29322940

29332941
if isinstance(meta_out, GroupMetadata):
2934-
meta_done = f"{node_name}/.zgroup" in created_keys
2942+
meta_done = f"{node_name}/.zgroup".lstrip("/") in created_keys
29352943
else:
2936-
meta_done = f"{node_name}/.zarray" in created_keys
2944+
meta_done = f"{node_name}/.zarray".lstrip("/") in created_keys
29372945

29382946
if meta_done and attrs_done:
29392947
if isinstance(meta_out, GroupMetadata):
29402948
yield AsyncGroup(metadata=meta_out, store_path=store_path / node_name)
29412949
else:
29422950
yield AsyncArray(metadata=meta_out, store_path=store_path / node_name)
2951+
continue
29432952

29442953

29452954
T = TypeVar("T")
@@ -3006,9 +3015,9 @@ def _parse_hierarchy_dict(
30063015
# Iterate over the intermediate path components
30073016
*subpaths, _ = accumulate(key_split, lambda a, b: f"{a}/{b}")
30083017
for subpath in subpaths:
3009-
# If a component is not already in the output dict, add an implicit group marker
3018+
# If a component is not already in the output dict, add a group
30103019
if subpath not in out:
3011-
out[subpath] = _ImplicitGroupMetadata(zarr_format=v.zarr_format)
3020+
out[subpath] = GroupMetadata(zarr_format=v.zarr_format)
30123021
else:
30133022
if not isinstance(out[subpath], GroupMetadata):
30143023
msg = (
@@ -3245,50 +3254,14 @@ def _persist_metadata(
32453254
"""
32463255

32473256
to_save = metadata.to_buffer_dict(default_buffer_prototype())
3248-
if isinstance(metadata, _ImplicitGroupMetadata):
3249-
replace = False
3250-
else:
3251-
replace = True
3252-
# TODO: should this function be a generator that yields values instead of eagerly returning a tuple?
32533257
return tuple(
3254-
_set_return_key(store=store, key=f"{path}/{key}", value=value, replace=replace)
3258+
_set_return_key(store=store, key=f"{path}/{key}".lstrip("/"), value=value, replace=True)
32553259
for key, value in to_save.items()
32563260
)
32573261

32583262

3259-
class _ImplicitGroupMetadata(GroupMetadata):
3260-
"""
3261-
This class represents the metadata document of a group that should be created at a
3262-
location in storage if and only if there is not already a group at that location.
3263-
3264-
This class is used to fill group-shaped "holes" in a dict specification of a Zarr hierarchy.
3265-
3266-
When attempting to write this class to storage, the writer should first check if a Zarr group
3267-
already exists at the desired location. If such a group does exist, the writer should do nothing.
3268-
If not, the writer should write this metadata document to storage.
3269-
3270-
"""
3271-
3272-
def __init__(
3273-
self,
3274-
attributes: dict[str, Any] | None = None,
3275-
zarr_format: ZarrFormat = 3,
3276-
consolidated_metadata: ConsolidatedMetadata | None = None,
3277-
) -> None:
3278-
if attributes is not None:
3279-
raise ValueError("attributes must be None for implicit groups")
3280-
3281-
if consolidated_metadata is not None:
3282-
raise ValueError("consolidated_metadata must be None for implicit groups")
3283-
3284-
super().__init__(attributes, zarr_format, consolidated_metadata)
3285-
3286-
def to_dict(self) -> dict[str, JSON]:
3287-
return asdict(self)
3288-
3289-
32903263
async def _from_flat(
3291-
store: StoreLike,
3264+
store_path: StorePath,
32923265
*,
32933266
nodes: dict[str, GroupMetadata | ArrayV2Metadata | ArrayV3Metadata],
32943267
overwrite: bool = False,
@@ -3308,16 +3281,13 @@ async def _from_flat(
33083281
else:
33093282
root = roots[0]
33103283

3311-
if overwrite:
3312-
store_path = await make_store_path(store, mode="w")
3313-
else:
3314-
store_path = await make_store_path(store, mode="w-")
3315-
33163284
semaphore = asyncio.Semaphore(config.get("async.concurrency"))
33173285

33183286
nodes_created = {
33193287
x.path: x
3320-
async for x in create_hierarchy(store_path=store_path, nodes=nodes, semaphore=semaphore)
3288+
async for x in create_hierarchy(
3289+
store_path=store_path, nodes=nodes, semaphore=semaphore, overwrite=overwrite
3290+
)
33213291
}
33223292
root_group = nodes_created[root]
33233293
if not isinstance(root_group, AsyncGroup):

tests/test_group.py

Lines changed: 11 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
ConsolidatedMetadata,
2626
GroupMetadata,
2727
_from_flat,
28-
_ImplicitGroupMetadata,
2928
create_hierarchy,
3029
create_nodes,
3130
)
@@ -1474,7 +1473,8 @@ async def test_create_nodes(store: Store, zarr_format: ZarrFormat) -> None:
14741473

14751474

14761475
@pytest.mark.parametrize("store", ["memory"], indirect=True)
1477-
async def test_create_hierarchy(store: Store, zarr_format: ZarrFormat) -> None:
1476+
@pytest.mark.parametrize("overwrite", [True, False])
1477+
async def test_create_hierarchy(store: Store, overwrite: bool, zarr_format: ZarrFormat) -> None:
14781478
"""
14791479
Test that ``create_hierarchy`` can create a complete Zarr hierarchy, even if the input describes
14801480
an incomplete one.
@@ -1487,11 +1487,15 @@ async def test_create_hierarchy(store: Store, zarr_format: ZarrFormat) -> None:
14871487
"group/subgroup/array_0": meta_from_array(np.arange(4), zarr_format=zarr_format),
14881488
"group/subgroup/array_1": meta_from_array(np.arange(5), zarr_format=zarr_format),
14891489
}
1490+
pre_existing_nodes = {"extra": GroupMetadata(zarr_format=zarr_format)}
1491+
# we expect create_hierarchy to insert a group that was missing from the hierarchy spec
14901492
expected_meta = hierarchy_spec | {"group/subgroup": GroupMetadata(zarr_format=zarr_format)}
14911493
spath = await make_store_path(store, path=path)
1494+
# initialize the group with some nodes
1495+
await _collect_aiterator(_from_flat(store_path=spath, nodes=pre_existing_nodes))
14921496
observed_nodes = {
14931497
str(PurePosixPath(a.name).relative_to("/" + path)): a
1494-
async for a in create_hierarchy(store_path=spath, nodes=expected_meta)
1498+
async for a in create_hierarchy(store_path=spath, nodes=expected_meta, overwrite=overwrite)
14951499
}
14961500
assert expected_meta == {k: v.metadata for k, v in observed_nodes.items()}
14971501

@@ -1587,11 +1591,12 @@ async def test_create_hierarchy_invalid_mixed_format(store: Store):
15871591
@pytest.mark.parametrize("store", ["memory"], indirect=True)
15881592
@pytest.mark.parametrize("zarr_format", [2, 3])
15891593
@pytest.mark.parametrize("root_key", ["", "a", "a/b"])
1590-
async def test_group_from_flat(store: Store, zarr_format, root_key: str):
1594+
@pytest.mark.parametrize("path", ["", "foo"])
1595+
async def test_group_from_flat(store: Store, zarr_format, path: str, root_key: str):
15911596
"""
15921597
Test that the AsyncGroup.from_flat method creates a zarr group in one shot.
15931598
"""
1594-
root_key = "a"
1599+
spath = await make_store_path(store, path=path)
15951600
root_meta = {root_key: GroupMetadata(zarr_format=zarr_format, attributes={"path": root_key})}
15961601
members_expected_meta = {
15971602
f"{root_key}/b": GroupMetadata(
@@ -1601,7 +1606,7 @@ async def test_group_from_flat(store: Store, zarr_format, root_key: str):
16011606
zarr_format=zarr_format, attributes={"path": f"{root_key}/b/c"}
16021607
),
16031608
}
1604-
g = await _from_flat(store, nodes=root_meta | members_expected_meta)
1609+
g = await _from_flat(spath, nodes=root_meta | members_expected_meta)
16051610
members = await _collect_aiterator(g.members(max_depth=None))
16061611
members_observed_meta = {k: v.metadata for k, v in members}
16071612
members_expected_meta_relative = {
@@ -1610,28 +1615,6 @@ async def test_group_from_flat(store: Store, zarr_format, root_key: str):
16101615
assert members_observed_meta == members_expected_meta_relative
16111616

16121617

1613-
@pytest.mark.parametrize("store", ["memory"], indirect=True)
1614-
@pytest.mark.parametrize("zarr_format", [2, 3])
1615-
async def test_create_hierarchy_implicit_groups(store: Store, zarr_format):
1616-
"""
1617-
Test that writing a hierarchy with implicit groups does not result in altering an existing group
1618-
"""
1619-
spath = await make_store_path(store, path="")
1620-
key = "a"
1621-
attrs = {"name": key}
1622-
_ = await _from_flat(
1623-
store,
1624-
nodes={
1625-
key: GroupMetadata(zarr_format=zarr_format, attributes=attrs),
1626-
},
1627-
)
1628-
1629-
_ = await _collect_aiterator(
1630-
create_nodes(store_path=spath, nodes={key: _ImplicitGroupMetadata(zarr_format=zarr_format)})
1631-
)
1632-
assert zarr.open_group(store, path=key).metadata.attributes == attrs
1633-
1634-
16351618
@pytest.mark.parametrize("store", ["memory"], indirect=True)
16361619
def test_group_members_performance(store: Store) -> None:
16371620
"""

0 commit comments

Comments
 (0)