Skip to content

Commit ef31c5b

Browse files
authored
Merge branch 'main' into feat/numcodecs-protocol
2 parents 8e50ef8 + e410173 commit ef31c5b

File tree

4 files changed

+50
-2
lines changed

4 files changed

+50
-2
lines changed

changes/3288.misc.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Sort dictionary keys before returning consolidated metadata to ensure deterministic output.

docs/user-guide/consolidated_metadata.rst

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ that can be used.:
4545
>>> consolidated = zarr.open_group(store=store)
4646
>>> consolidated_metadata = consolidated.metadata.consolidated_metadata.metadata
4747
>>> from pprint import pprint
48-
>>> pprint(dict(sorted(consolidated_metadata.items())))
48+
>>> pprint(dict(consolidated_metadata.items()))
4949
{'a': ArrayV3Metadata(shape=(1,),
5050
data_type=Float64(endianness='little'),
5151
chunk_grid=RegularChunkGrid(chunk_shape=(1,)),
@@ -100,6 +100,14 @@ With nested groups, the consolidated metadata is available on the children, recu
100100
>>> consolidated['child'].metadata.consolidated_metadata
101101
ConsolidatedMetadata(metadata={'child': GroupMetadata(attributes={'kind': 'grandchild'}, zarr_format=3, consolidated_metadata=ConsolidatedMetadata(metadata={}, kind='inline', must_understand=False), node_type='group')}, kind='inline', must_understand=False)
102102

103+
.. versionadded:: 3.1.1
104+
105+
The keys in the consolidated metadata are sorted prior to writing. Keys are
106+
sorted in ascending order by path depth, where a path is defined as a sequence
107+
of strings joined by ``"/"``. For keys with the same path depth, case-insensitive lexicographic
108+
order is used to break the tie. This behaviour ensures deterministic metadata
109+
output for a given group.
110+
103111
Synchronization and Concurrency
104112
-------------------------------
105113

src/zarr/core/group.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import itertools
55
import json
66
import logging
7+
import unicodedata
78
import warnings
89
from collections import defaultdict
910
from dataclasses import asdict, dataclass, field, fields, replace
@@ -141,7 +142,16 @@ def to_dict(self) -> dict[str, JSON]:
141142
return {
142143
"kind": self.kind,
143144
"must_understand": self.must_understand,
144-
"metadata": {k: v.to_dict() for k, v in self.flattened_metadata.items()},
145+
"metadata": {
146+
k: v.to_dict()
147+
for k, v in sorted(
148+
self.flattened_metadata.items(),
149+
key=lambda item: (
150+
item[0].count("/"),
151+
unicodedata.normalize("NFKC", item[0]).casefold(),
152+
),
153+
)
154+
},
145155
}
146156

147157
@classmethod

tests/test_metadata/test_consolidated.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -467,6 +467,35 @@ def test_to_dict_empty(self):
467467
}
468468
assert result == expected
469469

470+
@pytest.mark.parametrize("zarr_format", [2, 3])
471+
async def test_to_dict_order(
472+
self, memory_store: zarr.storage.MemoryStore, zarr_format: ZarrFormat
473+
) -> None:
474+
with zarr.config.set(default_zarr_format=zarr_format):
475+
g = await group(store=memory_store)
476+
477+
# Create arrays and groups in non-lexicographic order
478+
dtype = "float32"
479+
await g.create_array(name="b", shape=(1,), dtype=dtype)
480+
child = await g.create_group("c", attributes={"key": "child"})
481+
await g.create_array(name="a", shape=(1,), dtype=dtype)
482+
483+
await child.create_array("e", shape=(1,), dtype=dtype)
484+
await child.create_array("d", shape=(1,), dtype=dtype)
485+
486+
# Consolidate metadata and re-open store
487+
await zarr.api.asynchronous.consolidate_metadata(memory_store)
488+
g2 = await zarr.api.asynchronous.open_group(store=memory_store)
489+
490+
assert list(g2.metadata.consolidated_metadata.metadata) == ["a", "b", "c"]
491+
assert list(g2.metadata.consolidated_metadata.flattened_metadata) == [
492+
"a",
493+
"b",
494+
"c",
495+
"c/d",
496+
"c/e",
497+
]
498+
470499
@pytest.mark.parametrize("zarr_format", [2, 3])
471500
async def test_open_consolidated_raises_async(self, zarr_format: ZarrFormat):
472501
store = zarr.storage.MemoryStore()

0 commit comments

Comments
 (0)