Skip to content

Commit 8afdce9

Browse files
committed
Merge branch 'refactor-warnings' of https://github.com/d-v-b/zarr-python into refactor-warnings
2 parents a235660 + 8a2bb9e commit 8afdce9

File tree

4 files changed

+50
-2
lines changed

4 files changed

+50
-2
lines changed

changes/3288.misc.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Sort dictionary keys before returning consolidated metadata to ensure deterministic output.

docs/user-guide/consolidated_metadata.rst

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ that can be used.:
4747
>>> consolidated = zarr.open_group(store=store)
4848
>>> consolidated_metadata = consolidated.metadata.consolidated_metadata.metadata
4949
>>> from pprint import pprint
50-
>>> pprint(dict(sorted(consolidated_metadata.items())))
50+
>>> pprint(dict(consolidated_metadata.items()))
5151
{'a': ArrayV3Metadata(shape=(1,),
5252
data_type=Float64(endianness='little'),
5353
chunk_grid=RegularChunkGrid(chunk_shape=(1,)),
@@ -102,6 +102,14 @@ With nested groups, the consolidated metadata is available on the children, recu
102102
>>> consolidated['child'].metadata.consolidated_metadata
103103
ConsolidatedMetadata(metadata={'child': GroupMetadata(attributes={'kind': 'grandchild'}, zarr_format=3, consolidated_metadata=ConsolidatedMetadata(metadata={}, kind='inline', must_understand=False), node_type='group')}, kind='inline', must_understand=False)
104104

105+
.. versionadded:: 3.1.1
106+
107+
The keys in the consolidated metadata are sorted prior to writing. Keys are
108+
sorted in ascending order by path depth, where a path is defined as a sequence
109+
of strings joined by ``"/"``. For keys with the same path depth, lexicographic
110+
order (case-insensitive, after Unicode NFKC normalization) is used to break the tie. This behaviour ensures deterministic metadata
111+
output for a given group.
112+
105113
Synchronization and Concurrency
106114
-------------------------------
107115

src/zarr/core/group.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import itertools
55
import json
66
import logging
7+
import unicodedata
78
import warnings
89
from collections import defaultdict
910
from dataclasses import asdict, dataclass, field, fields, replace
@@ -147,7 +148,16 @@ def to_dict(self) -> dict[str, JSON]:
147148
return {
148149
"kind": self.kind,
149150
"must_understand": self.must_understand,
150-
"metadata": {k: v.to_dict() for k, v in self.flattened_metadata.items()},
151+
"metadata": {
152+
k: v.to_dict()
153+
for k, v in sorted(
154+
self.flattened_metadata.items(),
155+
key=lambda item: (
156+
item[0].count("/"),
157+
unicodedata.normalize("NFKC", item[0]).casefold(),
158+
),
159+
)
160+
},
151161
}
152162

153163
@classmethod

tests/test_metadata/test_consolidated.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -480,6 +480,35 @@ def test_to_dict_empty(self):
480480
}
481481
assert result == expected
482482

483+
@pytest.mark.parametrize("zarr_format", [2, 3])
484+
async def test_to_dict_order(
485+
self, memory_store: zarr.storage.MemoryStore, zarr_format: ZarrFormat
486+
) -> None:
487+
with zarr.config.set(default_zarr_format=zarr_format):
488+
g = await group(store=memory_store)
489+
490+
# Create arrays and groups in non-lexicographic order
491+
dtype = "float32"
492+
await g.create_array(name="b", shape=(1,), dtype=dtype)
493+
child = await g.create_group("c", attributes={"key": "child"})
494+
await g.create_array(name="a", shape=(1,), dtype=dtype)
495+
496+
await child.create_array("e", shape=(1,), dtype=dtype)
497+
await child.create_array("d", shape=(1,), dtype=dtype)
498+
499+
# Consolidate metadata and re-open store
500+
await zarr.api.asynchronous.consolidate_metadata(memory_store)
501+
g2 = await zarr.api.asynchronous.open_group(store=memory_store)
502+
503+
assert list(g2.metadata.consolidated_metadata.metadata) == ["a", "b", "c"]
504+
assert list(g2.metadata.consolidated_metadata.flattened_metadata) == [
505+
"a",
506+
"b",
507+
"c",
508+
"c/d",
509+
"c/e",
510+
]
511+
483512
@pytest.mark.parametrize("zarr_format", [2, 3])
484513
async def test_open_consolidated_raises_async(self, zarr_format: ZarrFormat):
485514
store = zarr.storage.MemoryStore()

0 commit comments

Comments
 (0)