From b38cddbe62ac852bcfe3ae3650105047115b67d5 Mon Sep 17 00:00:00 2001 From: Lukas Kluft Date: Wed, 23 Jul 2025 21:25:45 +0200 Subject: [PATCH 1/7] Sort dictionary keys before returning consolidated metadata --- src/zarr/core/group.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index a398aa01aa..51bd09b975 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -141,7 +141,13 @@ def to_dict(self) -> dict[str, JSON]: return { "kind": self.kind, "must_understand": self.must_understand, - "metadata": {k: v.to_dict() for k, v in self.flattened_metadata.items()}, + "metadata": { + k: v.to_dict() + for k, v in sorted( + self.flattened_metadata.items(), + key=lambda item: (item[0].count("/"), item[0]), + ) + }, } @classmethod From fa0f30fdfa19729bcdb7a6d44d9f07b26d2dfc73 Mon Sep 17 00:00:00 2001 From: Lukas Kluft Date: Fri, 25 Jul 2025 07:48:42 +0200 Subject: [PATCH 2/7] Normalize metadata key before sorting --- src/zarr/core/group.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index 51bd09b975..47dc667cb4 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -4,6 +4,7 @@ import itertools import json import logging +import unicodedata import warnings from collections import defaultdict from dataclasses import asdict, dataclass, field, fields, replace @@ -145,7 +146,7 @@ def to_dict(self) -> dict[str, JSON]: k: v.to_dict() for k, v in sorted( self.flattened_metadata.items(), - key=lambda item: (item[0].count("/"), item[0]), + key=lambda item: (item[0].count("/"), unicodedata.normalize("NFKC", item[0])), ) }, } From 83833bdfed159b6c66a6da9477ba6547b8ba0549 Mon Sep 17 00:00:00 2001 From: Lukas Kluft Date: Fri, 25 Jul 2025 11:29:28 +0200 Subject: [PATCH 3/7] Casefold metadata key before sorting --- src/zarr/core/group.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git 
a/src/zarr/core/group.py b/src/zarr/core/group.py index 47dc667cb4..f18c723a76 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -146,7 +146,10 @@ def to_dict(self) -> dict[str, JSON]: k: v.to_dict() for k, v in sorted( self.flattened_metadata.items(), - key=lambda item: (item[0].count("/"), unicodedata.normalize("NFKC", item[0])), + key=lambda item: ( + item[0].count("/"), + unicodedata.normalize("NFKC", item[0]).casefold(), + ), ) }, } From f957c13cdf33fd5fbb83a4b842f0fa0096c7ab13 Mon Sep 17 00:00:00 2001 From: Lukas Kluft Date: Thu, 24 Jul 2025 17:04:03 +0200 Subject: [PATCH 4/7] Add test to verify order of consolidated metadata --- tests/test_metadata/test_consolidated.py | 29 ++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tests/test_metadata/test_consolidated.py b/tests/test_metadata/test_consolidated.py index ea2f834bb6..19eba4fb86 100644 --- a/tests/test_metadata/test_consolidated.py +++ b/tests/test_metadata/test_consolidated.py @@ -467,6 +467,35 @@ def test_to_dict_empty(self): } assert result == expected + @pytest.mark.parametrize("zarr_format", [2, 3]) + async def test_to_dict_order( + self, memory_store: zarr.storage.MemoryStore, zarr_format: ZarrFormat + ) -> None: + with zarr.config.set(default_zarr_format=zarr_format): + g = await group(store=memory_store) + + # Create arrays and groups in non-lexicographic order + dtype = "float32" + await g.create_array(name="b", shape=(1,), dtype=dtype) + child = await g.create_group("c", attributes={"key": "child"}) + await g.create_array(name="a", shape=(1,), dtype=dtype) + + await child.create_array("e", shape=(1,), dtype=dtype) + await child.create_array("d", shape=(1,), dtype=dtype) + + # Consolidate metadata and re-open store + await zarr.api.asynchronous.consolidate_metadata(memory_store) + g2 = await zarr.api.asynchronous.open_group(store=memory_store) + + assert list(g2.metadata.consolidated_metadata.metadata) == ["a", "b", "c"] + assert 
list(g2.metadata.consolidated_metadata.flattened_metadata) == [ + "a", + "b", + "c", + "c/d", + "c/e", + ] + @pytest.mark.parametrize("zarr_format", [2, 3]) async def test_open_consolidated_raises_async(self, zarr_format: ZarrFormat): store = zarr.storage.MemoryStore() From 94d4763a6ec1a133b4d1cd17b485027178b0f329 Mon Sep 17 00:00:00 2001 From: Lukas Kluft Date: Thu, 24 Jul 2025 17:46:37 +0200 Subject: [PATCH 5/7] Explain order of consolidated metadata keys in user guide --- docs/user-guide/consolidated_metadata.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/user-guide/consolidated_metadata.rst b/docs/user-guide/consolidated_metadata.rst index 4cd72dbc74..13bd0dc4bb 100644 --- a/docs/user-guide/consolidated_metadata.rst +++ b/docs/user-guide/consolidated_metadata.rst @@ -100,6 +100,14 @@ With nested groups, the consolidated metadata is available on the children, recu >>> consolidated['child'].metadata.consolidated_metadata ConsolidatedMetadata(metadata={'child': GroupMetadata(attributes={'kind': 'grandchild'}, zarr_format=3, consolidated_metadata=ConsolidatedMetadata(metadata={}, kind='inline', must_understand=False), node_type='group')}, kind='inline', must_understand=False) +.. versionadded:: 3.1.1 + + The keys in the consolidated metadata are sorted prior to writing. Keys are + sorted in ascending order by path depth, where a path is defined as a sequence + of strings joined by ``"/"``. For keys with the same path depth, ties are + broken lexicographically on the NFKC-normalized, case-folded key. This behaviour + ensures deterministic metadata output for a given group. 
+ Synchronization and Concurrency ------------------------------- From 464fc10f184a77b7ade62559713609723fd33805 Mon Sep 17 00:00:00 2001 From: Lukas Kluft Date: Fri, 25 Jul 2025 07:52:38 +0200 Subject: [PATCH 6/7] Remove (now) unnecessary sort in user guide example --- docs/user-guide/consolidated_metadata.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user-guide/consolidated_metadata.rst b/docs/user-guide/consolidated_metadata.rst index 13bd0dc4bb..05a3aa7fb4 100644 --- a/docs/user-guide/consolidated_metadata.rst +++ b/docs/user-guide/consolidated_metadata.rst @@ -45,7 +45,7 @@ that can be used.: >>> consolidated = zarr.open_group(store=store) >>> consolidated_metadata = consolidated.metadata.consolidated_metadata.metadata >>> from pprint import pprint - >>> pprint(dict(sorted(consolidated_metadata.items()))) + >>> pprint(dict(consolidated_metadata.items())) {'a': ArrayV3Metadata(shape=(1,), data_type=Float64(endianness='little'), chunk_grid=RegularChunkGrid(chunk_shape=(1,)), From dfddbecb5fe2dcc68e280c6862219ad751470010 Mon Sep 17 00:00:00 2001 From: Lukas Kluft Date: Wed, 23 Jul 2025 22:27:34 +0200 Subject: [PATCH 7/7] Document change --- changes/3288.misc.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 changes/3288.misc.rst diff --git a/changes/3288.misc.rst b/changes/3288.misc.rst new file mode 100644 index 0000000000..af7119487f --- /dev/null +++ b/changes/3288.misc.rst @@ -0,0 +1 @@ +Sort dictionary keys before returning consolidated metadata to ensure deterministic output.