Skip to content

Commit 5c05c0c

Browse files
committed
merge upstream changes
2 parents d3fc21e + 6547b7f commit 5c05c0c

File tree

6 files changed

+95
-36
lines changed

6 files changed

+95
-36
lines changed

changes/3288.misc.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Sort dictionary keys before returning consolidated metadata to ensure deterministic output.

docs/user-guide/consolidated_metadata.rst

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ that can be used.:
4545
>>> consolidated = zarr.open_group(store=store)
4646
>>> consolidated_metadata = consolidated.metadata.consolidated_metadata.metadata
4747
>>> from pprint import pprint
48-
>>> pprint(dict(sorted(consolidated_metadata.items())))
48+
>>> pprint(dict(consolidated_metadata.items()))
4949
{'a': ArrayV3Metadata(shape=(1,),
5050
data_type=Float64(endianness='little'),
5151
chunk_grid=RegularChunkGrid(chunk_shape=(1,)),
@@ -100,6 +100,14 @@ With nested groups, the consolidated metadata is available on the children, recu
100100
>>> consolidated['child'].metadata.consolidated_metadata
101101
ConsolidatedMetadata(metadata={'child': GroupMetadata(attributes={'kind': 'grandchild'}, zarr_format=3, consolidated_metadata=ConsolidatedMetadata(metadata={}, kind='inline', must_understand=False), node_type='group')}, kind='inline', must_understand=False)
102102

103+
.. versionadded:: 3.1.1
104+
105+
The keys in the consolidated metadata are sorted prior to writing. Keys are
106+
sorted in ascending order by path depth, where a path is defined as a sequence
107+
of strings joined by ``"/"``. For keys with the same path length, lexicographic
108+
order is used to break the tie. This behaviour ensures deterministic metadata
109+
output for a given group.
110+
103111
Synchronization and Concurrency
104112
-------------------------------
105113

src/zarr/core/group.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import itertools
55
import json
66
import logging
7+
import unicodedata
78
import warnings
89
from collections import defaultdict
910
from dataclasses import asdict, dataclass, field, fields, replace
@@ -141,7 +142,16 @@ def to_dict(self) -> dict[str, JSON]:
141142
return {
142143
"kind": self.kind,
143144
"must_understand": self.must_understand,
144-
"metadata": {k: v.to_dict() for k, v in self.flattened_metadata.items()},
145+
"metadata": {
146+
k: v.to_dict()
147+
for k, v in sorted(
148+
self.flattened_metadata.items(),
149+
key=lambda item: (
150+
item[0].count("/"),
151+
unicodedata.normalize("NFKC", item[0]).casefold(),
152+
),
153+
)
154+
},
145155
}
146156

147157
@classmethod

src/zarr/storage/_common.py

Lines changed: 43 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -305,54 +305,60 @@ async def make_store(
305305
------
306306
TypeError
307307
If the StoreLike object is not one of the supported types, or if storage_options is provided but not used.
308-
ValueError
309-
If storage_options is provided for a store that does not support it.
310308
"""
311309
from zarr.storage._fsspec import FsspecStore # circular import
312310

313-
used_storage_options = False
314-
assert mode in (None, "r", "r+", "a", "w", "w-")
311+
if (
312+
not (isinstance(store_like, str) and _is_fsspec_uri(store_like))
313+
and storage_options is not None
314+
):
315+
raise TypeError(
316+
"'storage_options' was provided but unused. "
317+
"'storage_options' is only used when the store is passed as a FSSpec URI string.",
318+
)
315319

316-
# if mode 'r' was provided, we'll open any new stores as read-only
320+
assert mode in (None, "r", "r+", "a", "w", "w-")
317321
_read_only = mode == "r"
318322

319323
if isinstance(store_like, StorePath):
320-
store = store_like.store
324+
# Already a StorePath
325+
return store_like.store
326+
321327
elif isinstance(store_like, Store):
322-
store = store_like
328+
# Already a Store
329+
return store_like
330+
331+
elif isinstance(store_like, dict):
332+
# Already a dictionary that can be a MemoryStore
333+
#
334+
# We deliberate only consider dict[str, Buffer] here, and not arbitrary mutable mappings.
335+
# By only allowing dictionaries, which are in-memory, we know that MemoryStore appropriate.
336+
return await MemoryStore.open(store_dict=store_like, read_only=_read_only)
337+
323338
elif store_like is None:
324-
store = await MemoryStore.open(read_only=_read_only)
339+
# Create a new in-memory store
340+
return await make_store({}, mode=mode, storage_options=storage_options)
341+
325342
elif isinstance(store_like, Path):
326-
store = await LocalStore.open(root=store_like, read_only=_read_only)
327-
elif isinstance(store_like, str):
328-
storage_options = storage_options or {}
343+
# Create a new LocalStore
344+
return await LocalStore.open(root=store_like, read_only=_read_only)
329345

346+
elif isinstance(store_like, str):
347+
# Either a FSSpec URI or a local filesystem path
330348
if _is_fsspec_uri(store_like):
331-
used_storage_options = True
332-
store = FsspecStore.from_url(
349+
return FsspecStore.from_url(
333350
store_like, storage_options=storage_options, read_only=_read_only
334351
)
335352
else:
336-
store = await LocalStore.open(root=Path(store_like), read_only=_read_only)
337-
elif isinstance(store_like, dict):
338-
# We deliberate only consider dict[str, Buffer] here, and not arbitrary mutable mappings.
339-
# By only allowing dictionaries, which are in-memory, we know that MemoryStore appropriate.
340-
store = await MemoryStore.open(store_dict=store_like, read_only=_read_only)
353+
# Assume a filesystem path
354+
return await make_store(Path(store_like), mode=mode, storage_options=storage_options)
355+
341356
elif _has_fsspec and isinstance(store_like, FSMap):
342-
if storage_options:
343-
raise ValueError(
344-
"'storage_options was provided but is not used for FSMap store_like objects. Specify the storage options when creating the FSMap instance instead."
345-
)
346-
store = FsspecStore.from_mapper(store_like, read_only=_read_only)
357+
return FsspecStore.from_mapper(store_like, read_only=_read_only)
358+
347359
else:
348360
raise TypeError(f"Unsupported type for store_like: '{type(store_like).__name__}'")
349361

350-
if storage_options and not used_storage_options:
351-
msg = "'storage_options' was provided but unused. 'storage_options' is only used for fsspec filesystem stores."
352-
raise TypeError(msg)
353-
354-
return store
355-
356362

357363
async def make_store_path(
358364
store_like: StoreLike | None,
@@ -391,7 +397,7 @@ async def make_store_path(
391397
TypeError
392398
If the StoreLike object is not one of the supported types, or if storage_options is provided but not used.
393399
ValueError
394-
If storage_options is provided for a store that does not support it.
400+
If path is provided for a store that does not support it.
395401
396402
See Also
397403
--------
@@ -400,14 +406,19 @@ async def make_store_path(
400406
path_normalized = normalize_path(path)
401407

402408
if isinstance(store_like, StorePath):
409+
# Already a StorePath
403410
if storage_options:
404-
msg = "'storage_options' was provided but unused. 'storage_options' is only used for fsspec filesystem stores."
405-
raise TypeError(msg)
411+
raise TypeError(
412+
"'storage_options' was provided but unused. "
413+
"'storage_options' is only used when the store is passed as a FSSpec URI string.",
414+
)
406415
return store_like / path_normalized
416+
407417
elif _has_fsspec and isinstance(store_like, FSMap) and path:
408418
raise ValueError(
409419
"'path' was provided but is not used for FSMap store_like objects. Specify the path when creating the FSMap instance instead."
410420
)
421+
411422
else:
412423
store = await make_store(store_like, mode=mode, storage_options=storage_options)
413424
return await StorePath.open(store, path=path_normalized, mode=mode)

tests/test_metadata/test_consolidated.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -467,6 +467,35 @@ def test_to_dict_empty(self):
467467
}
468468
assert result == expected
469469

470+
@pytest.mark.parametrize("zarr_format", [2, 3])
471+
async def test_to_dict_order(
472+
self, memory_store: zarr.storage.MemoryStore, zarr_format: ZarrFormat
473+
) -> None:
474+
with zarr.config.set(default_zarr_format=zarr_format):
475+
g = await group(store=memory_store)
476+
477+
# Create groups in non-lexicographix order
478+
dtype = "float32"
479+
await g.create_array(name="b", shape=(1,), dtype=dtype)
480+
child = await g.create_group("c", attributes={"key": "child"})
481+
await g.create_array(name="a", shape=(1,), dtype=dtype)
482+
483+
await child.create_array("e", shape=(1,), dtype=dtype)
484+
await child.create_array("d", shape=(1,), dtype=dtype)
485+
486+
# Consolidate metadata and re-open store
487+
await zarr.api.asynchronous.consolidate_metadata(memory_store)
488+
g2 = await zarr.api.asynchronous.open_group(store=memory_store)
489+
490+
assert list(g2.metadata.consolidated_metadata.metadata) == ["a", "b", "c"]
491+
assert list(g2.metadata.consolidated_metadata.flattened_metadata) == [
492+
"a",
493+
"b",
494+
"c",
495+
"c/d",
496+
"c/e",
497+
]
498+
470499
@pytest.mark.parametrize("zarr_format", [2, 3])
471500
async def test_open_consolidated_raises_async(self, zarr_format: ZarrFormat):
472501
store = zarr.storage.MemoryStore()

tests/test_store/test_fsspec.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -388,8 +388,8 @@ def test_open_s3map_raises() -> None:
388388
):
389389
zarr.open(store=mapper, path="bar", mode="w", shape=(3, 3))
390390
with pytest.raises(
391-
ValueError,
392-
match="'storage_options was provided but is not used for FSMap store_like objects",
391+
TypeError,
392+
match="'storage_options' is only used when the store is passed as a FSSpec URI string.",
393393
):
394394
zarr.open(store=mapper, storage_options={"anon": True}, mode="w", shape=(3, 3))
395395

0 commit comments

Comments
 (0)