Skip to content

Commit 42ffbbd

Browse files
committed
Allow Stores to opt out of consolidated metadata.
Some Stores don't benefit from Zarr's consolidated metadata mechanism. These Stores usually implement their own consolidation mechanism, or provide good performance for metadata retrieval out of the box. These Stores can now implement the `supports_consolidated_metadata` property returning `False`. In this situation, Zarr will silently ignore any requests to consolidate the metadata.
1 parent a8d4f42 commit 42ffbbd

File tree

6 files changed

+75
-8
lines changed

6 files changed

+75
-8
lines changed

docs/user-guide/consolidated_metadata.rst

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,3 +114,19 @@ removed, or modified, consolidated metadata may not be desirable.
114114
metadata.
115115

116116
.. _Consolidated Metadata: https://github.com/zarr-developers/zarr-specs/pull/309
117+
118+
Stores Without Support for Consolidated Metadata
119+
------------------------------------------------
120+
121+
Some stores may want to opt out of the consolidated metadata mechanism. This
122+
may be for several reasons like:
123+
124+
* They want to maintain read-write consistency, which is challenging with
125+
consolidated metadata.
126+
* They have their own consolidated metadata mechanism.
127+
* They offer good enough performance without need for consolidation.
128+
129+
This type of store can declare it doesn't want consolidation by implementing
130+
`Store.supports_consolidated_metadata`. For stores that don't support
131+
consolidation, Zarr will silently ignore any `consolidate_metadata` calls,
132+
maintaining the store in its unconsolidated state.

src/zarr/abc/store.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,18 @@ async def _set_many(self, values: Iterable[tuple[str, Buffer]]) -> None:
264264
"""
265265
await gather(*starmap(self.set, values))
266266

267+
@property
268+
def supports_consolidated_metadata(self) -> bool:
269+
"""
270+
Does the store support and benefit from consolidated metadata?
271+
272+
If it doesn't, Zarr will ignore requests to consolidate the metadata.
273+
Stores that would return `False` are the ones that implement their own
274+
consolidation mechanism, that allows fast querying of metadata keys.
275+
"""
276+
277+
return True
278+
267279
@property
268280
@abstractmethod
269281
def supports_deletes(self) -> bool:

src/zarr/api/asynchronous.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,8 @@ async def consolidate_metadata(
174174
Consolidate the metadata of all nodes in a hierarchy.
175175
176176
Upon completion, the metadata of the root node in the Zarr hierarchy will be
177-
updated to include all the metadata of child nodes.
177+
updated to include all the metadata of child nodes. For Stores that prefer
178+
not to use consolidated metadata, this operation does nothing.
178179
179180
Parameters
180181
----------
@@ -194,11 +195,16 @@ async def consolidate_metadata(
194195
-------
195196
group: AsyncGroup
196197
The group, with the ``consolidated_metadata`` field set to include
197-
the metadata of each child node.
198+
the metadata of each child node. If the Store doesn't prefer
199+
consolidated metadata, this function does nothing and returns
200+
the group without modifications. See ``Store.supports_consolidated_metadata``.
198201
"""
199202
store_path = await make_store_path(store, path=path)
200203

201204
group = await AsyncGroup.open(store_path, zarr_format=zarr_format, use_consolidated=False)
205+
if not group.store_path.store.supports_consolidated_metadata:
206+
return group
207+
202208
group.store_path.store._check_writable()
203209

204210
members_metadata = {

src/zarr/api/synchronous.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,8 @@ def consolidate_metadata(
8181
Consolidate the metadata of all nodes in a hierarchy.
8282
8383
Upon completion, the metadata of the root node in the Zarr hierarchy will be
84-
updated to include all the metadata of child nodes.
84+
updated to include all the metadata of child nodes. For Stores that prefer
85+
not to use consolidated metadata, this operation does nothing.
8586
8687
Parameters
8788
----------
@@ -101,7 +102,10 @@ def consolidate_metadata(
101102
-------
102103
group: Group
103104
The group, with the ``consolidated_metadata`` field set to include
104-
the metadata of each child node.
105+
the metadata of each child node. If the Store doesn't prefer
106+
consolidated metadata, this function does nothing and returns
107+
the group without modifications. See ``Store.supports_consolidated_metadata``.
108+
105109
"""
106110
return Group(sync(async_api.consolidate_metadata(store, path=path, zarr_format=zarr_format)))
107111

src/zarr/core/group.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -492,8 +492,11 @@ async def open(
492492
store (in the ``zarr.json`` for Zarr format 3 and in the ``.zmetadata`` file
493493
for Zarr format 2).
494494
495-
To explicitly require consolidated metadata, set ``use_consolidated=True``,
496-
which will raise an exception if consolidated metadata is not found.
495+
To explicitly require consolidated metadata, set ``use_consolidated=True``.
496+
If the Store supports consolidated metadata, this will raise an
497+
exception if consolidated metadata is not found. If the Store doesn't want
498+
to use consolidated metadata, we assume it implements its own consolidation,
499+
so this is equivalent to ``use_consolidated=False``.
497500
498501
To explicitly *not* use consolidated metadata, set ``use_consolidated=False``,
499502
which will fall back to using the regular, non consolidated metadata.
@@ -503,6 +506,8 @@ async def open(
503506
to load consolidated metadata from a non-default key.
504507
"""
505508
store_path = await make_store_path(store)
509+
if not store_path.store.supports_consolidated_metadata:
510+
use_consolidated = False
506511

507512
consolidated_key = ZMETADATA_V2_JSON
508513

tests/test_metadata/test_consolidated.py

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,14 @@
1717
open,
1818
open_consolidated,
1919
)
20-
from zarr.core.buffer import cpu, default_buffer_prototype
20+
from zarr.core.buffer import Buffer, cpu, default_buffer_prototype
2121
from zarr.core.group import ConsolidatedMetadata, GroupMetadata
2222
from zarr.core.metadata import ArrayV3Metadata
2323
from zarr.core.metadata.v2 import ArrayV2Metadata
2424
from zarr.storage import StorePath
2525

2626
if TYPE_CHECKING:
27-
from zarr.abc.store import Store
27+
from zarr.abc.store import ByteRequest, Store
2828
from zarr.core.common import ZarrFormat
2929

3030

@@ -651,3 +651,27 @@ async def test_consolidated_metadata_encodes_special_chars(
651651
elif zarr_format == 3:
652652
assert root_metadata["child"]["attributes"]["test"] == expected_fill_value
653653
assert root_metadata["time"]["fill_value"] == expected_fill_value
654+
655+
656+
async def test_consolidate_metadata_is_noop_for_self_consolidating_stores():
657+
"""Verify calling consolidate_metadata on a non-supporting store does nothing"""
658+
659+
# We create a store that doesn't support consolidated metadata
660+
class Store(zarr.storage.MemoryStore):
661+
@property
662+
def supports_consolidated_metadata(self) -> bool:
663+
return False
664+
665+
memory_store = Store()
666+
root = await zarr.api.asynchronous.create_group(store=memory_store)
667+
await root.create_group("a/b")
668+
669+
# now we monkey patch the store so it raises if `Store.set` is called
670+
async def set_raises(self, value: Buffer, byte_range: ByteRequest | None = None) -> None:
671+
raise ValueError("consolidated metadata called")
672+
673+
memory_store.set = set_raises
674+
675+
# consolidate_metadata would call `set` if the store supported consolidated metadata
676+
# if this doesn't raise, it means consolidate_metadata is NOOP
677+
await zarr.api.asynchronous.consolidate_metadata(memory_store)

0 commit comments

Comments
 (0)