1
1
from __future__ import annotations
2
2
3
3
import asyncio
4
+ import dataclasses
4
5
import warnings
5
6
from typing import TYPE_CHECKING , Any , Literal , cast
6
7
9
10
10
11
from zarr .abc .store import Store
11
12
from zarr .core .array import Array , AsyncArray , get_array_metadata
12
- from zarr .core .common import JSON , AccessModeLiteral , ChunkCoords , MemoryOrder , ZarrFormat
13
+ from zarr .core .buffer import NDArrayLike
14
+ from zarr .core .chunk_key_encodings import ChunkKeyEncoding
15
+ from zarr .core .common import (
16
+ JSON ,
17
+ AccessModeLiteral ,
18
+ ChunkCoords ,
19
+ MemoryOrder ,
20
+ ZarrFormat ,
21
+ )
13
22
from zarr .core .config import config
14
- from zarr .core .group import AsyncGroup
23
+ from zarr .core .group import AsyncGroup , ConsolidatedMetadata , GroupMetadata
15
24
from zarr .core .metadata import ArrayMetadataDict , ArrayV2Metadata , ArrayV3Metadata
16
25
from zarr .errors import NodeTypeValidationError
17
26
from zarr .storage import (
@@ -132,8 +141,64 @@ def _default_zarr_version() -> ZarrFormat:
132
141
return cast (ZarrFormat , int (config .get ("default_zarr_version" , 3 )))
133
142
134
143
135
async def consolidate_metadata(
    store: StoreLike,
    path: str | None = None,
    zarr_format: ZarrFormat | None = None,
) -> AsyncGroup:
    """
    Collect the metadata of every node below a group and save it on that group.

    The target group is opened without consulting consolidated metadata, the
    metadata of all of its members (at any depth) is gathered, and the result
    is written back as the group's ``consolidated_metadata``.

    Parameters
    ----------
    store : StoreLike
        The store-like object whose metadata should be consolidated.
    path : str, optional
        Path, inside the store, of the group to consolidate at. Only members
        below that group are consolidated.

        When omitted, the root of the store is used, so the metadata of the
        whole store is consolidated.
    zarr_format : {2, 3, None}, optional
        The zarr format of the hierarchy, forwarded to ``AsyncGroup.open``.
        By default the zarr format is inferred.

    Returns
    -------
    group : AsyncGroup
        A copy of the opened group whose metadata has its
        ``consolidated_metadata`` field set to the metadata of the group's
        members. This group's metadata has already been saved.
    """
    base_path = await make_store_path(store)
    target_path = base_path if path is None else base_path / path

    # Open with use_consolidated=False so the hierarchy is read from the
    # regular, per-node metadata rather than from a previous consolidation.
    group = await AsyncGroup.open(target_path, zarr_format=zarr_format, use_consolidated=False)
    # Checked before walking the hierarchy.
    # NOTE(review): presumably raises when the store is read-only - confirm.
    group.store_path.store._check_writable()

    discovered = {name: node.metadata async for name, node in group.members(max_depth=None)}

    # Be explicit about child groups that carry no consolidated metadata of
    # their own: give them an empty ``ConsolidatedMetadata`` instead of None.
    members_metadata = {
        name: (
            dataclasses.replace(meta, consolidated_metadata=ConsolidatedMetadata(metadata={}))
            if isinstance(meta, GroupMetadata) and meta.consolidated_metadata is None
            else meta
        )
        for name, meta in discovered.items()
    }

    # The return value is not used, so this call is relied upon to
    # restructure the mapping in place.
    ConsolidatedMetadata._flat_to_nested(members_metadata)

    group = dataclasses.replace(
        group,
        metadata=dataclasses.replace(
            group.metadata,
            consolidated_metadata=ConsolidatedMetadata(metadata=members_metadata),
        ),
    )
    await group._save_metadata()
    return group
137
202
138
203
139
204
async def copy (* args : Any , ** kwargs : Any ) -> tuple [int , int , int ]:
@@ -256,8 +321,18 @@ async def open(
256
321
return await open_group (store = store_path , zarr_format = zarr_format , ** kwargs )
257
322
258
323
259
async def open_consolidated(
    *args: Any, use_consolidated: Literal[True] = True, **kwargs: Any
) -> AsyncGroup:
    """
    Open a group through :func:`open_group` with ``use_consolidated=True``.

    Parameters
    ----------
    *args : Any
        Positional arguments, forwarded unchanged to :func:`open_group`.
    use_consolidated : Literal[True], default True
        Must be the value ``True``; it exists only so that an attempt to
        disable consolidated metadata is rejected with a clear error.
    **kwargs : Any
        Keyword arguments, forwarded unchanged to :func:`open_group`.

    Returns
    -------
    AsyncGroup
        The group returned by :func:`open_group`.

    Raises
    ------
    TypeError
        If ``use_consolidated`` is anything other than ``True``.
    """
    # Identity check on purpose: truthy values such as 1 or a string key are
    # rejected too, only the ``True`` singleton is accepted.
    if use_consolidated is True:
        return await open_group(*args, use_consolidated=True, **kwargs)

    raise TypeError(
        "'use_consolidated' must be 'True' in 'open_consolidated'. Use 'open' with "
        "'use_consolidated=False' to bypass consolidated metadata."
    )
261
336
262
337
263
338
async def save (
@@ -549,6 +624,7 @@ async def open_group(
549
624
zarr_format : ZarrFormat | None = None ,
550
625
meta_array : Any | None = None , # not used
551
626
attributes : dict [str , JSON ] | None = None ,
627
+ use_consolidated : bool | str | None = None ,
552
628
) -> AsyncGroup :
553
629
"""Open a group using file-mode-like semantics.
554
630
@@ -589,6 +665,22 @@ async def open_group(
589
665
to users. Use `numpy.empty(())` by default.
590
666
attributes : dict
591
667
A dictionary of JSON-serializable values with user-defined attributes.
668
+ use_consolidated : bool or str, default None
669
+ Whether to use consolidated metadata.
670
+
671
+ By default, consolidated metadata is used if it's present in the
672
+ store (in the ``zarr.json`` for Zarr v3 and in the ``.zmetadata`` file
673
+ for Zarr v2).
674
+
675
+ To explicitly require consolidated metadata, set ``use_consolidated=True``,
676
+ which will raise an exception if consolidated metadata is not found.
677
+
678
+ To explicitly *not* use consolidated metadata, set ``use_consolidated=False``,
679
+ which will fall back to using the regular, non consolidated metadata.
680
+
681
+ Zarr v2 allowed configuring the key storing the consolidated metadata
682
+ (``.zmetadata`` by default). Specify the custom key as ``use_consolidated``
683
+ to load consolidated metadata from a non-default key.
592
684
593
685
Returns
594
686
-------
@@ -615,7 +707,9 @@ async def open_group(
615
707
attributes = {}
616
708
617
709
try :
618
- return await AsyncGroup .open (store_path , zarr_format = zarr_format )
710
+ return await AsyncGroup .open (
711
+ store_path , zarr_format = zarr_format , use_consolidated = use_consolidated
712
+ )
619
713
except (KeyError , FileNotFoundError ):
620
714
return await AsyncGroup .from_store (
621
715
store_path ,
@@ -777,7 +871,9 @@ async def create(
777
871
)
778
872
else :
779
873
warnings .warn (
780
- "dimension_separator is not yet implemented" , RuntimeWarning , stacklevel = 2
874
+ "dimension_separator is not yet implemented" ,
875
+ RuntimeWarning ,
876
+ stacklevel = 2 ,
781
877
)
782
878
if write_empty_chunks :
783
879
warnings .warn ("write_empty_chunks is not yet implemented" , RuntimeWarning , stacklevel = 2 )
0 commit comments