@@ -97,6 +97,24 @@ def _parse_async_node(
9797 raise TypeError (f"Unknown node type, got { type (node )} " )
9898
9999
100+ class _MixedConsolidatedMetadataException (Exception ):
101+ """
102+ A custom, *internal* exception for when we encounter mixed consolidated metadata.
103+
104+ Typically, Consolidated Metadata will explicitly indicate that there are no
105+ additional children under a group with ``ConsolidatedMetadata(metadata={})``,
106+ as opposed to ``metadata=None``. This is the behavior of ``consolidated_metadata``.
107+ We rely on that "fact" to do I/O-free getitem: when a group's consolidated metadata
108+ doesn't contain a child we can raise a ``KeyError`` without consulting the backing
109+ store.
110+
111+ Users can potentially get themselves in situations where there's "mixed" consolidated
112+ metadata. For now, we'll raise this error, catch it internally, and silently fall back
113+ to the store (which will either succeed or raise its own KeyError, slowly). We might
114+ want to expose this in the future, in which case rename it add it to zarr.errors.
115+ """
116+
117+
100118@dataclass (frozen = True )
101119class ConsolidatedMetadata :
102120 """
@@ -571,7 +589,10 @@ def _from_bytes_v2(
571589
572590 @classmethod
573591 def _from_bytes_v3 (
574- cls , store_path : StorePath , zarr_json_bytes : Buffer , use_consolidated : bool | None
592+ cls ,
593+ store_path : StorePath ,
594+ zarr_json_bytes : Buffer ,
595+ use_consolidated : bool | None ,
575596 ) -> AsyncGroup :
576597 group_metadata = json .loads (zarr_json_bytes .to_bytes ())
577598 if use_consolidated and group_metadata .get ("consolidated_metadata" ) is None :
@@ -604,14 +625,22 @@ async def getitem(
604625
605626 # Consolidated metadata lets us avoid some I/O operations so try that first.
606627 if self .metadata .consolidated_metadata is not None :
607- return self ._getitem_consolidated (store_path , key , prefix = self .name )
628+ try :
629+ return self ._getitem_consolidated (store_path , key , prefix = self .name )
630+ except _MixedConsolidatedMetadataException :
631+ logger .info (
632+ "Mixed consolidated and unconsolidated metadata. key=%s, store_path=%s" ,
633+ key ,
634+ store_path ,
635+ )
636+ # now fall back to the non-consolidated variant
608637
609638 # Note:
610639 # in zarr-python v2, we first check if `key` references an Array, else if `key` references
611640 # a group,using standalone `contains_array` and `contains_group` functions. These functions
612641 # are reusable, but for v3 they would perform redundant I/O operations.
613642 # Not clear how much of that strategy we want to keep here.
614- elif self .metadata .zarr_format == 3 :
643+ if self .metadata .zarr_format == 3 :
615644 zarr_json_bytes = await (store_path / ZARR_JSON ).get ()
616645 if zarr_json_bytes is None :
617646 raise KeyError (key )
@@ -661,18 +690,39 @@ def _getitem_consolidated(
661690 # getitem, in the special case where we have consolidated metadata.
662691 # Note that this is a regular def (non async) function.
663692 # This shouldn't do any additional I/O.
693+ # All callers *must* catch _MixedConsolidatedMetadataException to ensure
694+ # that we correctly handle the case where we need to fall back to doing
695+ # additional I/O.
664696
665697 # the caller needs to verify this!
666698 assert self .metadata .consolidated_metadata is not None
667699
668- try :
669- metadata = self .metadata .consolidated_metadata .metadata [key ]
670- except KeyError as e :
671- # The Group Metadata has consolidated metadata, but the key
672- # isn't present. We trust this to mean that the key isn't in
673- # the hierarchy, and *don't* fall back to checking the store.
674- msg = f"'{ key } ' not found in consolidated metadata."
675- raise KeyError (msg ) from e
700+ # we support nested getitems like group/subgroup/array
701+ indexers = key .split ("/" )
702+ indexers .reverse ()
703+ metadata : ArrayV2Metadata | ArrayV3Metadata | GroupMetadata = self .metadata
704+
705+ while indexers :
706+ indexer = indexers .pop ()
707+ if isinstance (metadata , ArrayV2Metadata | ArrayV3Metadata ):
708+ # we've indexed into an array with group["array/subarray"]. Invalid.
709+ raise KeyError (key )
710+ try :
711+ if metadata .consolidated_metadata is None :
712+ # we've indexed into a group without consolidated metadata.
713+ # Note that the `None` case is different from `metadata={}`
714+ # where we explicitly know we have no children. In the None
715+ # case we have to fall back to non-consolidated metadata.
716+ raise _MixedConsolidatedMetadataException (key )
717+ assert metadata .consolidated_metadata is not None
718+
719+ metadata = metadata .consolidated_metadata .metadata [indexer ]
720+ except KeyError as e :
721+ # The Group Metadata has consolidated metadata, but the key
722+ # isn't present. We trust this to mean that the key isn't in
723+ # the hierarchy, and *don't* fall back to checking the store.
724+ msg = f"'{ key } ' not found in consolidated metadata."
725+ raise KeyError (msg ) from e
676726
677727 # update store_path to ensure that AsyncArray/Group.name is correct
678728 if prefix != "/" :
@@ -1087,7 +1137,8 @@ async def members(
10871137 self ,
10881138 max_depth : int | None = 0 ,
10891139 ) -> AsyncGenerator [
1090- tuple [str , AsyncArray [ArrayV2Metadata ] | AsyncArray [ArrayV3Metadata ] | AsyncGroup ], None
1140+ tuple [str , AsyncArray [ArrayV2Metadata ] | AsyncArray [ArrayV3Metadata ] | AsyncGroup ],
1141+ None ,
10911142 ]:
10921143 """
10931144 Returns an AsyncGenerator over the arrays and groups contained in this group.
@@ -1118,15 +1169,20 @@ async def members(
11181169 async def _members (
11191170 self , max_depth : int | None , current_depth : int
11201171 ) -> AsyncGenerator [
1121- tuple [str , AsyncArray [ArrayV2Metadata ] | AsyncArray [ArrayV3Metadata ] | AsyncGroup ], None
1172+ tuple [str , AsyncArray [ArrayV2Metadata ] | AsyncArray [ArrayV3Metadata ] | AsyncGroup ],
1173+ None ,
11221174 ]:
11231175 if self .metadata .consolidated_metadata is not None :
11241176 # we should be able to do members without any additional I/O
1125- members = self ._members_consolidated (max_depth , current_depth )
1126-
1127- for member in members :
1128- yield member
1129- return
1177+ try :
1178+ members = self ._members_consolidated (max_depth , current_depth )
1179+ except _MixedConsolidatedMetadataException :
1180+ # we've already logged this. We'll fall back to the non-consolidated version.
1181+ pass
1182+ else :
1183+ for member in members :
1184+ yield member
1185+ return
11301186
11311187 if not self .store_path .store .supports_listing :
11321188 msg = (
@@ -1177,17 +1233,28 @@ async def _members(
11771233 def _members_consolidated (
11781234 self , max_depth : int | None , current_depth : int , prefix : str = ""
11791235 ) -> Generator [
1180- tuple [str , AsyncArray [ArrayV2Metadata ] | AsyncArray [ArrayV3Metadata ] | AsyncGroup ], None
1236+ tuple [str , AsyncArray [ArrayV2Metadata ] | AsyncArray [ArrayV3Metadata ] | AsyncGroup ],
1237+ None ,
11811238 ]:
11821239 consolidated_metadata = self .metadata .consolidated_metadata
11831240
11841241 # we kind of just want the top-level keys.
11851242 if consolidated_metadata is not None :
11861243 for key in consolidated_metadata .metadata .keys ():
1187- obj = self ._getitem_consolidated (
1188- self .store_path , key , prefix = self .name
1189- ) # Metadata -> Group/Array
1190- key = f"{ prefix } /{ key } " .lstrip ("/" )
1244+ try :
1245+ obj = self ._getitem_consolidated (
1246+ self .store_path , key , prefix = self .name
1247+ ) # Metadata -> Group/Array
1248+ key = f"{ prefix } /{ key } " .lstrip ("/" )
1249+ except _MixedConsolidatedMetadataException :
1250+ logger .info (
1251+ "Mixed consolidated and unconsolidated metadata. key=%s, depth=%d, prefix=%s" ,
1252+ key ,
1253+ current_depth ,
1254+ prefix ,
1255+ )
1256+ # This isn't an async def function so we need to re-raise up one more level.
1257+ raise
11911258 yield key , obj
11921259
11931260 if ((max_depth is None ) or (current_depth < max_depth )) and isinstance (
@@ -1262,7 +1329,11 @@ async def full(
12621329 self , * , name : str , shape : ChunkCoords , fill_value : Any | None , ** kwargs : Any
12631330 ) -> AsyncArray [ArrayV2Metadata ] | AsyncArray [ArrayV3Metadata ]:
12641331 return await async_api .full (
1265- shape = shape , fill_value = fill_value , store = self .store_path , path = name , ** kwargs
1332+ shape = shape ,
1333+ fill_value = fill_value ,
1334+ store = self .store_path ,
1335+ path = name ,
1336+ ** kwargs ,
12661337 )
12671338
12681339 async def empty_like (
0 commit comments