@@ -98,24 +98,6 @@ def _parse_async_node(
9898 raise TypeError (f"Unknown node type, got { type (node )} " )
9999
100100
101- class _MixedConsolidatedMetadataException (Exception ):
102- """
103- A custom, *internal* exception for when we encounter mixed consolidated metadata.
104-
105- Typically, Consolidated Metadata will explicitly indicate that there are no
106- additional children under a group with ``ConsolidatedMetadata(metadata={})``,
107- as opposed to ``metadata=None``. This is the behavior of ``consolidated_metadata``.
108- We rely on that "fact" to do I/O-free getitem: when a group's consolidated metadata
109- doesn't contain a child we can raise a ``KeyError`` without consulting the backing
110- store.
111-
112- Users can potentially get themselves in situations where there's "mixed" consolidated
113- metadata. For now, we'll raise this error, catch it internally, and silently fall back
114- to the store (which will either succeed or raise its own KeyError, slowly). We might
115- want to expose this in the future, in which case rename it add it to zarr.errors.
116- """
117-
118-
119101@dataclass (frozen = True )
120102class ConsolidatedMetadata :
121103 """
@@ -626,15 +608,7 @@ async def getitem(
626608
627609 # Consolidated metadata lets us avoid some I/O operations so try that first.
628610 if self .metadata .consolidated_metadata is not None :
629- try :
630- return self ._getitem_consolidated (store_path , key , prefix = self .name )
631- except _MixedConsolidatedMetadataException :
632- logger .info (
633- "Mixed consolidated and unconsolidated metadata. key=%s, store_path=%s" ,
634- key ,
635- store_path ,
636- )
637- # now fall back to the non-consolidated variant
611+ return self ._getitem_consolidated (store_path , key , prefix = self .name )
638612
639613 # Note:
640614 # in zarr-python v2, we first check if `key` references an Array, else if `key` references
@@ -691,9 +665,6 @@ def _getitem_consolidated(
691665 # getitem, in the special case where we have consolidated metadata.
692666 # Note that this is a regular def (non async) function.
693667 # This shouldn't do any additional I/O.
694- # All callers *must* catch _MixedConsolidatedMetadataException to ensure
695- # that we correctly handle the case where we need to fall back to doing
696- # additional I/O.
697668
698669 # the caller needs to verify this!
699670 assert self .metadata .consolidated_metadata is not None
@@ -708,14 +679,16 @@ def _getitem_consolidated(
708679 if isinstance (metadata , ArrayV2Metadata | ArrayV3Metadata ):
709680 # we've indexed into an array with group["array/subarray"]. Invalid.
710681 raise KeyError (key )
682+ if metadata .consolidated_metadata is None :
683+ # we've indexed into a group without consolidated metadata.
684+ # This isn't normal; typically, consolidated metadata
685+ # will include explicit markers for when there are no child
686+ # nodes as metadata={}.
687+ # We have some freedom in exactly how we interpret this case.
688+ # For now, we treat None as the same as {}, i.e. we don't
689+ # have any children.
690+ raise KeyError (key )
711691 try :
712- if metadata .consolidated_metadata is None :
713- # we've indexed into a group without consolidated metadata.
714- # Note that the `None` case is different from `metadata={}`
715- # where we explicitly know we have no children. In the None
716- # case we have to fall back to non-consolidated metadata.
717- raise _MixedConsolidatedMetadataException (key )
718-
719692 metadata = metadata .consolidated_metadata .metadata [indexer ]
720693 except KeyError as e :
721694 # The Group Metadata has consolidated metadata, but the key
@@ -981,11 +954,7 @@ async def create_array(
981954
982955 @deprecated ("Use AsyncGroup.create_array instead." )
983956 async def create_dataset (
984- self ,
985- name : str ,
986- * ,
987- shape : ShapeLike ,
988- ** kwargs : Any ,
957+ self , name : str , ** kwargs : Any
989958 ) -> AsyncArray [ArrayV2Metadata ] | AsyncArray [ArrayV3Metadata ]:
990959 """Create an array.
991960
@@ -996,8 +965,6 @@ async def create_dataset(
996965 ----------
997966 name : str
998967 Array name.
999- shape : int or tuple of ints
1000- Array shape.
1001968 kwargs : dict
1002969 Additional arguments passed to :func:`zarr.AsyncGroup.create_array`.
1003970
@@ -1008,7 +975,7 @@ async def create_dataset(
1008975 .. deprecated:: 3.0.0
1009976 The h5py compatibility methods will be removed in 3.1.0. Use `AsyncGroup.create_array` instead.
1010977 """
1011- return await self .create_array (name , shape = shape , ** kwargs )
978+ return await self .create_array (name , ** kwargs )
1012979
1013980 @deprecated ("Use AsyncGroup.require_array instead." )
1014981 async def require_dataset (
@@ -1190,17 +1157,9 @@ async def _members(
11901157 if self .metadata .consolidated_metadata is not None :
11911158 # we should be able to do members without any additional I/O
11921159 members = self ._members_consolidated (max_depth , current_depth )
1193-
1194- try :
1195- # we already have this in memory, so fine to build this list
1196- # and catch the exception if needed.
1197- members_ = list (members )
1198- except _MixedConsolidatedMetadataException :
1199- pass
1200- else :
1201- for member in members_ :
1202- yield member
1203- return
1160+ members_ = list (members )
1161+ for member in members_ :
1162+ yield member
12041163
12051164 if not self .store_path .store .supports_listing :
12061165 msg = (
@@ -1258,25 +1217,13 @@ def _members_consolidated(
12581217 ]:
12591218 consolidated_metadata = self .metadata .consolidated_metadata
12601219
1261- if consolidated_metadata is None :
1262- raise _MixedConsolidatedMetadataException (prefix )
12631220 # we kind of just want the top-level keys.
12641221 if consolidated_metadata is not None :
12651222 for key in consolidated_metadata .metadata .keys ():
1266- try :
1267- obj = self ._getitem_consolidated (
1268- self .store_path , key , prefix = self .name
1269- ) # Metadata -> Group/Array
1270- key = f"{ prefix } /{ key } " .lstrip ("/" )
1271- except _MixedConsolidatedMetadataException :
1272- logger .info (
1273- "Mixed consolidated and unconsolidated metadata. key=%s, depth=%d, prefix=%s" ,
1274- key ,
1275- current_depth ,
1276- prefix ,
1277- )
1278- # This isn't an async def function so we need to re-raise up one more level.
1279- raise
1223+ obj = self ._getitem_consolidated (
1224+ self .store_path , key , prefix = self .name
1225+ ) # Metadata -> Group/Array
1226+ key = f"{ prefix } /{ key } " .lstrip ("/" )
12801227 yield key , obj
12811228
12821229 if ((max_depth is None ) or (current_depth < max_depth )) and isinstance (
@@ -1711,13 +1658,7 @@ def create_dataset(self, name: str, **kwargs: Any) -> Array:
17111658 return Array (self ._sync (self ._async_group .create_dataset (name , ** kwargs )))
17121659
17131660 @deprecated ("Use Group.require_array instead." )
1714- def require_dataset (
1715- self ,
1716- name : str ,
1717- * ,
1718- shape : ShapeLike ,
1719- ** kwargs : Any ,
1720- ) -> Array :
1661+ def require_dataset (self , name : str , ** kwargs : Any ) -> Array :
17211662 """Obtain an array, creating if it doesn't exist.
17221663
17231664 Arrays are known as "datasets" in HDF5 terminology. For compatibility
@@ -1744,15 +1685,9 @@ def require_dataset(
17441685 .. deprecated:: 3.0.0
17451686 The h5py compatibility methods will be removed in 3.1.0. Use `Group.require_array` instead.
17461687 """
1747- return Array (self ._sync (self ._async_group .require_array (name , shape = shape , ** kwargs )))
1688+ return Array (self ._sync (self ._async_group .require_array (name , ** kwargs )))
17481689
1749- def require_array (
1750- self ,
1751- name : str ,
1752- * ,
1753- shape : ShapeLike ,
1754- ** kwargs : Any ,
1755- ) -> Array :
1690+ def require_array (self , name : str , ** kwargs : Any ) -> Array :
17561691 """Obtain an array, creating if it doesn't exist.
17571692
17581693
@@ -1774,7 +1709,7 @@ def require_array(
17741709 -------
17751710 a : Array
17761711 """
1777- return Array (self ._sync (self ._async_group .require_array (name , shape = shape , ** kwargs )))
1712+ return Array (self ._sync (self ._async_group .require_array (name , ** kwargs )))
17781713
17791714 def empty (self , * , name : str , shape : ChunkCoords , ** kwargs : Any ) -> Array :
17801715 return Array (self ._sync (self ._async_group .empty (name = name , shape = shape , ** kwargs )))
0 commit comments