1919from zarr .abc .store import Store , set_or_delete
2020from zarr .core .array import Array , AsyncArray , _build_parents
2121from zarr .core .attributes import Attributes
22- from zarr .core .buffer import default_buffer_prototype
22+ from zarr .core .buffer import default_buffer_prototype , Buffer
2323from zarr .core .common import (
2424 JSON ,
2525 ZARR_JSON ,
@@ -1151,18 +1151,18 @@ async def members(
11511151 """
11521152 if max_depth is not None and max_depth < 0 :
11531153 raise ValueError (f"max_depth must be None or >= 0. Got '{ max_depth } ' instead" )
1154- async for item in self ._members (max_depth = max_depth , current_depth = 0 ):
1154+ async for item in self ._members (max_depth = max_depth ):
11551155 yield item
11561156
1157- async def _members (
1157+ async def _members_old (
11581158 self , max_depth : int | None , current_depth : int
11591159 ) -> AsyncGenerator [
11601160 tuple [str , AsyncArray [ArrayV2Metadata ] | AsyncArray [ArrayV3Metadata ] | AsyncGroup ],
11611161 None ,
11621162 ]:
11631163 if self .metadata .consolidated_metadata is not None :
11641164 # we should be able to do members without any additional I/O
1165- members = self ._members_consolidated (max_depth , current_depth )
1165+ members = self ._members_consolidated (max_depth )
11661166 for member in members :
11671167 yield member
11681168 return
@@ -1202,8 +1202,7 @@ async def _members(
12021202 # implies an AsyncGroup, not an AsyncArray
12031203 assert isinstance (obj , AsyncGroup )
12041204 async for child_key , val in obj ._members (
1205- max_depth = max_depth , current_depth = current_depth + 1
1206- ):
1205+ max_depth = max_depth ):
12071206 yield f"{ key } /{ child_key } " , val
12081207 except KeyError :
12091208 # keyerror is raised when `key` names an object (in the object storage sense),
@@ -1216,12 +1215,14 @@ async def _members(
12161215 )
12171216
12181217 def _members_consolidated (
1219- self , max_depth : int | None , current_depth : int , prefix : str = ""
1218+ self , max_depth : int | None , prefix : str = ""
12201219 ) -> Generator [
12211220 tuple [str , AsyncArray [ArrayV2Metadata ] | AsyncArray [ArrayV3Metadata ] | AsyncGroup ],
12221221 None ,
12231222 ]:
12241223 consolidated_metadata = self .metadata .consolidated_metadata
1224+
1225+ do_recursion = max_depth is None or max_depth > 0
12251226
12261227 # we kind of just want the top-level keys.
12271228 if consolidated_metadata is not None :
@@ -1232,10 +1233,43 @@ def _members_consolidated(
12321233 key = f"{ prefix } /{ key } " .lstrip ("/" )
12331234 yield key , obj
12341235
1235- if (( max_depth is None ) or ( current_depth < max_depth )) and isinstance (
1236+ if do_recursion and isinstance (
12361237 obj , AsyncGroup
12371238 ):
1238- yield from obj ._members_consolidated (max_depth , current_depth + 1 , prefix = key )
1239+ if max_depth is None :
1240+ new_depth = None
1241+ else :
1242+ new_depth = max_depth - 1
1243+ yield from obj ._members_consolidated (new_depth , prefix = key )
1244+
async def _members(
    self, max_depth: int | None
) -> AsyncGenerator[
    tuple[str, AsyncArray[ArrayV3Metadata] | AsyncArray[ArrayV2Metadata] | AsyncGroup],
    None,
]:
    """
    Yield ``(key, node)`` pairs for the arrays and groups contained in this group.

    Keys are relative to this group, on both the consolidated-metadata path
    and the store-listing path.

    Parameters
    ----------
    max_depth : int or None
        How many levels of sub-groups to descend into. ``0`` yields only the
        direct children; ``None`` places no limit on the depth.

    Raises
    ------
    ValueError
        If the group's ``zarr_format`` is neither 2 nor 3, or if there is no
        consolidated metadata and the store does not support listing.
    """
    # Metadata documents live next to the members in the store listing and
    # must not be reported as members themselves.
    skip_keys: tuple[str, ...]
    if self.metadata.zarr_format == 2:
        skip_keys = ('.zattrs', '.zgroup', '.zarray', '.zmetadata')
    elif self.metadata.zarr_format == 3:
        skip_keys = ('zarr.json',)
    else:
        raise ValueError(f"Unknown Zarr format: {self.metadata.zarr_format}")

    if self.metadata.consolidated_metadata is not None:
        # `yield from` is not permitted inside an async generator, hence the
        # explicit loop over the synchronous generator.
        for member in self._members_consolidated(max_depth=max_depth):
            yield member
        return

    if not self.store_path.store.supports_listing:
        msg = (
            f"The store associated with this group ({type(self.store_path.store)}) "
            "does not support listing, "
            "specifically via the `list_dir` method. "
            "This function requires a store that supports listing."
        )

        raise ValueError(msg)

    # Start from an empty prefix so that keys are relative to this group, the
    # same as `_members_consolidated` (whose prefix defaults to ""). Seeding the
    # prefix with this group's own basename would make a non-root group report
    # its children as "<basename>/<child>".
    async for member in iter_members_deep(
        self, max_depth=max_depth, prefix="", skip_keys=skip_keys
    ):
        yield member
12391273
12401274 async def keys (self ) -> AsyncGenerator [str , None ]:
12411275 async for key , _ in self .members ():
@@ -1848,10 +1882,13 @@ def array(
18481882 )
18491883
18501884
1851- async def members_v3 (
1885+ async def members_recursive (
18521886 store : Store ,
18531887 path : str ,
18541888) -> Any :
1889+ """
1890+ Recursively fetch all members of a group.
1891+ """
18551892 metadata_keys = ("zarr.json" ,)
18561893
18571894 members_flat : tuple [tuple [str , ArrayV3Metadata | GroupMetadata ], ...] = ()
@@ -1879,18 +1916,88 @@ async def members_v3(
18791916 resolved_metadata = resolve_metadata_v3 (blob .to_bytes ())
18801917 members_flat += ((key_body , resolved_metadata ),)
18811918 if isinstance (resolved_metadata , GroupMetadata ):
1882- to_recurse .append (members_v3 (store , key_body ))
1883-
1884- # for r in to_recurse:
1885- # members_flat += await r
1919+ to_recurse .append (
1920+ members_recursive (store , key_body ))
18861921
18871922 subgroups = await asyncio .gather (* to_recurse )
18881923 members_flat += tuple (subgroup for subgroup in subgroups )
18891924
1890- # recurse for groups
1891-
18921925 return members_flat
18931926
1927+ async def iter_members (
1928+ node : AsyncGroup ,
1929+ skip_keys : tuple [str , ...]
1930+ ) -> AsyncGenerator [tuple [str , AsyncArray [ArrayV3Metadata ] | AsyncArray [ArrayV2Metadata ] | AsyncGroup ], None ]:
1931+ """
1932+ Iterate over the arrays and groups contained in a group.
1933+ """
1934+
1935+ # retrieve keys from storage
1936+ keys = [key async for key in node .store .list_dir (node .path )]
1937+ keys_filtered = tuple (filter (lambda v : v not in skip_keys , keys ))
1938+
1939+ node_tasks = tuple (asyncio .create_task (
1940+ node .getitem (key ), name = key ) for key in keys_filtered )
1941+
1942+ for fetched_node_coro in asyncio .as_completed (node_tasks ):
1943+ try :
1944+ fetched_node = await fetched_node_coro
1945+ except KeyError as e :
1946+ # keyerror is raised when `key` names an object (in the object storage sense),
1947+ # as opposed to a prefix, in the store under the prefix associated with this group
1948+ # in which case `key` cannot be the name of a sub-array or sub-group.
1949+ warnings .warn (
1950+ f"Object at { e .args [0 ]} is not recognized as a component of a Zarr hierarchy." ,
1951+ UserWarning ,
1952+ stacklevel = 1 ,
1953+ )
1954+ continue
1955+ match fetched_node :
1956+ case AsyncArray () | AsyncGroup ():
1957+ yield fetched_node .basename , fetched_node
1958+ case _:
1959+ raise ValueError (f"Unexpected type: { type (fetched_node )} " )
1960+
async def iter_members_deep(
    group: AsyncGroup,
    *,
    prefix: str,
    max_depth: int | None,
    skip_keys: tuple[str, ...],
) -> AsyncGenerator[
    tuple[str, AsyncArray[ArrayV3Metadata] | AsyncArray[ArrayV2Metadata] | AsyncGroup],
    None,
]:
    """
    Walk a group, yielding its members and optionally those of nested groups.

    All direct members of ``group`` are yielded first. The members of each
    sub-group found along the way are yielded afterwards, one sub-group at a
    time, in the order the sub-groups were encountered.

    Parameters
    ----------
    group : AsyncGroup
        The group to walk.
    prefix : str
        Path prepended (with a ``/`` separator) to every yielded name; any
        leading ``/`` is stripped from the result.
    max_depth : int or None
        Remaining number of levels to descend. ``None`` means unlimited; a
        value of ``0`` or less yields the direct members only.
    skip_keys : tuple of str
        Forwarded unchanged to ``iter_members`` at every level.
    """

    descend = max_depth is None or max_depth > 0
    child_depth = None if max_depth is None else max_depth - 1
    pending_subgroups = []

    async for name, node in iter_members(group, skip_keys=skip_keys):
        qualified = f'{prefix}/{name}'
        yield qualified.lstrip('/'), node
        if not (descend and isinstance(node, AsyncGroup)):
            continue
        # Creating the generator does not start it; it is only driven below,
        # after every direct member has been yielded.
        pending_subgroups.append(
            iter_members_deep(
                node,
                max_depth=child_depth,
                prefix=qualified,
                skip_keys=skip_keys,
            )
        )

    for subgroup_members in pending_subgroups:
        async for child_name, child in subgroup_members:
            yield child_name, child
1992+
1993+
def resolve_metadata_v2(
    blobs: tuple[str | bytes | bytearray, str | bytes | bytearray],
) -> ArrayV2Metadata | GroupMetadata:
    """
    Build a metadata object from a pair of JSON documents.

    Parameters
    ----------
    blobs : tuple
        Two JSON documents: ``blobs[0]`` is the node metadata and ``blobs[1]``
        its attributes (presumably the contents of ``.zarray``/``.zgroup`` and
        ``.zattrs`` -- confirm against the caller).

    Returns
    -------
    ArrayV2Metadata or GroupMetadata
        ``ArrayV2Metadata`` when the node metadata contains a ``'shape'`` key,
        ``GroupMetadata`` otherwise.
    """
    zarr_metadata = json.loads(blobs[0])
    attrs = json.loads(blobs[1])
    # Only the presence of a 'shape' entry is used to tell arrays from groups.
    metadata_cls = ArrayV2Metadata if 'shape' in zarr_metadata else GroupMetadata
    # NOTE(review): the attributes are merged under the key 'attrs' -- confirm
    # that this is the key `from_dict` expects.
    return metadata_cls.from_dict(zarr_metadata | {'attrs': attrs})
18942001
18952002def resolve_metadata_v3 (blob : str | bytes | bytearray ) -> ArrayV3Metadata | GroupMetadata :
18962003 zarr_json = json .loads (blob )
0 commit comments