@@ -169,7 +169,7 @@ def add_chunk_info(blob):
169169 return chunk_manifest
170170
171171 @staticmethod
172- def _dataset_dims (dataset : H5Dataset ) -> Union [ List [str ], List [ None ] ]:
172+ def _dataset_dims (dataset : H5Dataset , group : str = "" ) -> List [str ]:
173173 """
174174 Get a list of dimension scale names attached to input HDF5 dataset.
175175
@@ -181,10 +181,12 @@ def _dataset_dims(dataset: H5Dataset) -> Union[List[str], List[None]]:
181181 ----------
182182 dataset : h5py.Dataset
183183 An h5py dataset.
184+ group : str
185+ Name of the group we are pulling these dimensions from. Required for potentially removing subgroup prefixes.
184186
185187 Returns
186188 -------
187- list
189+ list[str]
188190 List with HDF5 path names of dimension scales attached to input
189191 dataset.
190192 """
@@ -208,7 +210,11 @@ def _dataset_dims(dataset: H5Dataset) -> Union[List[str], List[None]]:
208210 # In this case, we mimic netCDF4 and assign phony dimension names.
209211 # See https://github.com/fsspec/kerchunk/issues/41
210212 dims .append (f"phony_dim_{ n } " )
211- return dims
213+
214+ if not group .endswith ("/" ):
215+ group += "/"
216+
217+ return [dim .removeprefix (group ) for dim in dims ]
212218
213219 @staticmethod
214220 def _extract_attrs (h5obj : Union [H5Dataset , H5Group ]):
@@ -257,20 +263,25 @@ def _extract_attrs(h5obj: Union[H5Dataset, H5Group]):
257263 return attrs
258264
259265 @staticmethod
260- def _dataset_to_variable (path : str , dataset : H5Dataset ) -> Optional [xr .Variable ]:
266+ def _dataset_to_variable (
267+ path : str ,
268+ dataset : H5Dataset ,
269+ group : str ,
270+ ) -> Optional [xr .Variable ]:
261271 """
262272 Extract an xarray Variable with ManifestArray data from an h5py dataset
263273
264274 Parameters
265275 ----------
266276 dataset : h5py.Dataset
267277 An h5py dataset.
278+ group : str
279+ Name of the group containing this h5py.Dataset.
268280
269281 Returns
270282 -------
271283 xarray.Variable or None
272284 The variable extracted from the dataset, or None when no variable is produced for it.
273-
274285 """
275286 # This chunk determination logic mirrors zarr-python's create
276287 # https://github.com/zarr-developers/zarr-python/blob/main/zarr/creation.py#L62-L66
@@ -305,7 +316,7 @@ def _dataset_to_variable(path: str, dataset: H5Dataset) -> Optional[xr.Variable]
305316 shape = dataset .shape ,
306317 zarr_format = 2 ,
307318 )
308- dims = HDFVirtualBackend ._dataset_dims (dataset )
319+ dims = HDFVirtualBackend ._dataset_dims (dataset , group = group )
309320 manifest = HDFVirtualBackend ._dataset_chunk_manifest (path , dataset )
310321 if manifest :
311322 marray = ManifestArray (zarray = zarray , chunkmanifest = manifest )
@@ -330,37 +341,44 @@ def _virtual_vars_from_hdf(
330341 ----------
331342 path: str
332343 The path of the hdf5 file.
333- group: str
334- The name of the group for which to extract variables.
344+ group: str, optional
345+ The name of the group for which to extract variables. None refers to the root group.
335346 drop_variables: list of str
336347 A list of variable names to skip extracting.
337348 reader_options: dict
338- A dictionary of reader options passed to fsspec when opening the
339- file.
349+ A dictionary of reader options passed to fsspec when opening the file.
340350
341351 Returns
342352 -------
343353 dict
344354 A dictionary of Xarray Variables with the variable names as keys.
345-
346355 """
347356 if drop_variables is None :
348357 drop_variables = []
358+
349359 open_file = _FsspecFSFromFilepath (
350360 filepath = path , reader_options = reader_options
351361 ).open_file ()
352362 f = h5py .File (open_file , mode = "r" )
353- if group :
363+
364+ if group is not None :
354365 g = f [group ]
366+ group_name = group
355367 if not isinstance (g , h5py .Group ):
356368 raise ValueError ("The provided group is not an HDF group" )
357369 else :
358370 g = f
371+ group_name = ""
372+
359373 variables = {}
360374 for key in g .keys ():
361375 if key not in drop_variables :
362376 if isinstance (g [key ], h5py .Dataset ):
363- variable = HDFVirtualBackend ._dataset_to_variable (path , g [key ])
377+ variable = HDFVirtualBackend ._dataset_to_variable (
378+ path = path ,
379+ dataset = g [key ],
380+ group = group_name ,
381+ )
364382 if variable is not None :
365383 variables [key ] = variable
366384 else :
0 commit comments