
Commit 0d2d6ab

Fix hdf typing (#380)
* explicit module names
* protect imports within a TYPE_CHECKING block (pattern sketched below)
* ignore a spurious attr-defined error
1 parent 908ad94 commit 0d2d6ab
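
For context, here is a minimal, self-contained sketch of the typing pattern this commit applies: the real h5py types are imported only under TYPE_CHECKING for the benefit of static analysis, while the runtime works through a soft-imported module object. The local soft_import shim and the dataset_shape function below are hypothetical stand-ins for illustration, not the actual virtualizarr.utils.soft_import implementation or any method in this file.

    from typing import TYPE_CHECKING, Any

    if TYPE_CHECKING:
        # Seen only by static type checkers; never executed at runtime,
        # so the annotation works even when h5py is not installed.
        from h5py import Dataset as H5Dataset
    else:
        # Runtime placeholder so the name still exists for annotations.
        H5Dataset: Any = None


    def soft_import(name: str) -> Any:
        # Hypothetical stand-in for virtualizarr.utils.soft_import:
        # return the module if it is installed, otherwise None.
        try:
            return __import__(name)
        except ImportError:
            return None


    h5py = soft_import("h5py")


    def dataset_shape(dataset: "H5Dataset") -> tuple:
        # Annotations use the TYPE_CHECKING alias, but runtime isinstance
        # checks must go through the module object (h5py.Dataset), mirroring
        # the isinstance fix in _virtual_vars_from_hdf in the diff below.
        if h5py is not None and isinstance(dataset, h5py.Dataset):
            return dataset.shape
        raise TypeError("expected an h5py.Dataset")

Binding H5Dataset and H5Group to None in the else branch, as the commit does, also avoids the previous trick of rebinding Dataset and Group to dict(), which shadowed the Dataset name imported from xarray at the top of the module.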

File tree

  • virtualizarr/readers/hdf/hdf.py

1 file changed: +30 -21 lines changed

virtualizarr/readers/hdf/hdf.py

Lines changed: 30 additions & 21 deletions
@@ -1,10 +1,18 @@
 import math
 from pathlib import Path
-from typing import TYPE_CHECKING, Dict, Iterable, List, Mapping, Optional, Union
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Dict,
+    Iterable,
+    List,
+    Mapping,
+    Optional,
+    Union,
+)

 import numpy as np
 import xarray as xr
-from xarray import Dataset, Index, Variable

 from virtualizarr.manifests import (
     ChunkEntry,
@@ -22,17 +30,15 @@
 from virtualizarr.utils import _FsspecFSFromFilepath, check_for_collisions, soft_import
 from virtualizarr.zarr import ZArray

-if TYPE_CHECKING:
-    import h5py  # type: ignore
-    from h5py import Dataset, Group  # type: ignore
-
 h5py = soft_import("h5py", "For reading hdf files", strict=False)
-if h5py:
-    Dataset = h5py.Dataset  # type: ignore
-    Group = h5py.Group  # type: ignore
+
+
+if TYPE_CHECKING:
+    from h5py import Dataset as H5Dataset  # type: ignore[import-untyped]
+    from h5py import Group as H5Group  # type: ignore[import-untyped]
 else:
-    Dataset = dict()  # type: ignore
-    Group = dict()  # type: ignore
+    H5Dataset: Any = None
+    H5Group: Any = None


 class HDFVirtualBackend(VirtualBackend):
@@ -43,7 +49,7 @@ def open_virtual_dataset(
         drop_variables: Iterable[str] | None = None,
         loadable_variables: Iterable[str] | None = None,
         decode_times: bool | None = None,
-        indexes: Mapping[str, Index] | None = None,
+        indexes: Mapping[str, xr.Index] | None = None,
         virtual_backend_kwargs: Optional[dict] = None,
         reader_options: Optional[dict] = None,
     ) -> xr.Dataset:
@@ -92,7 +98,10 @@ def open_virtual_dataset(
         )

     @staticmethod
-    def _dataset_chunk_manifest(path: str, dataset: Dataset) -> Optional[ChunkManifest]:
+    def _dataset_chunk_manifest(
+        path: str,
+        dataset: H5Dataset,
+    ) -> Optional[ChunkManifest]:
         """
         Generate ChunkManifest for HDF5 dataset.

@@ -116,7 +125,7 @@ def _dataset_chunk_manifest(path: str, dataset: Dataset) -> Optional[ChunkManife
             key_list = [0] * (len(dataset.shape) or 1)
             key = ".".join(map(str, key_list))

-            chunk_entry = ChunkEntry.with_validation(
+            chunk_entry: ChunkEntry = ChunkEntry.with_validation(  # type: ignore[attr-defined]
                 path=path, offset=dsid.get_offset(), length=dsid.get_storage_size()
             )
             chunk_key = ChunkKey(key)
@@ -160,7 +169,7 @@ def add_chunk_info(blob):
         return chunk_manifest

     @staticmethod
-    def _dataset_dims(dataset: Dataset) -> Union[List[str], List[None]]:
+    def _dataset_dims(dataset: H5Dataset) -> Union[List[str], List[None]]:
         """
         Get a list of dimension scale names attached to input HDF5 dataset.

@@ -202,7 +211,7 @@ def _dataset_dims(dataset: Dataset) -> Union[List[str], List[None]]:
         return dims

     @staticmethod
-    def _extract_attrs(h5obj: Union[Dataset, Group]):
+    def _extract_attrs(h5obj: Union[H5Dataset, H5Group]):
         """
         Extract attributes from an HDF5 group or dataset.

@@ -248,7 +257,7 @@ def _extract_attrs(h5obj: Union[Dataset, Group]):
         return attrs

     @staticmethod
-    def _dataset_to_variable(path: str, dataset: Dataset) -> Optional[Variable]:
+    def _dataset_to_variable(path: str, dataset: H5Dataset) -> Optional[xr.Variable]:
         """
         Extract an xarray Variable with ManifestArray data from an h5py dataset

@@ -300,9 +309,9 @@ def _dataset_to_variable(path: str, dataset: Dataset) -> Optional[Variable]:
         manifest = HDFVirtualBackend._dataset_chunk_manifest(path, dataset)
         if manifest:
             marray = ManifestArray(zarray=zarray, chunkmanifest=manifest)
-            variable = Variable(data=marray, dims=dims, attrs=attrs)
+            variable = xr.Variable(data=marray, dims=dims, attrs=attrs)
         else:
-            variable = Variable(data=np.empty(dataset.shape), dims=dims, attrs=attrs)
+            variable = xr.Variable(data=np.empty(dataset.shape), dims=dims, attrs=attrs)
         return variable

     @staticmethod
@@ -313,7 +322,7 @@ def _virtual_vars_from_hdf(
         reader_options: Optional[dict] = {
             "storage_options": {"key": "", "secret": "", "anon": True}
         },
-    ) -> Dict[str, Variable]:
+    ) -> Dict[str, xr.Variable]:
         """
         Extract xarray Variables with ManifestArray data from an HDF file or group

@@ -350,7 +359,7 @@ def _virtual_vars_from_hdf(
         variables = {}
         for key in g.keys():
             if key not in drop_variables:
-                if isinstance(g[key], Dataset):
+                if isinstance(g[key], h5py.Dataset):
                     variable = HDFVirtualBackend._dataset_to_variable(path, g[key])
                     if variable is not None:
                         variables[key] = variable

Comments (0)