Skip to content

Commit 4cbf9ef

Browse files
committed
add explicit support for h5
1 parent 4da6766 commit 4cbf9ef

File tree

7 files changed

+125
-8
lines changed

7 files changed

+125
-8
lines changed

bioimageio/core/io.py

Lines changed: 81 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
import collections.abc
2-
from pathlib import Path
3-
from typing import Any, Mapping, Optional, Sequence, Union
2+
import warnings
3+
from pathlib import Path, PurePosixPath
4+
from typing import Any, Mapping, Optional, Sequence, Tuple, Union
45

6+
import h5py
7+
import numpy as np
58
from imageio.v3 import imread, imwrite
69
from loguru import logger
710
from numpy.typing import NDArray
@@ -15,6 +18,8 @@
1518
from .stat_measures import DatasetMeasure, MeasureValue
1619
from .tensor import Tensor
1720

21+
DEFAULT_H5_DATASET_PATH = "data"
22+
1823

1924
def load_image(path: Path, is_volume: Optional[bool] = None) -> NDArray[Any]:
2025
"""load a single image as numpy array
@@ -23,9 +28,38 @@ def load_image(path: Path, is_volume: Optional[bool] = None) -> NDArray[Any]:
2328
path: image path
2429
is_volume: deprecated
2530
"""
26-
ext = path.suffix
27-
if ext == ".npy":
31+
if is_volume is not None:
32+
warnings.warn("**is_volume** is deprecated and will be removed soon.")
33+
34+
file_path, subpath = _split_dataset_path(Path(path))
35+
36+
if file_path.suffix == ".npy":
37+
if subpath is not None:
38+
raise ValueError(f"Unexpected subpath {subpath} for .npy path {path}")
2839
return load_array(path)
40+
elif file_path.suffix in (".h5", ".hdf", ".hdf5"):
41+
if subpath is None:
42+
dataset_path = DEFAULT_H5_DATASET_PATH
43+
else:
44+
dataset_path = str(subpath)
45+
46+
with h5py.File(file_path, "r") as f:
47+
h5_dataset = f.get( # pyright: ignore[reportUnknownVariableType]
48+
dataset_path
49+
)
50+
if not isinstance(h5_dataset, h5py.Dataset):
51+
raise ValueError(
52+
f"{path} is not of type {h5py.Dataset}, but has type "
53+
+ str(
54+
type(h5_dataset) # pyright: ignore[reportUnknownArgumentType]
55+
)
56+
)
57+
image: NDArray[Any]
58+
image = h5_dataset[:] # pyright: ignore[reportUnknownVariableType]
59+
assert isinstance(image, np.ndarray), type(
60+
image # pyright: ignore[reportUnknownArgumentType]
61+
)
62+
return image # pyright: ignore[reportUnknownVariableType]
2963
else:
3064
return imread(path) # pyright: ignore[reportUnknownVariableType]
3165

@@ -37,14 +71,53 @@ def load_tensor(path: Path, axes: Optional[Sequence[AxisLike]] = None) -> Tensor
3771
return Tensor.from_numpy(array, dims=axes)
3872

3973

74+
def _split_dataset_path(path: Path) -> Tuple[Path, Optional[PurePosixPath]]:
75+
"""Split off subpath (e.g. internal h5 dataset path)
76+
from a file path following a file extension.
77+
78+
Examples:
79+
>>> _split_dataset_path(Path("my_file.h5/dataset"))
80+
(Path("my_file.h5"), PurePosixPath("dataset"))
81+
82+
If no suffix is detected the path is returned with
83+
>>> _split_dataset_path(Path("my_plain_file"))
84+
(Path("my_plain_file"), None)
85+
86+
"""
87+
if path.suffix:
88+
return path, None
89+
90+
for p in path.parents:
91+
if p.suffix:
92+
return p, PurePosixPath(path.relative_to(p))
93+
94+
return path, None
95+
96+
4097
def save_tensor(path: Path, tensor: Tensor) -> None:
4198
# TODO: save axis meta data
4299

43100
data: NDArray[Any] = tensor.data.to_numpy()
44-
path = Path(path)
45-
path.parent.mkdir(exist_ok=True, parents=True)
46-
if path.suffix == ".npy":
47-
save_array(path, data)
101+
file_path, subpath = _split_dataset_path(Path(path))
102+
if not file_path.suffix:
103+
raise ValueError(f"No suffix (needed to decide file format) found in {path}")
104+
105+
file_path.parent.mkdir(exist_ok=True, parents=True)
106+
if file_path.suffix == ".npy":
107+
if subpath is not None:
108+
raise ValueError(f"Unexpected subpath {subpath} found in .npy path {path}")
109+
save_array(file_path, data)
110+
elif file_path.suffix in (".h5", ".hdf", ".hdf5"):
111+
if subpath is None:
112+
dataset_path = DEFAULT_H5_DATASET_PATH
113+
else:
114+
dataset_path = str(subpath)
115+
116+
with h5py.File(file_path, "a") as f:
117+
if dataset_path in f:
118+
del f[dataset_path]
119+
120+
_ = f.create_dataset(dataset_path, data=data, chunks=True)
48121
else:
49122
# if singleton_axes := [a for a, s in tensor.tagged_shape.items() if s == 1]:
50123
# tensor = tensor[{a: 0 for a in singleton_axes}]

dev/env-py38.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ dependencies:
88
- black
99
- crick # uncommented
1010
- filelock
11+
- h5py
1112
- imageio>=2.5
1213
- jupyter
1314
- jupyter-black

dev/env-tf.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ dependencies:
88
- black
99
# - crick # currently requires python<=3.9
1010
- filelock
11+
- h5py
1112
- imageio>=2.5
1213
- jupyter
1314
- jupyter-black

dev/env-wo-python.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ dependencies:
99
- black
1010
# - crick # currently requires python<=3.9
1111
- filelock
12+
- h5py
1213
- imageio>=2.5
1314
- jupyter
1415
- jupyter-black

dev/env.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ dependencies:
66
- black
77
# - crick # currently requires python<=3.9
88
- filelock
9+
- h5py
910
- imageio>=2.5
1011
- jupyter
1112
- jupyter-black

setup.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,12 @@
2626
"Programming Language :: Python :: 3.10",
2727
"Programming Language :: Python :: 3.11",
2828
"Programming Language :: Python :: 3.12",
29+
"Programming Language :: Python :: 3.13",
2930
],
3031
packages=find_namespace_packages(exclude=["tests"]),
3132
install_requires=[
3233
"bioimageio.spec ==0.5.3.3",
34+
"h5py",
3335
"imageio>=2.10",
3436
"loguru",
3537
"numpy",

tests/test_io.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
from pathlib import Path
2+
from typing import Tuple
3+
4+
import numpy as np
5+
import pytest
6+
7+
8+
@pytest.mark.parametrize(
    "name", ["img.png", "img.tiff", "img.h5", "img.h5/img", "img.npy"]
)
@pytest.mark.parametrize(
    "shape", [(4, 5), (3, 4, 5), (1, 4, 5), (5, 4, 3), (5, 3, 4)]
)
def test_image_io(name: str, shape: Tuple[int, ...], tmp_path: Path):
    """Save a tensor with `save_tensor`, read it back with `load_tensor`
    and compare the result with the tensor that was saved.

    Runs for every combination of the parametrized file names and shapes.
    """
    from bioimageio.core import Tensor
    from bioimageio.core.io import load_tensor, save_tensor

    # uint8 ramp with as many elements as **shape** requires
    n_elements = np.prod(shape)
    values = np.arange(n_elements, dtype=np.uint8).reshape(shape)
    expected = Tensor.from_numpy(values, dims=None)

    target = tmp_path / name
    save_tensor(target, expected)
    assert load_tensor(target) == expected

0 commit comments

Comments
 (0)