11import collections .abc
2- from pathlib import Path
3- from typing import Any , Mapping , Optional , Sequence , Union
2+ import warnings
3+ from pathlib import Path , PurePosixPath
4+ from typing import Any , Mapping , Optional , Sequence , Tuple , Union
45
6+ import h5py
7+ import numpy as np
58from imageio .v3 import imread , imwrite
69from loguru import logger
710from numpy .typing import NDArray
1518from .stat_measures import DatasetMeasure , MeasureValue
1619from .tensor import Tensor
1720
21+ DEFAULT_H5_DATASET_PATH = "data"  # HDF5 dataset path used by load_image/save_tensor when the given path has no subpath after the file name
22+
1823
def load_image(path: Path, is_volume: Optional[bool] = None) -> NDArray[Any]:
    """Load a single image as a numpy array.

    The file format is chosen from the file suffix:

    - ``.npy`` is read with ``load_array``.
    - ``.h5``, ``.hdf`` and ``.hdf5`` are opened with h5py. The path may
      continue past the file name to select a dataset inside the file
      (e.g. ``my_file.h5/dataset``); without such a subpath the dataset
      named by ``DEFAULT_H5_DATASET_PATH`` is read.
    - anything else is passed to ``imageio.v3.imread``.

    Args:
        path: image path, optionally followed by a dataset path for HDF5 files
        is_volume: deprecated

    Returns:
        The image data as a numpy array.

    Raises:
        ValueError: If a subpath is given for a ``.npy`` file, or if the
            selected HDF5 object does not exist or is not a dataset.
    """
    if is_volume is not None:
        # stacklevel=2 attributes the warning to the caller's line.
        warnings.warn(
            "**is_volume** is deprecated and will be removed soon.", stacklevel=2
        )

    file_path, subpath = _split_dataset_path(Path(path))

    if file_path.suffix == ".npy":
        if subpath is not None:
            raise ValueError(f"Unexpected subpath {subpath} for .npy path {path}")
        return load_array(file_path)
    elif file_path.suffix in (".h5", ".hdf", ".hdf5"):
        if subpath is None:
            dataset_path = DEFAULT_H5_DATASET_PATH
        else:
            dataset_path = str(subpath)

        with h5py.File(file_path, "r") as f:
            h5_dataset = f.get(  # pyright: ignore[reportUnknownVariableType]
                dataset_path
            )
            if h5_dataset is None:
                # `get` returns None for a missing name; report that explicitly
                # instead of the confusing "has type NoneType" message below.
                raise ValueError(
                    f"Dataset '{dataset_path}' not found in {file_path}"
                )
            if not isinstance(h5_dataset, h5py.Dataset):
                raise ValueError(
                    f"{path} is not of type {h5py.Dataset}, but has type "
                    + str(
                        type(h5_dataset)  # pyright: ignore[reportUnknownArgumentType]
                    )
                )
            image: NDArray[Any]
            # Read the full dataset into memory before the file is closed.
            image = h5_dataset[:]  # pyright: ignore[reportUnknownVariableType]
            # Narrowing for the type checker; h5py slicing yields an ndarray.
            assert isinstance(image, np.ndarray), type(
                image  # pyright: ignore[reportUnknownArgumentType]
            )
            return image  # pyright: ignore[reportUnknownVariableType]
    else:
        return imread(path)  # pyright: ignore[reportUnknownVariableType]
3165
@@ -37,14 +71,53 @@ def load_tensor(path: Path, axes: Optional[Sequence[AxisLike]] = None) -> Tensor
3771 return Tensor .from_numpy (array , dims = axes )
3872
3973
def _split_dataset_path(path: Path) -> Tuple[Path, Optional[PurePosixPath]]:
    """Split a path into a file path and an optional subpath inside that file.

    The subpath is e.g. the internal dataset path of an HDF5 file that was
    appended to the file name.

    If **path** itself or one of its ancestors is an existing regular file,
    that file is returned and everything after it is the subpath. This also
    works for subpaths containing dots (``my_file.h5/data.v1``).

    Otherwise (e.g. the file has not been written yet) the split is guessed
    from suffixes only: a path with a suffix is taken as the file itself;
    else the closest ancestor with a suffix is taken as the file. This guess
    cannot handle dotted names inside a not yet existing file.

    Examples:
        >>> split = _split_dataset_path(Path("my_file.h5/dataset"))
        >>> split == (Path("my_file.h5"), PurePosixPath("dataset"))
        True

        If no suffix is detected the path is returned without a subpath:

        >>> _split_dataset_path(Path("my_plain_file")) == (Path("my_plain_file"), None)
        True

    Args:
        path: file path, optionally followed by a path inside the file

    Returns:
        Tuple of the file path and the subpath inside the file
        (``None`` if there is no subpath).
    """
    # A regular file cannot contain other file system entries, so an existing
    # file anywhere along the path unambiguously marks where the subpath starts.
    if path.is_file():
        return path, None

    for parent in path.parents:
        if parent.is_file():
            return parent, PurePosixPath(path.relative_to(parent))

    # Nothing on disk to go by: fall back to the suffix-based guess.
    if path.suffix:
        return path, None

    for parent in path.parents:
        if parent.suffix:
            return parent, PurePosixPath(path.relative_to(parent))

    return path, None
95+
96+
4097def save_tensor (path : Path , tensor : Tensor ) -> None :
4198 # TODO: save axis meta data
4299
43100 data : NDArray [Any ] = tensor .data .to_numpy ()
44- path = Path (path )
45- path .parent .mkdir (exist_ok = True , parents = True )
46- if path .suffix == ".npy" :
47- save_array (path , data )
101+ file_path , subpath = _split_dataset_path (Path (path ))
102+ if not file_path .suffix :
103+ raise ValueError (f"No suffix (needed to decide file format) found in { path } " )
104+
105+ file_path .parent .mkdir (exist_ok = True , parents = True )
106+ if file_path .suffix == ".npy" :
107+ if subpath is not None :
108+ raise ValueError (f"Unexpected subpath { subpath } found in .npy path { path } " )
109+ save_array (file_path , data )
110+ elif file_path .suffix in (".h5" , ".hdf" , ".hdf5" ):
111+ if subpath is None :
112+ dataset_path = DEFAULT_H5_DATASET_PATH
113+ else :
114+ dataset_path = str (subpath )
115+
116+ with h5py .File (file_path , "a" ) as f :
117+ if dataset_path in f :
118+ del f [dataset_path ]
119+
120+ _ = f .create_dataset (dataset_path , data = data , chunks = True )
48121 else :
49122 # if singleton_axes := [a for a, s in tensor.tagged_shape.items() if s == 1]:
50123 # tensor = tensor[{a: 0 for a in singleton_axes}]
0 commit comments