Minor improvements: save functions return file names, more efficient file filtering in the file manager, affine utils

Lars-Kraemer · Lars-Kraemer · commit ba84b5f820d9 · 2025-09-11T12:09:02.000+02:00
diff --git a/src/vidata/analysis/image_analyzer.py b/src/vidata/analysis/image_analyzer.py
@@ -47,7 +47,7 @@ def __init__(self, data_loader: BaseLoader, file_manager: FileManager, nchannels
     def analyze_case(self, index, verbose=False):
         file = self.file_manager[index]
         data, meta = self.data_loader.load(file)
-
+        data = data[...]  # To resolve memmap dtypes
         stats = {
             "name": file.name,
             "dtype": str(data.dtype),
diff --git a/src/vidata/analysis/label_analyzer.py b/src/vidata/analysis/label_analyzer.py
@@ -34,7 +34,7 @@ def __init__(
     def analyze_case(self, index, verbose=False):
         file = self.file_manager[index]
         data, meta = self.data_loader.load(file)
-        # data=data.astype(int)
+        data = data[...]  # To resolve memmap dtypes
         data = data.astype(np.uint8)
 
         stats = {
diff --git a/src/vidata/analysis/viz_utils.py b/src/vidata/analysis/viz_utils.py
@@ -21,7 +21,7 @@ def adjust_layout(
         figure.update_layout(
             xaxis={
                 "title": {"text": xaxis_title, "font": {"size": 18}},  # axis label font
-                "tickfont": {"size": 14},  # tick labels
+                # "tickfont": {"size": 14},  # tick labels
             }
         )
     else:
diff --git a/src/vidata/config_manager.py b/src/vidata/config_manager.py
@@ -1,6 +1,6 @@
 from pathlib import Path
 
-from omegaconf import DictConfig
+from omegaconf import DictConfig, OmegaConf
 
 from vidata.file_manager import FileManager, FileManagerStacked
 from vidata.io import load_json
@@ -264,7 +264,6 @@ def file_manager(self, split: str | None = None, fold: int | None = None) -> Fil
         include_names = None
         if self.splits_file is not None and split is not None:
             include_names = self.resolve_splits_file(split, fold)
-
         return manager_cls(
             path=_cfg["path"],
             file_type=_cfg["file_type"],
@@ -320,8 +319,11 @@ def task_manager(self) -> TaskManager:
 
 
 class ConfigManager:
-    def __init__(self, config: dict | DictConfig):
-        self.config = config
+    def __init__(self, config: dict | DictConfig | str):
+        if isinstance(config, str):
+            self.config = OmegaConf.load(config)
+        else:
+            self.config = config
         self.layers = []
 
         split_cfg = self.config.get("splits", {})
@@ -333,6 +335,7 @@ def __init__(self, config: dict | DictConfig):
                     ovrds = split_cfg[k][layer_cfg["name"]]
                     layer_split[k] = ovrds if ovrds is not None else {}
             lcm = LayerConfigManager(layer_cfg, layer_split, split_cfg.get("splits_file"))
+
             self.layers.append(lcm)
 
     @property
@@ -353,7 +356,6 @@ def __len__(self):
 
 
 if __name__ == "__main__":
-    from omegaconf import DictConfig, OmegaConf
 
     path = "../../../dataset_cfg/Cityscapes.yaml"
     cfg = dict(OmegaConf.load(path))
diff --git a/src/vidata/file_manager/file_manager.py b/src/vidata/file_manager/file_manager.py
@@ -38,25 +38,24 @@ def __init__(
         self.pattern = pattern
         self.include_names = include_names
         self.exclude_names = exclude_names
-
         self.collect_files()
         self.filter_files()
 
     def filter_files(self):
         if self.include_names is not None:
+            _files_re = [str(_file.relative_to(self.path)) for _file in self.files]
             self.files = [
                 _file
-                for _file in list(self.files)
-                if any(_token in str(_file.relative_to(self.path)) for _token in self.include_names)
+                for _file, rel in zip(list(self.files), _files_re, strict=False)
+                if any(_token in rel for _token in self.include_names)
             ]
 
         if self.exclude_names is not None:
+            _files_re = [str(_file.relative_to(self.path)) for _file in self.files]
             self.files = [
                 _file
-                for _file in list(self.files)
-                if not any(
-                    _token in str(_file.relative_to(self.path)) for _token in self.exclude_names
-                )
+                for _file, rel in zip(list(self.files), _files_re, strict=False)
+                if not any(_token in rel for _token in self.exclude_names)
             ]
 
     def collect_files(self):
@@ -73,6 +72,14 @@ def collect_files(self):
         files = list(Path(self.path).glob(pattern + self.file_type))
         self.files = natsorted(files, key=lambda p: p.name)
 
+    def get_name(self, file: str | int, with_file_type=True) -> str:
+        if isinstance(file, int):
+            file = str(self.files[file])
+        name = str(Path(file).relative_to(self.path))
+        if not with_file_type:
+            name = name.replace(self.file_type, "")
+        return name
+
     def __getitem__(self, item: int):
         return self.files[item]
 
diff --git a/src/vidata/io/blosc2_io.py b/src/vidata/io/blosc2_io.py
@@ -20,12 +20,12 @@
 def save_blosc2(
     data: np.ndarray,
     file: str,
-    patch_size: Union[tuple[int, int], tuple[int, int, int]],
+    patch_size: Union[tuple[int, int], tuple[int, int, int]] | None = None,
     clevel: int = 8,
     nthreads: int = 8,
     codec: blosc2.Codec = blosc2.Codec.ZSTD,
     metadata: dict | None = None,
-):
+) -> list[str]:
     """Saves a NumPy array to a Blosc2 file with specified compression parameters.
 
     Args:
@@ -37,6 +37,18 @@ def save_blosc2(
         codec (blosc2.Codec, optional): Compression codec. Defaults to blosc2.Codec.ZSTD.
         metadata (Optional[dict], optional): Optional dictionary of metadata to attach. Defaults to None.
     """
+
+    if patch_size is None:
+        _is_float = np.issubdtype(data.dtype.type, np.floating)
+        _is_2d = data.ndim == 2
+
+        # if _is_2d:
+        base_patch_size = (512 if _is_float else 1024) if _is_2d else (64 if _is_float else 96)
+        # else:
+        #    base_patch_size = 64 if _is_float else 96
+
+        patch_size = tuple([min(s, base_patch_size) for s in data.shape])
+
     blocks, chunks = comp_blosc2_params(data.shape, patch_size, data.itemsize)
     blosc2.set_nthreads(nthreads)
     blosc2.asarray(
@@ -48,6 +60,7 @@ def save_blosc2(
         mmap_mode="w+",
         meta=metadata,
     )
+    return [file]
 
 
 @register_loader("image", ".b2nd", backend="blosc2")
@@ -74,7 +87,7 @@ def load_blosc2(file: str, nthreads: int = 1) -> tuple[blosc2.NDArray, dict]:
 def save_blosc2pkl(
     data: np.ndarray,
     file: str,
-    patch_size: Union[tuple[int, int], tuple[int, int, int]],
+    patch_size: Union[tuple[int, int], tuple[int, int, int]] | None = None,
     clevel: int = 8,
     nthreads: int = 8,
     codec: blosc2.Codec = blosc2.Codec.ZSTD,
@@ -92,7 +105,9 @@ def save_blosc2pkl(
         metadata (Optional[dict], optional): Optional dictionary of metadata to attach. Defaults to None.
     """
     save_blosc2(data, file, patch_size=patch_size, clevel=clevel, nthreads=nthreads, codec=codec)
-    save_pickle(metadata, str(file).replace(".b2nd", ".pkl"))
+    file_pkl = str(file).replace(".b2nd", ".pkl")
+    save_pickle(metadata, file_pkl)
+    return [file, file_pkl]
 
 
 @register_loader("image", ".b2nd", backend="blosc2pkl")
diff --git a/src/vidata/io/image_io.py b/src/vidata/io/image_io.py
@@ -13,8 +13,9 @@ def load_image(file: str):
 
 @register_writer("image", ".png", ".jpg", ".jpeg", ".bmp", backend="imageio")
 @register_writer("mask", ".png", ".bmp", backend="imageio")
-def save_image(data: np.ndarray, file: str):
+def save_image(data: np.ndarray, file: str) -> list[str]:
     iio.imwrite(file, data)
+    return [file]
 
 
 # @register_loader("image", ".png", ".jpg", ".jpeg", ".bmp")
diff --git a/src/vidata/io/nib_io.py b/src/vidata/io/nib_io.py
@@ -7,7 +7,7 @@
 
 @register_writer("image", ".nii.gz", ".nii", backend="nibabel")
 @register_writer("mask", ".nii.gz", ".nii", backend="nibabel")
-def save_nib(data, file, metadata: dict | None = None) -> None:
+def save_nib(data, file, metadata: dict | None = None) -> list[str]:
     """
     Save a NumPy array and SITK-style metadata to a NIfTI file using nibabel.
 
@@ -56,6 +56,7 @@ def save_nib(data, file, metadata: dict | None = None) -> None:
 
     image_nib = nib.Nifti1Image(data, affine=affine_nib)
     nib.save(image_nib, str(file))
+    return [file]
 
 
 @register_loader("image", ".nii.gz", ".nii", backend="nibabel")
diff --git a/src/vidata/io/numpy_io.py b/src/vidata/io/numpy_io.py
@@ -1,71 +1,70 @@
-from pathlib import Path
-from typing import Union
-
 import numpy as np
 
 from vidata.registry import register_loader, register_writer
 
 
 @register_loader("image", ".npy", backend="numpy")
 @register_loader("mask", ".npy", backend="numpy")
-def load_npy(path: str) -> np.ndarray:
+def load_npy(file: str) -> np.ndarray:
     """Load a NumPy array from a .npy file.
 
     Args:
-        path (str): Path to the .npy file.
+        file (str): Path to the .npy file.
 
     Returns:
         np.ndarray: Loaded NumPy array.
     """
-    return np.load(path, allow_pickle=False), {}
+    return np.load(file, allow_pickle=False), {}
 
 
 @register_writer("image", ".npy", backend="numpy")
 @register_writer("mask", ".npy", backend="numpy")
-def save_npy(array: np.ndarray, path: Union[str, Path], *args, **kwargs) -> None:
+def save_npy(array: np.ndarray, file: str, *args, **kwargs) -> list[str]:
     """Save a NumPy array to a .npy file.
 
     Args:
         array (np.ndarray): NumPy array to save.
-        path (str): Output file path.
+        file (str): Output file path.
     """
-    np.save(path, array)
+    np.save(file, array)
+    return [file]
 
 
 @register_loader("image", ".npz", backend="numpy")
 @register_loader("mask", ".npz", backend="numpy")
-def load_npz(path: str) -> tuple[dict[str, np.ndarray], dict]:
+def load_npz(file: str) -> tuple[dict[str, np.ndarray], dict]:
     """Load multiple arrays from a .npz file into a dictionary.
 
     Args:
-        path (str): Path to the .npz file.
+        file (str): Path to the .npz file.
 
     Returns:
         dict[str, np.ndarray]: dictionary mapping keys to arrays.
     """
-    with np.load(path) as data:
+    with np.load(file) as data:
         return {key: data[key] for key in data.files}, {}
 
 
 @register_writer("image", ".npz", backend="numpy")
 @register_writer("mask", ".npz", backend="numpy")
 def save_npz(
-    data_dict: dict[str, np.ndarray], path: str, compress: bool = True, *args, **kwargs
-) -> None:
+    data_dict: dict[str, np.ndarray], file: str, compress: bool = True, *args, **kwargs
+) -> list[str]:
     """Save multiple NumPy arrays to a .npz file.
 
     Args:
         data_dict (dict[str, np.ndarray]): dictionary of arrays to save.
-        path (str): Output file path.
+        file (str): Output file path.
         compress (bool, optional): Whether to use compressed format. Defaults to True.
     """
     if compress:
         if isinstance(data_dict, dict):
-            np.savez_compressed(path, **data_dict)
+            np.savez_compressed(file, **data_dict)
         else:
-            np.savez_compressed(path, data_dict)
+            np.savez_compressed(file, data_dict)
     else:
         if isinstance(data_dict, dict):
-            np.savez(path, **data_dict)
+            np.savez(file, **data_dict)
         else:
-            np.savez(path, data_dict)
+            np.savez(file, data_dict)
+    return [file]
diff --git a/src/vidata/io/sitk_io.py b/src/vidata/io/sitk_io.py
@@ -7,7 +7,7 @@
 
 @register_writer("image", ".nii.gz", ".nii", ".mha", ".nrrd", backend="sitk")
 @register_writer("mask", ".nii.gz", ".nii", ".mha", ".nrrd", backend="sitk")
-def save_sitk(data, file, metadata: dict | None = None) -> None:
+def save_sitk(data: np.ndarray, file: str, metadata: dict | None = None) -> list[str]:
     """Save a NumPy array as a medical image file using SimpleITK.
 
     Args:
@@ -34,6 +34,7 @@ def save_sitk(data, file, metadata: dict | None = None) -> None:
             image_sitk.SetDirection(direction.flatten().tolist()[::-1])
 
     sitk.WriteImage(image_sitk, str(file), useCompression=True)
+    return [file]
 
 
 @register_loader("image", ".nii.gz", ".nii", ".mha", ".nrrd", backend="sitk")
diff --git a/src/vidata/io/tif_io.py b/src/vidata/io/tif_io.py
@@ -25,7 +25,7 @@ def save_tif(
     file: str,
     tile_size: int = 256,
     compression: str = "zlib",
-):
+) -> list[str]:
     """Save a NumPy array as a tiled, compressed TIFF (.tif) file.
 
     Args:
@@ -39,3 +39,4 @@ def save_tif(
         "compression": compression,
     }
     tifffile.imwrite(file, data, **options)
+    return [file]
diff --git a/src/vidata/utils/affine.py b/src/vidata/utils/affine.py

Original file line number	Diff line number	Diff line change
`@@ -21,7 +21,7 @@ def adjust_layout(`
`21`	`21`	`figure.update_layout(`
`22`	`22`	`xaxis={`
`23`	`23`	`"title": {"text": xaxis_title, "font": {"size": 18}}, # axis label font`
`24`		`- "tickfont": {"size": 14}, # tick labels`
	`24`	`+ # "tickfont": {"size": 14}, # tick labels`
`25`	`25`	`}`
`26`	`26`	`)`
`27`	`27`	`else:`