Commit ff2e1bb

✨ Add support for annotationstore
1 parent a51bab2 commit ff2e1bb

File tree

4 files changed: +104 −25

tests/engines/test_semantic_segmentor.py
tiatoolbox/models/engine/patch_predictor.py
tiatoolbox/models/engine/semantic_segmentor.py
tiatoolbox/utils/misc.py


tests/engines/test_semantic_segmentor.py

Lines changed: 44 additions & 4 deletions
@@ -5,7 +5,7 @@
 import json
 import sqlite3
 from pathlib import Path
-from typing import Callable
+from typing import TYPE_CHECKING, Callable
 
 import numpy as np
 import torch
@@ -16,6 +16,9 @@
 from tiatoolbox.utils import env_detection as toolbox_env
 from tiatoolbox.utils.misc import imread
 
+if TYPE_CHECKING:
+    import pytest
+
 device = "cuda" if toolbox_env.has_gpu() else "cpu"
 
 
@@ -160,7 +163,9 @@ def test_save_annotation_store(remote_sample: Callable, tmp_path: Path) -> None:
     _test_store_output_patch(output[0])
 
 
-def test_save_annotation_store_nparray(remote_sample: Callable, tmp_path: Path) -> None:
+def test_save_annotation_store_nparray(
+    remote_sample: Callable, tmp_path: Path, caplog: pytest.LogCaptureFixture
+) -> None:
     """Test for saving output as annotation store using a numpy array."""
     segmentor = SemanticSegmentor(
         model="fcn-tissue_mask", batch_size=32, verbose=False, device=device
@@ -184,7 +189,12 @@ def test_save_annotation_store_nparray(remote_sample: Callable, tmp_path: Path)
     assert output[0] == tmp_path / "output1" / "0.db"
     assert output[1] == tmp_path / "output1" / "1.db"
 
-    assert (tmp_path / "output1.zarr").exists()
+    assert (tmp_path / "output1" / "output.zarr").exists()
+
+    zarr_group = zarr.open(str(tmp_path / "output1" / "output.zarr"), mode="r")
+    assert "probabilities" in zarr_group
+
+    assert "Probability maps cannot be saved as AnnotationStore." in caplog.text
 
     _test_store_output_patch(output[0])
     _test_store_output_patch(output[1])
@@ -201,6 +211,7 @@ def test_save_annotation_store_nparray(remote_sample: Callable, tmp_path: Path)
 
     assert output[0] == tmp_path / "output2" / "0.db"
     assert output[1] == tmp_path / "output2" / "1.db"
+    assert not (tmp_path / "output2" / "output.zarr").exists()
 
     assert len(output) == 2
 
@@ -294,7 +305,9 @@ def test_wsi_segmentor_zarr(
     assert 0.48 < np.mean(output_["probabilities"][:]) < 0.52
 
 
-def test_wsi_segmentor_annotationstore(sample_svs: Path, tmp_path: Path) -> None:
+def test_wsi_segmentor_annotationstore(
+    sample_svs: Path, tmp_path: Path, caplog: pytest.LogCaptureFixture
+) -> None:
     """Test SemanticSegmentor for WSIs with AnnotationStore output."""
     segmentor = SemanticSegmentor(
         model="fcn-tissue_mask",
@@ -314,3 +327,30 @@ def test_wsi_segmentor_annotationstore(sample_svs: Path, tmp_path: Path) -> None
     )
 
     assert output[sample_svs] == tmp_path / "wsi_out_check" / (sample_svs.stem + ".db")
+
+    # Return probabilities alongside the AnnotationStore output.
+    segmentor = SemanticSegmentor(
+        model="fcn-tissue_mask",
+        batch_size=32,
+        verbose=False,
+    )
+    # return_probabilities is True for this run.
+    output = segmentor.run(
+        images=[sample_svs],
+        return_probabilities=True,
+        return_labels=False,
+        device=device,
+        patch_mode=False,
+        save_dir=tmp_path / "wsi_prob_out_check",
+        verbose=True,
+        output_type="annotationstore",
+    )
+
+    assert output[sample_svs] == tmp_path / "wsi_prob_out_check" / (
+        sample_svs.stem + ".db"
+    )
+    assert output[sample_svs].with_suffix(".zarr").exists()
+
+    zarr_group = zarr.open(output[sample_svs].with_suffix(".zarr"), mode="r")
+    assert "probabilities" in zarr_group
+    assert "Probability maps cannot be saved as AnnotationStore." in caplog.text

tiatoolbox/models/engine/patch_predictor.py

Lines changed: 4 additions & 1 deletion
@@ -24,6 +24,8 @@ class to support patch-based and whole slide image (WSI) inference using deep le
 
 from typing_extensions import Unpack
 
+from tiatoolbox.utils.misc import cast_to_min_dtype
+
 from .engine_abc import EngineABC, EngineABCRunParams
 
 if TYPE_CHECKING:  # pragma: no cover
@@ -348,7 +350,8 @@ def post_process_patches(
         _ = kwargs.get("return_probabilities")
         _ = prediction_shape
        _ = prediction_dtype
-        return self.model.postproc_func(raw_predictions)
+        raw_predictions = self.model.postproc_func(raw_predictions)
+        return cast_to_min_dtype(raw_predictions)
 
     def post_process_wsi(
         self: PatchPredictor,
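
Note on the change above: post_process_patches now passes the post-processed predictions through cast_to_min_dtype (added to tiatoolbox/utils/misc.py in this commit) before returning them, so patch outputs are stored at the smallest dtype that can hold their values. A minimal sketch of the effect; the sample arrays are illustrative, not from the toolbox:

    import numpy as np
    from tiatoolbox.utils.misc import cast_to_min_dtype

    # Class indices from argmax-style post-processing are often int64 by default.
    preds = np.array([0, 1, 2, 3], dtype=np.int64)
    print(cast_to_min_dtype(preds).dtype)  # uint8: the max value 3 fits in 8 bits

    # A binary mask collapses further, to bool.
    mask = np.array([0, 1, 1, 0], dtype=np.int64)
    print(cast_to_min_dtype(mask).dtype)  # bool: the max value is 1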

tiatoolbox/models/engine/semantic_segmentor.py

Lines changed: 16 additions & 13 deletions
@@ -66,9 +66,9 @@
 from tiatoolbox.models.dataset.dataset_abc import WSIPatchDataset
 from tiatoolbox.utils.misc import (
     dict_to_store_semantic_segmentor,
-    dict_to_zarr,
     get_tqdm,
 )
+from tiatoolbox.wsicore.wsireader import is_zarr
 
 from .patch_predictor import PatchPredictor, PredictorRunParams
 
@@ -599,12 +599,22 @@ def save_predictions(
                 processed_predictions, output_type, save_path=save_path, **kwargs
             )
 
-        logger.info("Saving predictions as AnnotationStore.")
+        return_probabilities = kwargs.get("return_probabilities", False)
+        output_type_ = (
+            "zarr"
+            if is_zarr(save_path.with_suffix(".zarr")) or return_probabilities
+            else "dict"
+        )
+
         processed_predictions = super().save_predictions(
-            processed_predictions, output_type="dict", **kwargs
+            processed_predictions,
+            output_type=output_type_,
+            save_path=save_path.with_suffix(".zarr"),
+            **kwargs,
         )
 
-        return_probabilities = kwargs.get("return_probabilities", False)
+        if isinstance(processed_predictions, Path):
+            processed_predictions = zarr.open(str(processed_predictions), mode="r")
 
         # scale_factor set from kwargs
         scale_factor = kwargs.get("scale_factor", (1.0, 1.0))
@@ -614,6 +624,7 @@
         # Need to add support for zarr conversion.
         save_paths = []
 
+        logger.info("Saving predictions as AnnotationStore.")
         if self.patch_mode:
             for i, predictions in enumerate(processed_predictions["predictions"]):
                 if isinstance(self.images[i], Path):
@@ -639,21 +650,13 @@
             save_paths = out_file
 
         if return_probabilities:
-            zarr_save_path = save_path.parent.with_suffix(".zarr")
             msg = (
                 f"Probability maps cannot be saved as AnnotationStore. "
                 f"To visualise heatmaps in TIAToolbox Visualization tool,"
-                f"convert heatmaps in {zarr_save_path} to ome.tiff using"
+                f"convert heatmaps in {save_path} to ome.tiff using "
                 f"tiatoolbox.utils.misc.write_probability_heatmap_as_ome_tiff."
             )
             logger.info(msg)
-            processed_predictions = {
-                "predictions": processed_predictions.get("predictions"),
-            }
-            dict_to_zarr(
-                raw_predictions=processed_predictions,
-                save_path=zarr_save_path,
-            )
 
         return save_paths
 
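
Taken together with the tests above, the caller-visible behaviour is: with output_type="annotationstore", contours are written to a .db SQLite AnnotationStore, and when return_probabilities=True the probability maps are kept in a sibling .zarr next to it instead of being converted. A sketch mirroring test_wsi_segmentor_annotationstore; the paths, wsi_path variable, and import location are illustrative assumptions:

    from pathlib import Path

    from tiatoolbox.models.engine.semantic_segmentor import SemanticSegmentor

    wsi_path = Path("sample.svs")  # illustrative input slide
    segmentor = SemanticSegmentor(model="fcn-tissue_mask", batch_size=32)
    output = segmentor.run(
        images=[wsi_path],
        patch_mode=False,
        return_probabilities=True,
        save_dir=Path("out_dir"),  # illustrative save directory
        output_type="annotationstore",
    )
    # Contours land in out_dir/sample.db; probability maps stay in
    # out_dir/sample.zarr under the "probabilities" key, and the engine logs
    # "Probability maps cannot be saved as AnnotationStore."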

tiatoolbox/utils/misc.py

Lines changed: 40 additions & 7 deletions
@@ -11,6 +11,7 @@
 from typing import IO, TYPE_CHECKING
 
 import cv2
+import dask.array as da
 import joblib
 import numcodecs
 import numpy as np
@@ -1369,12 +1370,11 @@ def dict_to_store_semantic_segmentor(
             for each patch.
 
     """
-    preds = patch_output["predictions"]
+    preds = da.from_array(patch_output["predictions"], chunks="auto")
 
     # Get the number of unique predictions
-    layer_list = np.unique(preds)
-
-    layer_list = np.delete(layer_list, np.where(layer_list == 0))
+    layer_list = da.unique(preds).compute()
+    layer_list = layer_list[layer_list != 0]
 
     store = SQLiteStore()
 
@@ -1383,13 +1383,12 @@ def dict_to_store_semantic_segmentor(
     annotations_list: list[Annotation] = []
 
     for type_class in layer_list:
-        layer = np.where(preds[:] == type_class, 1, 0)
+        layer = da.where(preds == type_class, 1, 0).astype("uint8").compute()
         contours, hierarchy = cv2.findContours(
-            layer.astype("uint8"),
+            layer,
             cv2.RETR_CCOMP,
             cv2.CHAIN_APPROX_NONE,
         )
-
         annotations_list_ = process_contours(contours, hierarchy, scale_factor)
         annotations_list.extend(annotations_list_)
 
@@ -1815,3 +1814,37 @@ def get_tqdm() -> type[tqdm_notebook | tqdm]:
     if is_notebook():  # pragma: no cover
         return tqdm_notebook.tqdm
     return tqdm
+
+
+def cast_to_min_dtype(array: np.ndarray | da.Array) -> np.ndarray | da.Array:
+    """Cast the input array to the minimal data type required to represent its values.
+
+    This function determines the maximum value in the array and casts it to the smallest
+    unsigned integer type (or boolean) that can accommodate all values. It supports both
+    NumPy and Dask arrays and preserves the input type in the output.
+
+    For Dask arrays, the maximum value is computed lazily and only when needed.
+
+    Args:
+        array (Union[np.ndarray, da.Array]): Input array containing integer values.
+
+    Returns:
+        (np.ndarray or da.Array):
+            A copy of the input array cast to the minimal required dtype.
+            - If the maximum value is 1, the array is cast to boolean.
+            - Otherwise, it is cast to the smallest suitable unsigned integer type.
+
+    """
+    is_dask = isinstance(array, da.Array)
+    max_value = da.max(array) if is_dask else np.max(array)
+    max_value = max_value.compute() if is_dask else max_value
+
+    if max_value == 1:
+        return array.astype(bool)
+
+    dtypes = [np.uint8, np.uint16, np.uint32, np.uint64]
+    for dtype in dtypes:
+        if max_value <= np.iinfo(dtype).max:
+            return array.astype(dtype)
+
+    return array
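
Because the helper checks isinstance(array, da.Array), Dask inputs stay lazy apart from the scalar maximum, which is the one value that must be materialised to pick a dtype. A small sketch of that behaviour; the array contents are illustrative:

    import dask.array as da
    import numpy as np

    from tiatoolbox.utils.misc import cast_to_min_dtype

    arr = da.from_array(np.arange(300, dtype=np.int64), chunks=100)
    out = cast_to_min_dtype(arr)  # max is 299: too big for uint8, fits uint16
    print(type(out).__name__, out.dtype)  # Array uint16; the data itself is still lazy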
