Commit abf02f6

reduce code complexity
1 parent 86e809d commit abf02f6

File tree

2 files changed: +69 -75 lines changed

tests/engines/test_nucleus_detection_engine.py
Lines changed: 36 additions & 23 deletions

@@ -10,7 +10,10 @@
 import zarr
 
 from tiatoolbox.annotation.storage import SQLiteStore
-from tiatoolbox.models.engine.nucleus_detector import NucleusDetector
+from tiatoolbox.models.engine.nucleus_detector import (
+    NucleusDetector,
+    _flatten_predictions_to_dask,
+)
 from tiatoolbox.utils import env_detection as toolbox_env
 from tiatoolbox.utils.misc import imwrite
 from tiatoolbox.wsicore.wsireader import WSIReader
@@ -43,34 +46,15 @@ def test_nucleus_detector_wsi(remote_sample: Callable, tmp_path: pathlib.Path) -
         save_dir=save_dir,
         overwrite=True,
         batch_size=8,
+        class_dict={0: "test_nucleus"},
     )
 
     store = SQLiteStore.open(save_dir / "wsi4_512_512.db")
     assert 255 <= len(store.values()) <= 265
+    annotation = next(iter(store.values()))
+    assert annotation.properties["type"] == "test_nucleus"
     store.close()
 
-    result_path = nucleus_detector.run(
-        patch_mode=False,
-        device=device,
-        output_type="zarr",
-        memory_threshold=50,
-        images=[mini_wsi_svs],
-        save_dir=save_dir,
-        overwrite=True,
-        batch_size=8,
-    )
-
-    zarr_path = result_path[mini_wsi_svs]
-    zarr_group = zarr.open(zarr_path, mode="r")
-    xs = zarr_group["x"][:]
-    ys = zarr_group["y"][:]
-    types = zarr_group["types"][:]
-    probs = zarr_group["probs"][:]
-    assert 255 <= len(xs) <= 265
-    assert 255 <= len(ys) <= 265
-    assert 255 <= len(types) <= 265
-    assert 255 <= len(probs) <= 265
-
     nucleus_detector.drop_keys = ["probs"]
     result_path = nucleus_detector.run(
         patch_mode=False,
@@ -330,3 +314,32 @@ def test_write_detection_records_to_store_no_class_dict() -> None:
     annotation = next(iter(dummy_store.values()))
     assert annotation.properties["type"] == 0
     dummy_store.close()
+
+
+def test_flatten_predictions_to_dask() -> None:
+    """Test flattening ragged predictions to Dask array."""
+    ragged_obj_array = np.empty(3, dtype=object)
+    ragged_obj_array[0] = np.array([1.0, 0.0], dtype=np.float32)
+    ragged_obj_array[1] = np.array([0.5, 0.5], dtype=np.float32)
+    ragged_obj_array[2] = np.array([0.2, 0.8, 0.8, 0.2], dtype=np.float32)
+
+    ragged_da_array = da.from_array(ragged_obj_array, chunks=(len(ragged_obj_array),))
+
+    flat_dask_array = _flatten_predictions_to_dask(ragged_da_array)
+    expected_array = np.array(
+        [
+            1.0,
+            0.0,
+            0.5,
+            0.5,
+            0.2,
+            0.8,
+            0.8,
+            0.2,
+        ],
+        dtype=np.float32,
+    )
+    np.testing.assert_array_equal(flat_dask_array.compute(), expected_array)
+
+    flat_dask_array = _flatten_predictions_to_dask(ragged_obj_array)
+    np.testing.assert_array_equal(flat_dask_array.compute(), expected_array)
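
The contract this new test pins down can be reproduced standalone. The flatten_ragged helper below is a hypothetical sketch of the behaviour _flatten_predictions_to_dask exhibits in the hunks of this commit (numeric Dask arrays pass straight through; object-dtype inputs are materialised and concatenated); it is not the toolbox's implementation.

import dask.array as da
import numpy as np


def flatten_ragged(arr) -> da.Array:
    # Hypothetical stand-in for _flatten_predictions_to_dask (illustration only).
    if isinstance(arr, da.Array):
        if arr.dtype != object:
            return arr  # already a flat numeric Dask array
        arr = arr.compute()  # object dtype: materialise, then treat as a list
    parts = [np.asarray(a, dtype=np.float32) for a in arr]
    flat = np.concatenate(parts) if parts else np.empty(0, dtype=np.float32)
    return da.from_array(flat, chunks="auto")


ragged = np.empty(2, dtype=object)
ragged[0] = np.array([1.0, 0.0], dtype=np.float32)
ragged[1] = np.array([0.5], dtype=np.float32)
print(flatten_ragged(ragged).compute())  # [1.  0.  0.5]
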

tiatoolbox/models/engine/nucleus_detector.py
Lines changed: 33 additions & 52 deletions

@@ -38,10 +38,9 @@ def _flatten_predictions_to_dask(
     """Normalise predictions to a flat 1D Dask array."""
     # Case 1: already a Dask array
     if isinstance(arr, da.Array):
-        # If it's already a flat numeric Dask array, just return it
+        # If it's already a numeric Dask array, just return it
         if arr.dtype != object:
            return arr
-        # Object-dtype Dask array: materialise then treat as list
         arr = arr.compute()
 
     arr_list = list(arr)
@@ -134,37 +133,20 @@ def post_process_patches(
             - "probs": dask array of detection probabilities (np.float32).
 
         """
+        logger.info("Post processing patch predictions in NucleusDetector")
         _ = kwargs.get("return_probabilities")
         _ = prediction_shape
         _ = prediction_dtype
 
-        # Ensure chunks are full in spatial/channel dims; batch dim can vary
-        raw_predictions = raw_predictions.rechunk({0: 1})
-
-        def block_fn(block: np.ndarray) -> np.ndarray:
-            """Apply model's post-processing function to each block.
-
-            Args:
-                block: (b_chunk, H, W, C) NumPy array representing a chunk of
-                    raw patch predictions.
-            returns:
-                Processed NumPy array after applying the model's post-processing.
-            """
-            return np.stack(
-                [self.model.postproc_func(sample) for sample in block], axis=0
+        detection_arrays = []
+        for i in range(raw_predictions.shape[0]):
+            patch_pred = raw_predictions[i]
+            postproc_map = da.from_array(
+                self.model.postproc(patch_pred), chunks=patch_pred.chunks
+            )
+            detection_arrays.append(
+                self._centroid_maps_to_detection_arrays(postproc_map)
             )
-
-        postproc_maps = da.map_blocks(
-            block_fn,
-            raw_predictions,
-            dtype=raw_predictions.dtype,
-        )
-
-        # Convert each patch's centroid map to detection records and aggregate
-        detections = [
-            self._centroid_maps_to_detection_arrays(postproc_maps[i])
-            for i in range(postproc_maps.shape[0])
-        ]
 
         def to_object_da(arrs: list[da.Array]) -> da.Array:
             """Wrap list of variable-length arrays into object-dtype dask array."""
@@ -177,10 +159,10 @@ def to_object_da(arrs: list[da.Array]) -> da.Array:
             return da.from_array(obj_array, chunks=(len(arrs),))
 
         return {
-            "x": to_object_da([det["x"] for det in detections]),
-            "y": to_object_da([det["y"] for det in detections]),
-            "types": to_object_da([det["types"] for det in detections]),
-            "probs": to_object_da([det["probs"] for det in detections]),
+            "x": to_object_da([det["x"] for det in detection_arrays]),
+            "y": to_object_da([det["y"] for det in detection_arrays]),
+            "types": to_object_da([det["types"] for det in detection_arrays]),
+            "probs": to_object_da([det["probs"] for det in detection_arrays]),
         }
 
     def post_process_wsi(
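
The to_object_da closure shown above wraps ragged per-patch results without forcing them into one rectangular array. A minimal sketch of the pattern, assuming the elided body fills an object-dtype buffer element by element (only the da.from_array(obj_array, chunks=(len(arrs),)) line is visible in the diff):

import dask.array as da
import numpy as np


def to_object_da(arrs: list) -> da.Array:
    # Assigning into np.empty(..., dtype=object) avoids NumPy trying to
    # broadcast the variable-length arrays into a single 2D array.
    obj_array = np.empty(len(arrs), dtype=object)
    for i, a in enumerate(arrs):
        obj_array[i] = a
    # A single chunk keeps the ragged list together for later flattening.
    return da.from_array(obj_array, chunks=(len(arrs),))


per_patch_x = [np.array([10.0, 42.0]), np.array([7.0])]  # ragged centroid lists
print(to_object_da(per_patch_x).compute()[0])  # [10. 42.]
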
@@ -212,10 +194,9 @@ def post_process_wsi(
             - "probs": dask array of detection probabilities.
 
         """
+        _ = prediction_shape
+
         logger.info("Post processing WSI predictions in NucleusDetector")
-        logger.info("Raw probabilities shape: %s", prediction_shape)
-        logger.info("Raw probabilities dtype %s", prediction_dtype)
-        logger.info("Raw chunk size: %s", raw_predictions.chunks)
 
         # Add halo (overlap) around each block for post-processing
         depth_h = self.model.min_distance
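
The halo mentioned in the context lines is standard Dask overlap handling: each block is extended by min_distance pixels from its neighbours so detections near chunk borders see enough context. A generic sketch of the mechanism (not this engine's code; the thresholding lambda is a placeholder for real peak detection):

import dask.array as da
import numpy as np

prob_map = da.random.random((1024, 1024), chunks=(256, 256))
depth = 11  # stand-in for self.model.min_distance

peaks = da.map_overlap(
    lambda block: (block > 0.5).astype(np.uint8),  # placeholder post-processing
    prob_map,
    depth=depth,         # each block sees `depth` extra pixels from neighbours
    boundary="reflect",  # pad at the outer edge of the full array
    dtype=np.uint8,
)
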
@@ -350,36 +331,36 @@ def _save_predictions_zarr(
         patch_offsets = None
         if self.patch_mode and "x" in predictions:
             x_arr_list = predictions["x"].compute()
-            if x_arr_list is not None:
-                # lengths[i] = number of detections in patch i
-                lengths = np.array([len(a) for a in x_arr_list], dtype=np.int64)
-                patch_offsets = np.empty(len(lengths) + 1, dtype=np.int64)
-                patch_offsets[0] = 0
-                np.cumsum(lengths, out=patch_offsets[1:])
-
-                # Save patch_offsets as its own 1D dataset
-                offsets_da = da.from_array(patch_offsets, chunks="auto")
-                write_tasks.append(
-                    offsets_da.to_zarr(
-                        url=save_path,
-                        component="patch_offsets",
-                        compute=False,
-                    )
+
+            # lengths[i] = number of detections in patch i
+            lengths = np.array([len(a) for a in x_arr_list], dtype=np.int64)
+            patch_offsets = np.empty(len(lengths) + 1, dtype=np.int64)
+            patch_offsets[0] = 0
+            np.cumsum(lengths, out=patch_offsets[1:])
+
+            # Save patch_offsets as its own 1D dataset
+            offsets_da = da.from_array(patch_offsets, chunks="auto")
+            write_tasks.append(
+                offsets_da.to_zarr(
+                    url=save_path,
+                    component="patch_offsets",
+                    compute=False,
                 )
+            )
 
         # ---------------- save flattened predictions -----------------
         for key in keys_to_compute:
             raw = predictions[key]
 
-            # Normalise ragged per-patch predictions to a flat 1D Dask array
             dask_array = _flatten_predictions_to_dask(raw)
-
             # Type casting for storage
             if key != "probs":
                 dask_array = dask_array.astype(np.uint32)
             else:
                 dask_array = dask_array.astype(np.float32)
 
+            # Normalise ragged per-patch predictions to a flat 1D Dask array
+
             task = dask_array.to_zarr(
                 url=save_path,
                 component=key,
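
The patch_offsets dataset written above is a prefix-sum index over the flat detection arrays: offsets[i]:offsets[i + 1] delimits patch i's detections. A hypothetical reader for this layout (the zarr path is illustrative; "probs" may be absent when drop_keys=["probs"] was used in the run):

import zarr

group = zarr.open("output/wsi4_512_512.zarr", mode="r")  # illustrative path
offsets = group["patch_offsets"][:]  # length n_patches + 1, offsets[0] == 0


def detections_for_patch(i: int) -> dict:
    # Slice the flat per-detection arrays back into per-patch records.
    start, stop = offsets[i], offsets[i + 1]
    return {key: group[key][start:stop] for key in ("x", "y", "types")}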
