Implement intensity and morphology measurement function

constantinpape · constantinpape · commit 32cd5b64dc2e · 2025-05-08T12:25:31.000+02:00
diff --git a/flamingo_tools/measurements.py b/flamingo_tools/measurements.py
@@ -0,0 +1,110 @@
+import multiprocessing as mp
+from concurrent import futures
+from typing import Optional
+
+import numpy as np
+import pandas as pd
+import trimesh
+from skimage.measure import marching_cubes
+from tqdm import tqdm
+
+from .file_utils import read_image_data
+
+
+def _measure_volume_and_surface(mask, resolution):
+    """Measure volume and surface area of a 3D mask from its marching-cubes mesh.
+
+    The volume is NaN when the extracted mesh is not watertight.
+    """
+    spacing = (resolution,) * 3
+    points, triangles, point_normals, _ = marching_cubes(mask, spacing=spacing)
+    mesh = trimesh.Trimesh(vertices=points, faces=triangles, vertex_normals=point_normals)
+
+    # Only report a volume for a closed (watertight) mesh.
+    volume = np.abs(mesh.volume) if mesh.is_watertight else np.nan
+    return volume, mesh.area
+
+
+# Could also support s3 directly?
+def compute_object_measures(
+    image_path: str,
+    segmentation_path: str,
+    segmentation_table_path: str,
+    output_table_path: str,
+    image_key: Optional[str] = None,
+    segmentation_key: Optional[str] = None,
+    n_threads: Optional[int] = None,
+    resolution: float = 0.38,
+):
+    """Measure intensity statistics, volume and surface per object and write them to a table.
+
+    Args:
+        image_path: Path to the image data in which the intensities are measured.
+        segmentation_path: Path to the segmentation whose objects are measured.
+        segmentation_table_path: Tab-separated table with label_id, bb_min_* and bb_max_* columns.
+        output_table_path: Path the tab-separated result table is written to.
+        image_key: Key passed to read_image_data together with image_path.
+        segmentation_key: Key passed to read_image_data together with segmentation_path.
+        n_threads: Number of worker threads. Defaults to the number of CPUs.
+        resolution: Voxel size; maps the table's bounding boxes back to voxel coordinates.
+    """
+    # First, we load the pre-computed segmentation table from MoBIE.
+    table = pd.read_csv(segmentation_table_path, sep="\t")
+
+    # Then, open the volumes.
+    image = read_image_data(image_path, image_key)
+    segmentation = read_image_data(segmentation_path, segmentation_key)
+
+    def intensity_measures(seg_id):
+        # Get the bounding box. Use signed ints so that 'bmin - 1' below cannot wrap around.
+        row = table[table.label_id == seg_id]
+
+        bb_min = np.array([
+            row.bb_min_z.item(), row.bb_min_y.item(), row.bb_min_x.item()
+        ]) / resolution
+        bb_min = np.round(bb_min, 0).astype("int64")
+
+        bb_max = np.array([
+            row.bb_max_z.item(), row.bb_max_y.item(), row.bb_max_x.item()
+        ]) / resolution
+        bb_max = np.round(bb_max, 0).astype("int64")
+
+        bb = tuple(
+            slice(max(bmin - 1, 0), min(bmax + 1, sh))
+            for bmin, bmax, sh in zip(bb_min, bb_max, image.shape)
+        )
+
+        local_image = image[bb]
+        mask = segmentation[bb] == seg_id
+        masked_intensity = local_image[mask]
+
+        # Do the base intensity measurements.
+        measures = {
+            "label_id": seg_id,
+            "mean": np.mean(masked_intensity),
+            "stdev": np.std(masked_intensity),
+            "min": np.min(masked_intensity),
+            "max": np.max(masked_intensity),
+            "median": np.median(masked_intensity),
+        }
+        for percentile in (5, 10, 25, 75, 90, 95):
+            measures[f"percentile-{percentile}"] = np.percentile(masked_intensity, percentile)
+
+        # Do the volume and surface measurement.
+        volume, surface = _measure_volume_and_surface(mask, resolution)
+        measures["volume"] = volume
+        measures["surface"] = surface
+        return measures
+
+    seg_ids = table.label_id.values
+    n_threads = mp.cpu_count() if n_threads is None else n_threads
+    with futures.ThreadPoolExecutor(n_threads) as pool:
+        measures = list(tqdm(
+            pool.map(intensity_measures, seg_ids),
+            total=len(seg_ids), desc="Compute intensity measures"
+        ))
+
+    # Create the result table and save it.
+    keys = measures[0].keys()
+    measures = pd.DataFrame({k: [measure[k] for measure in measures] for k in keys})
+    measures.to_csv(output_table_path, sep="\t", index=False)
diff --git a/flamingo_tools/segmentation/postprocessing.py b/flamingo_tools/segmentation/postprocessing.py
@@ -119,13 +119,21 @@ def _compute_table(segmentation, resolution):
     coordinates = np.array([prop.centroid for prop in props])
     # transform pixel distance to physical units
     coordinates = coordinates * resolution
+    bb_min = np.array([prop.bbox[:3] for prop in props]) * resolution
+    bb_max = np.array([prop.bbox[3:] for prop in props]) * resolution
     sizes = np.array([prop.area for prop in props])
     table = pd.DataFrame({
         "label_id": label_ids,
         "n_pixels": sizes,
         "anchor_x": coordinates[:, 2],
         "anchor_y": coordinates[:, 1],
         "anchor_z": coordinates[:, 0],
+        "bb_min_x": bb_min[:, 2],
+        "bb_min_y": bb_min[:, 1],
+        "bb_min_z": bb_min[:, 0],
+        "bb_max_x": bb_max[:, 2],
+        "bb_max_y": bb_max[:, 1],
+        "bb_max_z": bb_max[:, 0],
     })
     return table
 
diff --git a/flamingo_tools/test_data.py b/flamingo_tools/test_data.py
@@ -1,7 +1,35 @@
 import os
+from typing import Tuple
 
 import imageio.v3 as imageio
 from skimage.data import binary_blobs
+from skimage.measure import label
+
+from .segmentation.postprocessing import _compute_table
+
+
+def create_image_data_and_segmentation(
+    folder: str, size: int = 256
+) -> Tuple[str, str, str]:
+    """Write a synthetic test image, its segmentation and the segmentation table to a folder.
+
+    Args:
+        folder: The test data folder. It is created if it does not exist.
+        size: Passed to binary_blobs as the size of the 3D test volume.
+
+    Returns:
+        The paths to the image, the segmentation and the segmentation table.
+    """
+    os.makedirs(folder, exist_ok=True)
+    image_path = os.path.join(folder, "image.tif")
+    segmentation_path = os.path.join(folder, "segmentation.tif")
+    table_path = os.path.join(folder, "default.tsv")
+    blobs = binary_blobs(size, n_dim=3).astype("uint8") * 255
+    labels = label(blobs)
+    imageio.imwrite(image_path, blobs)
+    imageio.imwrite(segmentation_path, labels)
+    _compute_table(labels, resolution=0.38).to_csv(table_path, sep="\t", index=False)
+    return image_path, segmentation_path, table_path
 
 
 # TODO add metadata
diff --git a/test/test_measurements.py b/test/test_measurements.py
@@ -0,0 +1,42 @@
+import os
+import unittest
+from shutil import rmtree
+
+import pandas as pd
+
+
+class TestDataConversion(unittest.TestCase):
+    """Test compute_object_measures on synthetic data created in `folder`."""
+    folder = "./tmp"
+
+    def setUp(self):
+        from flamingo_tools.test_data import create_image_data_and_segmentation
+
+        self.image_path, self.seg_path, self.table_path =\
+            create_image_data_and_segmentation(self.folder)
+
+    def tearDown(self):
+        # Best-effort cleanup: a failing removal must not mask the test result.
+        rmtree(self.folder, ignore_errors=True)
+
+    def test_compute_object_measures(self):
+        """Check that the measurement table is written and has all expected columns."""
+        from flamingo_tools.measurements import compute_object_measures
+
+        output_path = os.path.join(self.folder, "measurements.tsv")
+        compute_object_measures(
+            self.image_path, self.seg_path, self.table_path, output_path, n_threads=1
+        )
+        self.assertTrue(os.path.exists(output_path))
+
+        table = pd.read_csv(output_path, sep="\t")
+        self.assertGreaterEqual(len(table), 1)
+        expected_columns = ["label_id", "mean", "stdev", "min", "max", "median"]
+        expected_columns.extend([f"percentile-{p}" for p in (5, 10, 25, 75, 90, 95)])
+        expected_columns.extend(["volume", "surface"])
+        for col in expected_columns:
+            self.assertIn(col, table.columns)
+
+
+if __name__ == "__main__":
+    unittest.main()  # Run the tests when this file is executed as a script.