From efd164a3cd2598ee341b8b132e90a1f234e74480 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Fri, 2 May 2025 14:22:06 +0200 Subject: [PATCH 01/13] Add script for measuring the number of SGNs --- scripts/measurements/measure_sgns.py | 48 ++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 scripts/measurements/measure_sgns.py diff --git a/scripts/measurements/measure_sgns.py b/scripts/measurements/measure_sgns.py new file mode 100644 index 0000000..f94730a --- /dev/null +++ b/scripts/measurements/measure_sgns.py @@ -0,0 +1,48 @@ +import json +import os + +import numpy as np +import pandas as pd +from flamingo_tools.s3_utils import create_s3_target, BUCKET_NAME + + +def open_json(fs, path): + s3_path = os.path.join(BUCKET_NAME, path) + with fs.open(s3_path, "r") as f: + content = json.load(f) + return content + + +def open_tsv(fs, path): + s3_path = os.path.join(BUCKET_NAME, path) + with fs.open(s3_path, "r") as f: + table = pd.read_csv(f, sep="\t") + return table + + +def main(): + fs = create_s3_target() + project_info = open_json(fs, "project.json") + for dataset in project_info["datasets"]: + if dataset == "fens": + continue + print(dataset) + dataset_info = open_json(fs, os.path.join(dataset, "dataset.json")) + sources = dataset_info["sources"] + for source, source_info in sources.items(): + if not source.startswith("SGN"): + continue + assert "segmentation" in source_info + source_info = source_info["segmentation"] + table_path = source_info["tableData"]["tsv"]["relativePath"] + table = open_tsv(fs, os.path.join(dataset, table_path, "default.tsv")) + component_labels = table.component_labels.values + remaining_sgns = component_labels[component_labels != 0] + print(source) + print("Number of SGNs (all components) :", len(remaining_sgns)) + _, n_per_component = np.unique(remaining_sgns, return_counts=True) + print("Number of SGNs (largest component):", max(n_per_component)) + + +if __name__ == "__main__": + main() From 32cd5b64dc2e63de292fffb8f80c554d2d7cc883 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Thu, 8 May 2025 12:25:26 +0200 Subject: [PATCH 02/13] Implement intensity and morphology measurement function --- flamingo_tools/measurements.py | 110 ++++++++++++++++++ flamingo_tools/segmentation/postprocessing.py | 8 ++ flamingo_tools/test_data.py | 28 +++++ test/test_measurements.py | 42 +++++++ 4 files changed, 188 insertions(+) create mode 100644 flamingo_tools/measurements.py create mode 100644 test/test_measurements.py diff --git a/flamingo_tools/measurements.py b/flamingo_tools/measurements.py new file mode 100644 index 0000000..be6bdaa --- /dev/null +++ b/flamingo_tools/measurements.py @@ -0,0 +1,110 @@ +import multiprocessing as mp +from concurrent import futures +from typing import Optional + +import numpy as np +import pandas as pd +import trimesh +from skimage.measure import marching_cubes +from tqdm import tqdm + +from .file_utils import read_image_data + + +def _measure_volume_and_surface(mask, resolution): + # Use marching_cubes for 3D data + verts, faces, normals, _ = marching_cubes(mask, spacing=(resolution,) * 3) + + mesh = trimesh.Trimesh(vertices=verts, faces=faces, vertex_normals=normals) + surface = mesh.area + if mesh.is_watertight: + volume = np.abs(mesh.volume) + else: + volume = np.nan + + return volume, surface + + +# Could also support s3 directly? 
+def compute_object_measures( + image_path: str, + segmentation_path: str, + segmentation_table_path: str, + output_table_path: str, + image_key: Optional[str] = None, + segmentation_key: Optional[str] = None, + n_threads: Optional[int] = None, + resolution: float = 0.38, +): + """ + + Args: + image_path: + segmentation_path: + segmentation_table_path: + output_table_path: + image_key: + segmentation_key: + n_threads: + resolution: + """ + # First, we load the pre-computed segmentation table from MoBIE. + table = pd.read_csv(segmentation_table_path, sep="\t") + + # Then, open the volumes. + image = read_image_data(image_path, image_key) + segmentation = read_image_data(segmentation_path, segmentation_key) + + def intensity_measures(seg_id): + # Get the bounding box. + row = table[table.label_id == seg_id] + + bb_min = np.array([ + row.bb_min_z.item(), row.bb_min_y.item(), row.bb_min_x.item() + ]) / resolution + bb_min = np.round(bb_min, 0).astype("uint32") + + bb_max = np.array([ + row.bb_max_z.item(), row.bb_max_y.item(), row.bb_max_x.item() + ]) / resolution + bb_max = np.round(bb_max, 0).astype("uint32") + + bb = tuple( + slice(max(bmin - 1, 0), min(bmax + 1, sh)) + for bmin, bmax, sh in zip(bb_min, bb_max, image.shape) + ) + + local_image = image[bb] + mask = segmentation[bb] == seg_id + masked_intensity = local_image[mask] + + # Do the base intensity measurements. + measures = { + "label_id": seg_id, + "mean": np.mean(masked_intensity), + "stdev": np.std(masked_intensity), + "min": np.min(masked_intensity), + "max": np.max(masked_intensity), + "median": np.median(masked_intensity), + } + for percentile in (5, 10, 25, 75, 90, 95): + measures[f"percentile-{percentile}"] = np.percentile(masked_intensity, percentile) + + # Do the volume and surface measurement. + volume, surface = _measure_volume_and_surface(mask, resolution) + measures["volume"] = volume + measures["surface"] = surface + return measures + + seg_ids = table.label_id.values + n_threads = mp.cpu_count() if n_threads is None else n_threads + with futures.ThreadPoolExecutor(n_threads) as pool: + measures = list(tqdm( + pool.map(intensity_measures, seg_ids), + total=len(seg_ids), desc="Compute intensity measures" + )) + + # Create the result table and save it. 
+ keys = measures[0].keys() + measures = pd.DataFrame({k: [measure[k] for measure in measures] for k in keys}) + measures.to_csv(output_table_path, sep="\t", index=False) diff --git a/flamingo_tools/segmentation/postprocessing.py b/flamingo_tools/segmentation/postprocessing.py index 7ad987b..f2a956c 100644 --- a/flamingo_tools/segmentation/postprocessing.py +++ b/flamingo_tools/segmentation/postprocessing.py @@ -119,6 +119,8 @@ def _compute_table(segmentation, resolution): coordinates = np.array([prop.centroid for prop in props]) # transform pixel distance to physical units coordinates = coordinates * resolution + bb_min = np.array([prop.bbox[:3] for prop in props]) * resolution + bb_max = np.array([prop.bbox[3:] for prop in props]) * resolution sizes = np.array([prop.area for prop in props]) table = pd.DataFrame({ "label_id": label_ids, @@ -126,6 +128,12 @@ def _compute_table(segmentation, resolution): "anchor_x": coordinates[:, 2], "anchor_y": coordinates[:, 1], "anchor_z": coordinates[:, 0], + "bb_min_x": bb_min[:, 2], + "bb_min_y": bb_min[:, 1], + "bb_min_z": bb_min[:, 0], + "bb_max_x": bb_max[:, 2], + "bb_max_y": bb_max[:, 1], + "bb_max_z": bb_max[:, 0], }) return table diff --git a/flamingo_tools/test_data.py b/flamingo_tools/test_data.py index bca605d..2abcc41 100644 --- a/flamingo_tools/test_data.py +++ b/flamingo_tools/test_data.py @@ -1,7 +1,35 @@ import os +from typing import Tuple import imageio.v3 as imageio from skimage.data import binary_blobs +from skimage.measure import label + +from .segmentation.postprocessing import _compute_table + + +def create_image_data_and_segmentation( + folder: str, size: int = 256 +) -> Tuple[str, str, str]: + """Create test data containing an image, a corresponding segmentation and segmentation table. + + Args: + folder: The test data folder. 
+ """ + os.makedirs(folder, exist_ok=True) + data = binary_blobs(size, n_dim=3).astype("uint8") * 255 + seg = label(data) + + image_path = os.path.join(folder, "image.tif") + segmentation_path = os.path.join(folder, "segmentation.tif") + imageio.imwrite(image_path, data) + imageio.imwrite(segmentation_path, seg) + + table_path = os.path.join(folder, "default.tsv") + table = _compute_table(seg, resolution=0.38) + table.to_csv(table_path, sep="\t", index=False) + + return image_path, segmentation_path, table_path # TODO add metadata diff --git a/test/test_measurements.py b/test/test_measurements.py new file mode 100644 index 0000000..cc4ee88 --- /dev/null +++ b/test/test_measurements.py @@ -0,0 +1,42 @@ +import os +import unittest +from shutil import rmtree + +import pandas as pd + + +class TestDataConversion(unittest.TestCase): + folder = "./tmp" + + def setUp(self): + from flamingo_tools.test_data import create_image_data_and_segmentation + + self.image_path, self.seg_path, self.table_path =\ + create_image_data_and_segmentation(self.folder) + + def tearDown(self): + try: + rmtree(self.folder) + except Exception: + pass + + def test_compute_object_measures(self): + from flamingo_tools.measurements import compute_object_measures + + output_path = os.path.join(self.folder, "measurements.tsv") + compute_object_measures( + self.image_path, self.seg_path, self.table_path, output_path, n_threads=1 + ) + self.assertTrue(os.path.exists(output_path)) + + table = pd.read_csv(output_path, sep="\t") + self.assertTrue(len(table) >= 1) + expected_columns = ["label_id", "mean", "stdev", "min", "max", "median"] + expected_columns.extend([f"percentile-{p}" for p in (5, 10, 25, 75, 90, 95)]) + expected_columns.extend(["volume", "surface"]) + for col in expected_columns: + self.assertIn(col, table.columns) + + +if __name__ == "__main__": + unittest.main() From 5b38552b73029ea37d1b33c1eb0ac343f93dbada Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Thu, 8 May 2025 13:32:51 +0200 Subject: [PATCH 03/13] Add trimesh dependency --- environment.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/environment.yaml b/environment.yaml index 2aedfb7..4dca027 100644 --- a/environment.yaml +++ b/environment.yaml @@ -12,5 +12,6 @@ dependencies: - pytorch - s3fs - torch_em + - trimesh - z5py - zarr From 9d998f549258cb1dc368faa44dd6b22228e884e6 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Thu, 8 May 2025 19:21:07 +0200 Subject: [PATCH 04/13] Update the measurement implementation --- flamingo_tools/measurements.py | 80 ++++++++++++++++++++++++---------- 1 file changed, 56 insertions(+), 24 deletions(-) diff --git a/flamingo_tools/measurements.py b/flamingo_tools/measurements.py index be6bdaa..5e314d1 100644 --- a/flamingo_tools/measurements.py +++ b/flamingo_tools/measurements.py @@ -9,6 +9,7 @@ from tqdm import tqdm from .file_utils import read_image_data +from .segmentation.postprocessing import _compute_table def _measure_volume_and_surface(mask, resolution): @@ -25,35 +26,26 @@ def _measure_volume_and_surface(mask, resolution): return volume, surface -# Could also support s3 directly? 
-def compute_object_measures( - image_path: str, - segmentation_path: str, - segmentation_table_path: str, - output_table_path: str, - image_key: Optional[str] = None, - segmentation_key: Optional[str] = None, +def compute_object_measures_impl( + image: np.typing.ArrayLike, + segmentation: np.typing.ArrayLike, n_threads: Optional[int] = None, resolution: float = 0.38, -): - """ + table: Optional[pd.DataFrame] = None, +) -> pd.DataFrame: + """Compute simple intensity and morphology measures for each segmented cell in a segmentation. + + See `compute_object_measures` for details. Args: - image_path: - segmentation_path: - segmentation_table_path: - output_table_path: - image_key: - segmentation_key: - n_threads: - resolution: + image: The image data. + segmentation: The segmentation. + n_threads: The number of threads to use for computation. + resolution: The resolution / voxel size of the data. + table: The segmentation table. Will be computed on the fly if it is not given. """ - # First, we load the pre-computed segmentation table from MoBIE. - table = pd.read_csv(segmentation_table_path, sep="\t") - - # Then, open the volumes. - image = read_image_data(image_path, image_key) - segmentation = read_image_data(segmentation_path, segmentation_key) + if table is None: + table = _compute_table(segmentation, resolution) def intensity_measures(seg_id): # Get the bounding box. @@ -107,4 +99,44 @@ def intensity_measures(seg_id): # Create the result table and save it. keys = measures[0].keys() measures = pd.DataFrame({k: [measure[k] for measure in measures] for k in keys}) + return measures + + +# Could also support s3 directly? +def compute_object_measures( + image_path: str, + segmentation_path: str, + segmentation_table_path: str, + output_table_path: str, + image_key: Optional[str] = None, + segmentation_key: Optional[str] = None, + n_threads: Optional[int] = None, + resolution: float = 0.38, +) -> None: + """Compute simple intensity and morphology measures for each segmented cell in a segmentation. + + This computes the mean, standard deviation, minimum, maximum, median and + 5th, 10th, 25th, 75th, 90th and 95th percentile of the intensity image + per cell, as well as the volume and surface. + + Args: + image_path: The filepath to the image data. Either a tif or hdf5/zarr/n5 file. + segmentation_path: The filepath to the segmentation data. Either a tif or hdf5/zarr/n5 file. + segmentation_table_path: The path to the segmentation table in MoBIE format. + output_table_path: The path for saving the segmentation with intensity measures. + image_key: The key (= internal path) for the image data. Not needed fir tif. + segmentation_key: The key (= internal path) for the segmentation data. Not needed for tif. + n_threads: The number of threads to use for computation. + resolution: The resolution / voxel size of the data. + """ + # First, we load the pre-computed segmentation table from MoBIE. + table = pd.read_csv(segmentation_table_path, sep="\t") + + # Then, open the volumes. 
+ image = read_image_data(image_path, image_key) + segmentation = read_image_data(segmentation_path, segmentation_key) + + measures = compute_object_measures_impl( + image, segmentation, n_threads, resolution, table=table + ) measures.to_csv(output_table_path, sep="\t", index=False) From 33aae708c6ae25fcd0d4b126a025613a2feccb4a Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Fri, 9 May 2025 10:27:50 +0200 Subject: [PATCH 05/13] Update the measurement test --- test/test_measurements.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/test_measurements.py b/test/test_measurements.py index cc4ee88..48f1667 100644 --- a/test/test_measurements.py +++ b/test/test_measurements.py @@ -2,6 +2,7 @@ import unittest from shutil import rmtree +import imageio.v3 as imageio import pandas as pd @@ -37,6 +38,10 @@ def test_compute_object_measures(self): for col in expected_columns: self.assertIn(col, table.columns) + n_objects = int(imageio.imread(self.seg_path).max()) + expected_shape = (n_objects, len(expected_columns)) + self.assertEqual(table.shape, expected_shape) + if __name__ == "__main__": unittest.main() From 03ec9552166d98c25911441ee8a7f6e86442a99c Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Tue, 13 May 2025 21:22:22 +0200 Subject: [PATCH 06/13] Add segmentation and measurement scripts for SGN stainings --- .../segmentation/unet_prediction.py | 5 +- .../check_segmentation.py | 35 +++++++++++ .../measure_intensities.py | 39 +++++++++++++ .../sgn_stain_predictions/run_prediction.py | 58 +++++++++++++++++++ 4 files changed, 136 insertions(+), 1 deletion(-) create mode 100644 scripts/sgn_stain_predictions/check_segmentation.py create mode 100644 scripts/sgn_stain_predictions/measure_intensities.py create mode 100644 scripts/sgn_stain_predictions/run_prediction.py diff --git a/flamingo_tools/segmentation/unet_prediction.py b/flamingo_tools/segmentation/unet_prediction.py index a1d1ba8..8b754d3 100644 --- a/flamingo_tools/segmentation/unet_prediction.py +++ b/flamingo_tools/segmentation/unet_prediction.py @@ -324,6 +324,7 @@ def run_unet_prediction( scale: Optional[float] = None, block_shape: Optional[Tuple[int, int, int]] = None, halo: Optional[Tuple[int, int, int]] = None, + use_mask: bool = True, ) -> None: """Run prediction and segmentation with a distance U-Net. @@ -337,10 +338,12 @@ def run_unet_prediction( By default the data will not be rescaled. block_shape: The block-shape for running the prediction. halo: The halo (= block overlap) to use for prediction. + use_mask: Whether to use the masking heuristics to not run inference on empty blocks. 
""" os.makedirs(output_folder, exist_ok=True) - find_mask(input_path, input_key, output_folder) + if use_mask: + find_mask(input_path, input_key, output_folder) original_shape = prediction_impl( input_path, input_key, output_folder, model_path, scale, block_shape, halo diff --git a/scripts/sgn_stain_predictions/check_segmentation.py b/scripts/sgn_stain_predictions/check_segmentation.py new file mode 100644 index 0000000..3a4799c --- /dev/null +++ b/scripts/sgn_stain_predictions/check_segmentation.py @@ -0,0 +1,35 @@ +import os +from glob import glob + +import imageio.v3 as imageio +import napari + + +ROOT = "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/LS_sampleprepcomparison_crops" +SAVE_ROOT = "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/LS_sampleprepcomparison_crops/segmentations" + + +def main(): + files = sorted(glob(os.path.join(ROOT, "**/*.tif"))) + for ff in files: + if "segmentations" in ff: + return + print("Visualizing", ff) + rel_path = os.path.relpath(ff, ROOT) + seg_path = os.path.join(SAVE_ROOT, rel_path) + + image = imageio.imread(ff) + if os.path.exists(seg_path): + seg = imageio.imread(seg_path) + else: + seg = None + + v = napari.Viewer() + v.add_image(image) + if seg is not None: + v.add_labels(seg) + napari.run() + + +if __name__ == "__main__": + main() diff --git a/scripts/sgn_stain_predictions/measure_intensities.py b/scripts/sgn_stain_predictions/measure_intensities.py new file mode 100644 index 0000000..99f67ac --- /dev/null +++ b/scripts/sgn_stain_predictions/measure_intensities.py @@ -0,0 +1,39 @@ +import os +from glob import glob + +import tifffile +from flamingo_tools.measurements import compute_object_measures_impl + + +ROOT = "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/LS_sampleprepcomparison_crops" +SAVE_ROOT = "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/LS_sampleprepcomparison_crops/segmentations" + + +def measure_intensities(ff): + rel_path = os.path.relpath(ff, ROOT) + out_path = os.path.join("./measurements", rel_path.replace(".tif", ".xlsx")) + if os.path.exists(out_path): + return + + print("Computing measurements for", rel_path) + seg_path = os.path.join(SAVE_ROOT, rel_path) + + image_data = tifffile.memmap(ff) + seg_data = tifffile.memmap(seg_path) + + table = compute_object_measures_impl(image_data, seg_data, n_threads=8) + + os.makedirs(os.path.split(out_path)[0], exist_ok=True) + table.to_excel(out_path, index=False) + + +def main(): + files = sorted(glob(os.path.join(ROOT, "**/*.tif"))) + for ff in files: + if "segmentations" in ff: + return + measure_intensities(ff) + + +if __name__ == "__main__": + main() diff --git a/scripts/sgn_stain_predictions/run_prediction.py b/scripts/sgn_stain_predictions/run_prediction.py new file mode 100644 index 0000000..1badae3 --- /dev/null +++ b/scripts/sgn_stain_predictions/run_prediction.py @@ -0,0 +1,58 @@ +import os +import tempfile +from glob import glob + +import tifffile +from elf.io import open_file +from flamingo_tools.segmentation import run_unet_prediction + +ROOT = "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/LS_sampleprepcomparison_crops" +MODEL_PATH = "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/trained_models/SGN/cochlea_distance_unet_SGN_March2025Model" # noqa + +SAVE_ROOT = "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/LS_sampleprepcomparison_crops/segmentations" + + +def check_data(): + files = glob(os.path.join(ROOT, "**/*.tif"), recursive=True) + for ff in files: + 
rel_path = sorted(os.path.relpath(ff, ROOT)) + shape = tifffile.memmap(ff).shape + print(rel_path, shape) + + +def segment_crop(input_file): + fname = os.path.relpath(input_file, ROOT) + out_file = os.path.join(SAVE_ROOT, fname) + if "segmentations" in input_file: + return + if os.path.exists(out_file): + return + + print("Run prediction for", input_file) + os.makedirs(os.path.split(out_file)[0], exist_ok=True) + with tempfile.TemporaryDirectory() as tmp_folder: + run_unet_prediction( + input_file, input_key=None, output_folder=tmp_folder, + model_path=MODEL_PATH, min_size=1000, use_mask=False, + ) + seg_path = os.path.join(tmp_folder, "segmentation.zarr") + with open_file(seg_path, mode="r") as f: + seg = f["segmentation"][:] + + print("Writing output to", out_file) + tifffile.imwrite(out_file, seg, bigtiff=True) + + +def segment_all(): + files = sorted(glob(os.path.join(ROOT, "**/*.tif"), recursive=True)) + for ff in files: + segment_crop(ff) + + +def main(): + # check_data() + segment_all() + + +if __name__ == "__main__": + main() From 359b3e3e408a904779cd9ab7734286a46ce3bb32 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Tue, 13 May 2025 22:02:13 +0200 Subject: [PATCH 07/13] Update measurement test --- flamingo_tools/test_data.py | 33 +++++++++++++++++++++++++++++---- test/test_measurements.py | 7 +++---- 2 files changed, 32 insertions(+), 8 deletions(-) diff --git a/flamingo_tools/test_data.py b/flamingo_tools/test_data.py index 2abcc41..4f9f5ef 100644 --- a/flamingo_tools/test_data.py +++ b/flamingo_tools/test_data.py @@ -2,15 +2,40 @@ from typing import Tuple import imageio.v3 as imageio -from skimage.data import binary_blobs +import requests +from skimage.data import binary_blobs, cells3d from skimage.measure import label from .segmentation.postprocessing import _compute_table +SEGMENTATION_URL = "https://owncloud.gwdg.de/index.php/s/kwoGRYiJRRrswgw/download" -def create_image_data_and_segmentation( - folder: str, size: int = 256 -) -> Tuple[str, str, str]: + +def get_test_volume_and_segmentation(folder: str) -> Tuple[str, str, str]: + os.makedirs(folder, exist_ok=True) + + segmentation_path = os.path.join(folder, "segmentation.tif") + resp = requests.get(SEGMENTATION_URL) + resp.raise_for_status() + + with open(segmentation_path, "wb") as f: + f.write(resp.content) + + nuclei = cells3d()[20:40, 1] + segmentation = imageio.imread(segmentation_path) + assert nuclei.shape == segmentation.shape + + image_path = os.path.join(folder, "image.tif") + imageio.imwrite(image_path, nuclei) + + table_path = os.path.join(folder, "default.tsv") + table = _compute_table(segmentation, resolution=0.38) + table.to_csv(table_path, sep="\t", index=False) + + return image_path, segmentation_path, table_path + + +def create_image_data_and_segmentation(folder: str, size: int = 256) -> Tuple[str, str, str]: """Create test data containing an image, a corresponding segmentation and segmentation table. 
Args: diff --git a/test/test_measurements.py b/test/test_measurements.py index 48f1667..cecb305 100644 --- a/test/test_measurements.py +++ b/test/test_measurements.py @@ -6,14 +6,13 @@ import pandas as pd -class TestDataConversion(unittest.TestCase): +class TestMeasurements(unittest.TestCase): folder = "./tmp" def setUp(self): - from flamingo_tools.test_data import create_image_data_and_segmentation + from flamingo_tools.test_data import get_test_volume_and_segmentation - self.image_path, self.seg_path, self.table_path =\ - create_image_data_and_segmentation(self.folder) + self.image_path, self.seg_path, self.table_path = get_test_volume_and_segmentation(self.folder) def tearDown(self): try: From 158afcb9588ddf6cf36074773c40ebb32a233c30 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Tue, 13 May 2025 22:24:03 +0200 Subject: [PATCH 08/13] Update measurement tests --- flamingo_tools/measurements.py | 4 ++-- test/test_measurements.py | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/flamingo_tools/measurements.py b/flamingo_tools/measurements.py index 5e314d1..8114c7b 100644 --- a/flamingo_tools/measurements.py +++ b/flamingo_tools/measurements.py @@ -89,11 +89,11 @@ def intensity_measures(seg_id): return measures seg_ids = table.label_id.values + assert len(seg_ids) > 0, "The segmentation table is empty." n_threads = mp.cpu_count() if n_threads is None else n_threads with futures.ThreadPoolExecutor(n_threads) as pool: measures = list(tqdm( - pool.map(intensity_measures, seg_ids), - total=len(seg_ids), desc="Compute intensity measures" + pool.map(intensity_measures, seg_ids), total=len(seg_ids), desc="Compute intensity measures" )) # Create the result table and save it. diff --git a/test/test_measurements.py b/test/test_measurements.py index cecb305..7ab67e3 100644 --- a/test/test_measurements.py +++ b/test/test_measurements.py @@ -4,6 +4,8 @@ import imageio.v3 as imageio import pandas as pd +import numpy as np +from skimage.measure import regionprops_table class TestMeasurements(unittest.TestCase): @@ -41,6 +43,18 @@ def test_compute_object_measures(self): expected_shape = (n_objects, len(expected_columns)) self.assertEqual(table.shape, expected_shape) + image = imageio.imread(self.image_path) + segmentation = imageio.imread(self.seg_path) + properties = ("label", "intensity_mean", "intensity_std", "intensity_min", "intensity_max") + expected_measures = regionprops_table(segmentation, intensity_image=image, properties=properties) + expected_measures = pd.DataFrame(expected_measures) + + for (col, col_exp) in [ + ("label_id", "label"), ("mean", "intensity_mean"), ("stdev", "intensity_std"), + ("min", "intensity_min"), ("max", "intensity_max"), + ]: + self.assertTrue(np.allclose(table[col].values, expected_measures[col_exp].values)) + if __name__ == "__main__": unittest.main() From 632834bd54b71826f9a641c22ba42ff006642d8e Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Tue, 13 May 2025 22:30:57 +0200 Subject: [PATCH 09/13] Try to fix CI test --- flamingo_tools/measurements.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flamingo_tools/measurements.py b/flamingo_tools/measurements.py index 8114c7b..ac126a6 100644 --- a/flamingo_tools/measurements.py +++ b/flamingo_tools/measurements.py @@ -75,8 +75,8 @@ def intensity_measures(seg_id): "label_id": seg_id, "mean": np.mean(masked_intensity), "stdev": np.std(masked_intensity), - "min": np.min(masked_intensity), - "max": np.max(masked_intensity), + "min": 
np.nanmin(masked_intensity), + "max": np.nanmax(masked_intensity), "median": np.median(masked_intensity), } for percentile in (5, 10, 25, 75, 90, 95): From a4d601ca6b20b9772180c4943f04bf190ef8a727 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Tue, 13 May 2025 22:36:42 +0200 Subject: [PATCH 10/13] Try to debug CLI tests --- flamingo_tools/measurements.py | 9 +++++---- flamingo_tools/segmentation/postprocessing.py | 6 +++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/flamingo_tools/measurements.py b/flamingo_tools/measurements.py index ac126a6..505a814 100644 --- a/flamingo_tools/measurements.py +++ b/flamingo_tools/measurements.py @@ -53,12 +53,12 @@ def intensity_measures(seg_id): bb_min = np.array([ row.bb_min_z.item(), row.bb_min_y.item(), row.bb_min_x.item() - ]) / resolution + ]).astype("float32") / resolution bb_min = np.round(bb_min, 0).astype("uint32") bb_max = np.array([ row.bb_max_z.item(), row.bb_max_y.item(), row.bb_max_x.item() - ]) / resolution + ]).astype("float32") / resolution bb_max = np.round(bb_max, 0).astype("uint32") bb = tuple( @@ -68,6 +68,7 @@ def intensity_measures(seg_id): local_image = image[bb] mask = segmentation[bb] == seg_id + assert mask.sum() > 0, f"Segmentation ID {seg_id} is empty." masked_intensity = local_image[mask] # Do the base intensity measurements. @@ -75,8 +76,8 @@ def intensity_measures(seg_id): "label_id": seg_id, "mean": np.mean(masked_intensity), "stdev": np.std(masked_intensity), - "min": np.nanmin(masked_intensity), - "max": np.nanmax(masked_intensity), + "min": np.min(masked_intensity), + "max": np.max(masked_intensity), "median": np.median(masked_intensity), } for percentile in (5, 10, 25, 75, 90, 95): diff --git a/flamingo_tools/segmentation/postprocessing.py b/flamingo_tools/segmentation/postprocessing.py index a7749a8..7716662 100644 --- a/flamingo_tools/segmentation/postprocessing.py +++ b/flamingo_tools/segmentation/postprocessing.py @@ -118,11 +118,11 @@ def neighbors_in_radius(table: pd.DataFrame, radius: float = 15) -> np.ndarray: def _compute_table(segmentation, resolution): props = measure.regionprops(segmentation) label_ids = np.array([prop.label for prop in props]) - coordinates = np.array([prop.centroid for prop in props]) + coordinates = np.array([prop.centroid for prop in props]).astype("float32") # transform pixel distance to physical units coordinates = coordinates * resolution - bb_min = np.array([prop.bbox[:3] for prop in props]) * resolution - bb_max = np.array([prop.bbox[3:] for prop in props]) * resolution + bb_min = np.array([prop.bbox[:3] for prop in props]).astype("float32") * resolution + bb_max = np.array([prop.bbox[3:] for prop in props]).astype("float32") * resolution sizes = np.array([prop.area for prop in props]) table = pd.DataFrame({ "label_id": label_ids, From b70c121bef56134bb43334fc29ac019d99f79e07 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Wed, 14 May 2025 08:30:00 +0200 Subject: [PATCH 11/13] Add test for segmentation table --- flamingo_tools/measurements.py | 4 +-- flamingo_tools/segmentation/postprocessing.py | 9 +++--- flamingo_tools/test_data.py | 13 ++++++-- test/test_segmentation/test_postprocessing.py | 31 ++++++++++++++++++- 4 files changed, 47 insertions(+), 10 deletions(-) diff --git a/flamingo_tools/measurements.py b/flamingo_tools/measurements.py index 505a814..755c883 100644 --- a/flamingo_tools/measurements.py +++ b/flamingo_tools/measurements.py @@ -9,7 +9,7 @@ from tqdm import tqdm from .file_utils import read_image_data -from 
.segmentation.postprocessing import _compute_table +from .segmentation.postprocessing import compute_table_on_the_fly def _measure_volume_and_surface(mask, resolution): @@ -45,7 +45,7 @@ def compute_object_measures_impl( table: The segmentation table. Will be computed on the fly if it is not given. """ if table is None: - table = _compute_table(segmentation, resolution) + table = compute_table_on_the_fly(segmentation, resolution=resolution) def intensity_measures(seg_id): # Get the bounding box. diff --git a/flamingo_tools/segmentation/postprocessing.py b/flamingo_tools/segmentation/postprocessing.py index 7716662..9b0f6ea 100644 --- a/flamingo_tools/segmentation/postprocessing.py +++ b/flamingo_tools/segmentation/postprocessing.py @@ -115,7 +115,9 @@ def neighbors_in_radius(table: pd.DataFrame, radius: float = 15) -> np.ndarray: # -def _compute_table(segmentation, resolution): +def compute_table_on_the_fly(segmentation: np.typing.ArrayLike, resolution: float) -> pd.DataFrame: + """ + """ props = measure.regionprops(segmentation) label_ids = np.array([prop.label for prop in props]) coordinates = np.array([prop.centroid for prop in props]).astype("float32") @@ -171,10 +173,9 @@ def filter_segmentation( n_ids n_ids_filtered """ - # Compute the table on the fly. - # NOTE: this currently doesn't work for large segmentations. + # Compute the table on the fly. This doesn't work for large segmentations. if table is None: - table = _compute_table(segmentation, resolution=resolution) + table = compute_table_on_the_fly(segmentation, resolution=resolution) n_ids = len(table) # First apply the size filter. diff --git a/flamingo_tools/test_data.py b/flamingo_tools/test_data.py index 4f9f5ef..fec082d 100644 --- a/flamingo_tools/test_data.py +++ b/flamingo_tools/test_data.py @@ -6,12 +6,19 @@ from skimage.data import binary_blobs, cells3d from skimage.measure import label -from .segmentation.postprocessing import _compute_table +from .segmentation.postprocessing import compute_table_on_the_fly SEGMENTATION_URL = "https://owncloud.gwdg.de/index.php/s/kwoGRYiJRRrswgw/download" def get_test_volume_and_segmentation(folder: str) -> Tuple[str, str, str]: + """Download a small volume with nuclei and corresponding segmentation. 
+ + Args: + folder: + + Returns: + """ os.makedirs(folder, exist_ok=True) segmentation_path = os.path.join(folder, "segmentation.tif") @@ -29,7 +36,7 @@ def get_test_volume_and_segmentation(folder: str) -> Tuple[str, str, str]: imageio.imwrite(image_path, nuclei) table_path = os.path.join(folder, "default.tsv") - table = _compute_table(segmentation, resolution=0.38) + table = compute_table_on_the_fly(segmentation, resolution=0.38) table.to_csv(table_path, sep="\t", index=False) return image_path, segmentation_path, table_path @@ -51,7 +58,7 @@ def create_image_data_and_segmentation(folder: str, size: int = 256) -> Tuple[st imageio.imwrite(segmentation_path, seg) table_path = os.path.join(folder, "default.tsv") - table = _compute_table(seg, resolution=0.38) + table = compute_table_on_the_fly(seg, resolution=0.38) table.to_csv(table_path, sep="\t", index=False) return image_path, segmentation_path, table_path diff --git a/test/test_segmentation/test_postprocessing.py b/test/test_segmentation/test_postprocessing.py index 531e2d2..c8be572 100644 --- a/test/test_segmentation/test_postprocessing.py +++ b/test/test_segmentation/test_postprocessing.py @@ -2,9 +2,12 @@ import tempfile import unittest +import imageio.v3 as imageio +import numpy as np +import pandas as pd from elf.io import open_file from skimage.data import binary_blobs -from skimage.measure import label +from skimage.measure import label, regionprops_table class TestPostprocessing(unittest.TestCase): @@ -44,6 +47,32 @@ def test_neighbors_in_radius(self): self._test_postprocessing(neighbors_in_radius, threshold=5) + def test_compute_table_on_the_fly(self): + from flamingo_tools.segmentation.postprocessing import compute_table_on_the_fly + from flamingo_tools.test_data import get_test_volume_and_segmentation + + with tempfile.TemporaryDirectory() as tmp_dir: + _, seg_path, _ = get_test_volume_and_segmentation(tmp_dir) + segmentation = imageio.imread(seg_path) + + resolution = 0.38 + table = compute_table_on_the_fly(segmentation, resolution=resolution) + + properties = ("label", "bbox", "centroid") + expected_table = regionprops_table(segmentation, properties=properties) + expected_table = pd.DataFrame(expected_table) + + for (col, col_exp) in [ + ("label_id", "label"), + ("anchor_x", "centroid-2"), ("anchor_y", "centroid-1"), ("anchor_z", "centroid-0"), + ("bb_min_x", "bbox-2"), ("bb_min_y", "bbox-1"), ("bb_min_z", "bbox-0"), + ("bb_max_x", "bbox-5"), ("bb_max_y", "bbox-4"), ("bb_max_z", "bbox-3"), + ]: + values = table[col].values + if col != "label_id": + values /= resolution + self.assertTrue(np.allclose(values, expected_table[col_exp].values)) + if __name__ == "__main__": unittest.main() From 944f6eebd83d172ff00f37f3899863dbcddf3630 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Wed, 14 May 2025 08:34:32 +0200 Subject: [PATCH 12/13] Try to fix CI --- flamingo_tools/measurements.py | 4 ++-- flamingo_tools/test_data.py | 12 ++++++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/flamingo_tools/measurements.py b/flamingo_tools/measurements.py index 755c883..99b6f95 100644 --- a/flamingo_tools/measurements.py +++ b/flamingo_tools/measurements.py @@ -54,12 +54,12 @@ def intensity_measures(seg_id): bb_min = np.array([ row.bb_min_z.item(), row.bb_min_y.item(), row.bb_min_x.item() ]).astype("float32") / resolution - bb_min = np.round(bb_min, 0).astype("uint32") + bb_min = np.round(bb_min, 0).astype("int32") bb_max = np.array([ row.bb_max_z.item(), row.bb_max_y.item(), row.bb_max_x.item() ]).astype("float32") / 
resolution - bb_max = np.round(bb_max, 0).astype("uint32") + bb_max = np.round(bb_max, 0).astype("int32") bb = tuple( slice(max(bmin - 1, 0), min(bmax + 1, sh)) diff --git a/flamingo_tools/test_data.py b/flamingo_tools/test_data.py index fec082d..7450a3b 100644 --- a/flamingo_tools/test_data.py +++ b/flamingo_tools/test_data.py @@ -15,9 +15,12 @@ def get_test_volume_and_segmentation(folder: str) -> Tuple[str, str, str]: """Download a small volume with nuclei and corresponding segmentation. Args: - folder: + folder: The test data folder. The data will be downloaded to this folder. Returns: + The path to the image, stored as tif. + The path to the segmentation, stored as tif. + The path to the segmentation table, stored as tsv. """ os.makedirs(folder, exist_ok=True) @@ -46,7 +49,12 @@ def create_image_data_and_segmentation(folder: str, size: int = 256) -> Tuple[st """Create test data containing an image, a corresponding segmentation and segmentation table. Args: - folder: The test data folder. + folder: The test data folder. The data will be written to this folder. + + Returns: + The path to the image, stored as tif. + The path to the segmentation, stored as tif. + The path to the segmentation table, stored as tsv. """ os.makedirs(folder, exist_ok=True) data = binary_blobs(size, n_dim=3).astype("uint8") * 255 From f2ecef02de0c6a4395d22cd705cea8d7928af4b5 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Wed, 14 May 2025 08:39:15 +0200 Subject: [PATCH 13/13] Update doc strings --- flamingo_tools/measurements.py | 3 +++ flamingo_tools/segmentation/postprocessing.py | 18 ++++++++++++++---- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/flamingo_tools/measurements.py b/flamingo_tools/measurements.py index 99b6f95..442b4fe 100644 --- a/flamingo_tools/measurements.py +++ b/flamingo_tools/measurements.py @@ -43,6 +43,9 @@ def compute_object_measures_impl( n_threads: The number of threads to use for computation. resolution: The resolution / voxel size of the data. table: The segmentation table. Will be computed on the fly if it is not given. + + Returns: + The table with per object measurements. """ if table is None: table = compute_table_on_the_fly(segmentation, resolution=resolution) diff --git a/flamingo_tools/segmentation/postprocessing.py b/flamingo_tools/segmentation/postprocessing.py index 9b0f6ea..3b24ce9 100644 --- a/flamingo_tools/segmentation/postprocessing.py +++ b/flamingo_tools/segmentation/postprocessing.py @@ -116,7 +116,17 @@ def neighbors_in_radius(table: pd.DataFrame, radius: float = 15) -> np.ndarray: def compute_table_on_the_fly(segmentation: np.typing.ArrayLike, resolution: float) -> pd.DataFrame: - """ + """Compute a segmentation table compatible with MoBIE. + + The table contains information about the number of pixels per object, + the anchor (= centroid) and the bounding box. Anchor and bounding box are given in physical coordinates. + + Args: + segmentation: The segmentation for which to compute the table. + resolution: The physical voxel spacing of the data. + + Returns: + The segmentation table. 
""" props = measure.regionprops(segmentation) label_ids = np.array([prop.label for prop in props]) @@ -128,7 +138,6 @@ def compute_table_on_the_fly(segmentation: np.typing.ArrayLike, resolution: floa sizes = np.array([prop.area for prop in props]) table = pd.DataFrame({ "label_id": label_ids, - "n_pixels": sizes, "anchor_x": coordinates[:, 2], "anchor_y": coordinates[:, 1], "anchor_z": coordinates[:, 0], @@ -138,6 +147,7 @@ def compute_table_on_the_fly(segmentation: np.typing.ArrayLike, resolution: floa "bb_max_x": bb_max[:, 2], "bb_max_y": bb_max[:, 1], "bb_max_z": bb_max[:, 0], + "n_pixels": sizes, }) return table @@ -170,8 +180,8 @@ def filter_segmentation( spatial_statistics_kwargs: Arguments for spatial statistics function Returns: - n_ids - n_ids_filtered + The number of objects before filtering. + The number of objects after filtering. """ # Compute the table on the fly. This doesn't work for large segmentations. if table is None: