From efd164a3cd2598ee341b8b132e90a1f234e74480 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Fri, 2 May 2025 14:22:06 +0200 Subject: [PATCH 01/13] Add script for measuring the number of SGNs --- scripts/measurements/measure_sgns.py | 48 ++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 scripts/measurements/measure_sgns.py diff --git a/scripts/measurements/measure_sgns.py b/scripts/measurements/measure_sgns.py new file mode 100644 index 0000000..f94730a --- /dev/null +++ b/scripts/measurements/measure_sgns.py @@ -0,0 +1,48 @@ +import json +import os + +import numpy as np +import pandas as pd +from flamingo_tools.s3_utils import create_s3_target, BUCKET_NAME + + +def open_json(fs, path): + s3_path = os.path.join(BUCKET_NAME, path) + with fs.open(s3_path, "r") as f: + content = json.load(f) + return content + + +def open_tsv(fs, path): + s3_path = os.path.join(BUCKET_NAME, path) + with fs.open(s3_path, "r") as f: + table = pd.read_csv(f, sep="\t") + return table + + +def main(): + fs = create_s3_target() + project_info = open_json(fs, "project.json") + for dataset in project_info["datasets"]: + if dataset == "fens": + continue + print(dataset) + dataset_info = open_json(fs, os.path.join(dataset, "dataset.json")) + sources = dataset_info["sources"] + for source, source_info in sources.items(): + if not source.startswith("SGN"): + continue + assert "segmentation" in source_info + source_info = source_info["segmentation"] + table_path = source_info["tableData"]["tsv"]["relativePath"] + table = open_tsv(fs, os.path.join(dataset, table_path, "default.tsv")) + component_labels = table.component_labels.values + remaining_sgns = component_labels[component_labels != 0] + print(source) + print("Number of SGNs (all components) :", len(remaining_sgns)) + _, n_per_component = np.unique(remaining_sgns, return_counts=True) + print("Number of SGNs (largest component):", max(n_per_component)) + + +if __name__ == "__main__": + main() From 32cd5b64dc2e63de292fffb8f80c554d2d7cc883 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Thu, 8 May 2025 12:25:26 +0200 Subject: [PATCH 02/13] Implement intensity and morphology measurement function --- flamingo_tools/measurements.py | 110 ++++++++++++++++++ flamingo_tools/segmentation/postprocessing.py | 8 ++ flamingo_tools/test_data.py | 28 +++++ test/test_measurements.py | 42 +++++++ 4 files changed, 188 insertions(+) create mode 100644 flamingo_tools/measurements.py create mode 100644 test/test_measurements.py diff --git a/flamingo_tools/measurements.py b/flamingo_tools/measurements.py new file mode 100644 index 0000000..be6bdaa --- /dev/null +++ b/flamingo_tools/measurements.py @@ -0,0 +1,110 @@ +import multiprocessing as mp +from concurrent import futures +from typing import Optional + +import numpy as np +import pandas as pd +import trimesh +from skimage.measure import marching_cubes +from tqdm import tqdm + +from .file_utils import read_image_data + + +def _measure_volume_and_surface(mask, resolution): + # Use marching_cubes for 3D data + verts, faces, normals, _ = marching_cubes(mask, spacing=(resolution,) * 3) + + mesh = trimesh.Trimesh(vertices=verts, faces=faces, vertex_normals=normals) + surface = mesh.area + if mesh.is_watertight: + volume = np.abs(mesh.volume) + else: + volume = np.nan + + return volume, surface + + +# Could also support s3 directly? 
+def compute_object_measures( + image_path: str, + segmentation_path: str, + segmentation_table_path: str, + output_table_path: str, + image_key: Optional[str] = None, + segmentation_key: Optional[str] = None, + n_threads: Optional[int] = None, + resolution: float = 0.38, +): + """ + + Args: + image_path: + segmentation_path: + segmentation_table_path: + output_table_path: + image_key: + segmentation_key: + n_threads: + resolution: + """ + # First, we load the pre-computed segmentation table from MoBIE. + table = pd.read_csv(segmentation_table_path, sep="\t") + + # Then, open the volumes. + image = read_image_data(image_path, image_key) + segmentation = read_image_data(segmentation_path, segmentation_key) + + def intensity_measures(seg_id): + # Get the bounding box. + row = table[table.label_id == seg_id] + + bb_min = np.array([ + row.bb_min_z.item(), row.bb_min_y.item(), row.bb_min_x.item() + ]) / resolution + bb_min = np.round(bb_min, 0).astype("uint32") + + bb_max = np.array([ + row.bb_max_z.item(), row.bb_max_y.item(), row.bb_max_x.item() + ]) / resolution + bb_max = np.round(bb_max, 0).astype("uint32") + + bb = tuple( + slice(max(bmin - 1, 0), min(bmax + 1, sh)) + for bmin, bmax, sh in zip(bb_min, bb_max, image.shape) + ) + + local_image = image[bb] + mask = segmentation[bb] == seg_id + masked_intensity = local_image[mask] + + # Do the base intensity measurements. + measures = { + "label_id": seg_id, + "mean": np.mean(masked_intensity), + "stdev": np.std(masked_intensity), + "min": np.min(masked_intensity), + "max": np.max(masked_intensity), + "median": np.median(masked_intensity), + } + for percentile in (5, 10, 25, 75, 90, 95): + measures[f"percentile-{percentile}"] = np.percentile(masked_intensity, percentile) + + # Do the volume and surface measurement. + volume, surface = _measure_volume_and_surface(mask, resolution) + measures["volume"] = volume + measures["surface"] = surface + return measures + + seg_ids = table.label_id.values + n_threads = mp.cpu_count() if n_threads is None else n_threads + with futures.ThreadPoolExecutor(n_threads) as pool: + measures = list(tqdm( + pool.map(intensity_measures, seg_ids), + total=len(seg_ids), desc="Compute intensity measures" + )) + + # Create the result table and save it. 
+ keys = measures[0].keys() + measures = pd.DataFrame({k: [measure[k] for measure in measures] for k in keys}) + measures.to_csv(output_table_path, sep="\t", index=False) diff --git a/flamingo_tools/segmentation/postprocessing.py b/flamingo_tools/segmentation/postprocessing.py index 7ad987b..f2a956c 100644 --- a/flamingo_tools/segmentation/postprocessing.py +++ b/flamingo_tools/segmentation/postprocessing.py @@ -119,6 +119,8 @@ def _compute_table(segmentation, resolution): coordinates = np.array([prop.centroid for prop in props]) # transform pixel distance to physical units coordinates = coordinates * resolution + bb_min = np.array([prop.bbox[:3] for prop in props]) * resolution + bb_max = np.array([prop.bbox[3:] for prop in props]) * resolution sizes = np.array([prop.area for prop in props]) table = pd.DataFrame({ "label_id": label_ids, @@ -126,6 +128,12 @@ def _compute_table(segmentation, resolution): "anchor_x": coordinates[:, 2], "anchor_y": coordinates[:, 1], "anchor_z": coordinates[:, 0], + "bb_min_x": bb_min[:, 2], + "bb_min_y": bb_min[:, 1], + "bb_min_z": bb_min[:, 0], + "bb_max_x": bb_max[:, 2], + "bb_max_y": bb_max[:, 1], + "bb_max_z": bb_max[:, 0], }) return table diff --git a/flamingo_tools/test_data.py b/flamingo_tools/test_data.py index bca605d..2abcc41 100644 --- a/flamingo_tools/test_data.py +++ b/flamingo_tools/test_data.py @@ -1,7 +1,35 @@ import os +from typing import Tuple import imageio.v3 as imageio from skimage.data import binary_blobs +from skimage.measure import label + +from .segmentation.postprocessing import _compute_table + + +def create_image_data_and_segmentation( + folder: str, size: int = 256 +) -> Tuple[str, str, str]: + """Create test data containing an image, a corresponding segmentation and segmentation table. + + Args: + folder: The test data folder. 
+ """ + os.makedirs(folder, exist_ok=True) + data = binary_blobs(size, n_dim=3).astype("uint8") * 255 + seg = label(data) + + image_path = os.path.join(folder, "image.tif") + segmentation_path = os.path.join(folder, "segmentation.tif") + imageio.imwrite(image_path, data) + imageio.imwrite(segmentation_path, seg) + + table_path = os.path.join(folder, "default.tsv") + table = _compute_table(seg, resolution=0.38) + table.to_csv(table_path, sep="\t", index=False) + + return image_path, segmentation_path, table_path # TODO add metadata diff --git a/test/test_measurements.py b/test/test_measurements.py new file mode 100644 index 0000000..cc4ee88 --- /dev/null +++ b/test/test_measurements.py @@ -0,0 +1,42 @@ +import os +import unittest +from shutil import rmtree + +import pandas as pd + + +class TestDataConversion(unittest.TestCase): + folder = "./tmp" + + def setUp(self): + from flamingo_tools.test_data import create_image_data_and_segmentation + + self.image_path, self.seg_path, self.table_path =\ + create_image_data_and_segmentation(self.folder) + + def tearDown(self): + try: + rmtree(self.folder) + except Exception: + pass + + def test_compute_object_measures(self): + from flamingo_tools.measurements import compute_object_measures + + output_path = os.path.join(self.folder, "measurements.tsv") + compute_object_measures( + self.image_path, self.seg_path, self.table_path, output_path, n_threads=1 + ) + self.assertTrue(os.path.exists(output_path)) + + table = pd.read_csv(output_path, sep="\t") + self.assertTrue(len(table) >= 1) + expected_columns = ["label_id", "mean", "stdev", "min", "max", "median"] + expected_columns.extend([f"percentile-{p}" for p in (5, 10, 25, 75, 90, 95)]) + expected_columns.extend(["volume", "surface"]) + for col in expected_columns: + self.assertIn(col, table.columns) + + +if __name__ == "__main__": + unittest.main() From 5b38552b73029ea37d1b33c1eb0ac343f93dbada Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Thu, 8 May 2025 13:32:51 +0200 Subject: [PATCH 03/13] Add trimesh dependency --- environment.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/environment.yaml b/environment.yaml index 2aedfb7..4dca027 100644 --- a/environment.yaml +++ b/environment.yaml @@ -12,5 +12,6 @@ dependencies: - pytorch - s3fs - torch_em + - trimesh - z5py - zarr From 9d998f549258cb1dc368faa44dd6b22228e884e6 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Thu, 8 May 2025 19:21:07 +0200 Subject: [PATCH 04/13] Update the measurement implementation --- flamingo_tools/measurements.py | 80 ++++++++++++++++++++++++---------- 1 file changed, 56 insertions(+), 24 deletions(-) diff --git a/flamingo_tools/measurements.py b/flamingo_tools/measurements.py index be6bdaa..5e314d1 100644 --- a/flamingo_tools/measurements.py +++ b/flamingo_tools/measurements.py @@ -9,6 +9,7 @@ from tqdm import tqdm from .file_utils import read_image_data +from .segmentation.postprocessing import _compute_table def _measure_volume_and_surface(mask, resolution): @@ -25,35 +26,26 @@ def _measure_volume_and_surface(mask, resolution): return volume, surface -# Could also support s3 directly? 
-def compute_object_measures( - image_path: str, - segmentation_path: str, - segmentation_table_path: str, - output_table_path: str, - image_key: Optional[str] = None, - segmentation_key: Optional[str] = None, +def compute_object_measures_impl( + image: np.typing.ArrayLike, + segmentation: np.typing.ArrayLike, n_threads: Optional[int] = None, resolution: float = 0.38, -): - """ + table: Optional[pd.DataFrame] = None, +) -> pd.DataFrame: + """Compute simple intensity and morphology measures for each segmented cell in a segmentation. + + See `compute_object_measures` for details. Args: - image_path: - segmentation_path: - segmentation_table_path: - output_table_path: - image_key: - segmentation_key: - n_threads: - resolution: + image: The image data. + segmentation: The segmentation. + n_threads: The number of threads to use for computation. + resolution: The resolution / voxel size of the data. + table: The segmentation table. Will be computed on the fly if it is not given. """ - # First, we load the pre-computed segmentation table from MoBIE. - table = pd.read_csv(segmentation_table_path, sep="\t") - - # Then, open the volumes. - image = read_image_data(image_path, image_key) - segmentation = read_image_data(segmentation_path, segmentation_key) + if table is None: + table = _compute_table(segmentation, resolution) def intensity_measures(seg_id): # Get the bounding box. @@ -107,4 +99,44 @@ def intensity_measures(seg_id): # Create the result table and save it. keys = measures[0].keys() measures = pd.DataFrame({k: [measure[k] for measure in measures] for k in keys}) + return measures + + +# Could also support s3 directly? +def compute_object_measures( + image_path: str, + segmentation_path: str, + segmentation_table_path: str, + output_table_path: str, + image_key: Optional[str] = None, + segmentation_key: Optional[str] = None, + n_threads: Optional[int] = None, + resolution: float = 0.38, +) -> None: + """Compute simple intensity and morphology measures for each segmented cell in a segmentation. + + This computes the mean, standard deviation, minimum, maximum, median and + 5th, 10th, 25th, 75th, 90th and 95th percentile of the intensity image + per cell, as well as the volume and surface. + + Args: + image_path: The filepath to the image data. Either a tif or hdf5/zarr/n5 file. + segmentation_path: The filepath to the segmentation data. Either a tif or hdf5/zarr/n5 file. + segmentation_table_path: The path to the segmentation table in MoBIE format. + output_table_path: The path for saving the segmentation with intensity measures. + image_key: The key (= internal path) for the image data. Not needed fir tif. + segmentation_key: The key (= internal path) for the segmentation data. Not needed for tif. + n_threads: The number of threads to use for computation. + resolution: The resolution / voxel size of the data. + """ + # First, we load the pre-computed segmentation table from MoBIE. + table = pd.read_csv(segmentation_table_path, sep="\t") + + # Then, open the volumes. 
+ image = read_image_data(image_path, image_key) + segmentation = read_image_data(segmentation_path, segmentation_key) + + measures = compute_object_measures_impl( + image, segmentation, n_threads, resolution, table=table + ) measures.to_csv(output_table_path, sep="\t", index=False) From 33aae708c6ae25fcd0d4b126a025613a2feccb4a Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Fri, 9 May 2025 10:27:50 +0200 Subject: [PATCH 05/13] Update the measurement test --- test/test_measurements.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/test_measurements.py b/test/test_measurements.py index cc4ee88..48f1667 100644 --- a/test/test_measurements.py +++ b/test/test_measurements.py @@ -2,6 +2,7 @@ import unittest from shutil import rmtree +import imageio.v3 as imageio import pandas as pd @@ -37,6 +38,10 @@ def test_compute_object_measures(self): for col in expected_columns: self.assertIn(col, table.columns) + n_objects = int(imageio.imread(self.seg_path).max()) + expected_shape = (n_objects, len(expected_columns)) + self.assertEqual(table.shape, expected_shape) + if __name__ == "__main__": unittest.main() From 03ec9552166d98c25911441ee8a7f6e86442a99c Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Tue, 13 May 2025 21:22:22 +0200 Subject: [PATCH 06/13] Add segmentation and measurement scripts for SGN stainings --- .../segmentation/unet_prediction.py | 5 +- .../check_segmentation.py | 35 +++++++++++ .../measure_intensities.py | 39 +++++++++++++ .../sgn_stain_predictions/run_prediction.py | 58 +++++++++++++++++++ 4 files changed, 136 insertions(+), 1 deletion(-) create mode 100644 scripts/sgn_stain_predictions/check_segmentation.py create mode 100644 scripts/sgn_stain_predictions/measure_intensities.py create mode 100644 scripts/sgn_stain_predictions/run_prediction.py diff --git a/flamingo_tools/segmentation/unet_prediction.py b/flamingo_tools/segmentation/unet_prediction.py index a1d1ba8..8b754d3 100644 --- a/flamingo_tools/segmentation/unet_prediction.py +++ b/flamingo_tools/segmentation/unet_prediction.py @@ -324,6 +324,7 @@ def run_unet_prediction( scale: Optional[float] = None, block_shape: Optional[Tuple[int, int, int]] = None, halo: Optional[Tuple[int, int, int]] = None, + use_mask: bool = True, ) -> None: """Run prediction and segmentation with a distance U-Net. @@ -337,10 +338,12 @@ def run_unet_prediction( By default the data will not be rescaled. block_shape: The block-shape for running the prediction. halo: The halo (= block overlap) to use for prediction. + use_mask: Whether to use the masking heuristics to not run inference on empty blocks. 
""" os.makedirs(output_folder, exist_ok=True) - find_mask(input_path, input_key, output_folder) + if use_mask: + find_mask(input_path, input_key, output_folder) original_shape = prediction_impl( input_path, input_key, output_folder, model_path, scale, block_shape, halo diff --git a/scripts/sgn_stain_predictions/check_segmentation.py b/scripts/sgn_stain_predictions/check_segmentation.py new file mode 100644 index 0000000..3a4799c --- /dev/null +++ b/scripts/sgn_stain_predictions/check_segmentation.py @@ -0,0 +1,35 @@ +import os +from glob import glob + +import imageio.v3 as imageio +import napari + + +ROOT = "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/LS_sampleprepcomparison_crops" +SAVE_ROOT = "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/LS_sampleprepcomparison_crops/segmentations" + + +def main(): + files = sorted(glob(os.path.join(ROOT, "**/*.tif"))) + for ff in files: + if "segmentations" in ff: + return + print("Visualizing", ff) + rel_path = os.path.relpath(ff, ROOT) + seg_path = os.path.join(SAVE_ROOT, rel_path) + + image = imageio.imread(ff) + if os.path.exists(seg_path): + seg = imageio.imread(seg_path) + else: + seg = None + + v = napari.Viewer() + v.add_image(image) + if seg is not None: + v.add_labels(seg) + napari.run() + + +if __name__ == "__main__": + main() diff --git a/scripts/sgn_stain_predictions/measure_intensities.py b/scripts/sgn_stain_predictions/measure_intensities.py new file mode 100644 index 0000000..99f67ac --- /dev/null +++ b/scripts/sgn_stain_predictions/measure_intensities.py @@ -0,0 +1,39 @@ +import os +from glob import glob + +import tifffile +from flamingo_tools.measurements import compute_object_measures_impl + + +ROOT = "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/LS_sampleprepcomparison_crops" +SAVE_ROOT = "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/LS_sampleprepcomparison_crops/segmentations" + + +def measure_intensities(ff): + rel_path = os.path.relpath(ff, ROOT) + out_path = os.path.join("./measurements", rel_path.replace(".tif", ".xlsx")) + if os.path.exists(out_path): + return + + print("Computing measurements for", rel_path) + seg_path = os.path.join(SAVE_ROOT, rel_path) + + image_data = tifffile.memmap(ff) + seg_data = tifffile.memmap(seg_path) + + table = compute_object_measures_impl(image_data, seg_data, n_threads=8) + + os.makedirs(os.path.split(out_path)[0], exist_ok=True) + table.to_excel(out_path, index=False) + + +def main(): + files = sorted(glob(os.path.join(ROOT, "**/*.tif"))) + for ff in files: + if "segmentations" in ff: + return + measure_intensities(ff) + + +if __name__ == "__main__": + main() diff --git a/scripts/sgn_stain_predictions/run_prediction.py b/scripts/sgn_stain_predictions/run_prediction.py new file mode 100644 index 0000000..1badae3 --- /dev/null +++ b/scripts/sgn_stain_predictions/run_prediction.py @@ -0,0 +1,58 @@ +import os +import tempfile +from glob import glob + +import tifffile +from elf.io import open_file +from flamingo_tools.segmentation import run_unet_prediction + +ROOT = "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/LS_sampleprepcomparison_crops" +MODEL_PATH = "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/trained_models/SGN/cochlea_distance_unet_SGN_March2025Model" # noqa + +SAVE_ROOT = "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/LS_sampleprepcomparison_crops/segmentations" + + +def check_data(): + files = glob(os.path.join(ROOT, "**/*.tif"), recursive=True) + for ff in files: + 
rel_path = sorted(os.path.relpath(ff, ROOT)) + shape = tifffile.memmap(ff).shape + print(rel_path, shape) + + +def segment_crop(input_file): + fname = os.path.relpath(input_file, ROOT) + out_file = os.path.join(SAVE_ROOT, fname) + if "segmentations" in input_file: + return + if os.path.exists(out_file): + return + + print("Run prediction for", input_file) + os.makedirs(os.path.split(out_file)[0], exist_ok=True) + with tempfile.TemporaryDirectory() as tmp_folder: + run_unet_prediction( + input_file, input_key=None, output_folder=tmp_folder, + model_path=MODEL_PATH, min_size=1000, use_mask=False, + ) + seg_path = os.path.join(tmp_folder, "segmentation.zarr") + with open_file(seg_path, mode="r") as f: + seg = f["segmentation"][:] + + print("Writing output to", out_file) + tifffile.imwrite(out_file, seg, bigtiff=True) + + +def segment_all(): + files = sorted(glob(os.path.join(ROOT, "**/*.tif"), recursive=True)) + for ff in files: + segment_crop(ff) + + +def main(): + # check_data() + segment_all() + + +if __name__ == "__main__": + main() From 359b3e3e408a904779cd9ab7734286a46ce3bb32 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Tue, 13 May 2025 22:02:13 +0200 Subject: [PATCH 07/13] Update measurement test --- flamingo_tools/test_data.py | 33 +++++++++++++++++++++++++++++---- test/test_measurements.py | 7 +++---- 2 files changed, 32 insertions(+), 8 deletions(-) diff --git a/flamingo_tools/test_data.py b/flamingo_tools/test_data.py index 2abcc41..4f9f5ef 100644 --- a/flamingo_tools/test_data.py +++ b/flamingo_tools/test_data.py @@ -2,15 +2,40 @@ from typing import Tuple import imageio.v3 as imageio -from skimage.data import binary_blobs +import requests +from skimage.data import binary_blobs, cells3d from skimage.measure import label from .segmentation.postprocessing import _compute_table +SEGMENTATION_URL = "https://owncloud.gwdg.de/index.php/s/kwoGRYiJRRrswgw/download" -def create_image_data_and_segmentation( - folder: str, size: int = 256 -) -> Tuple[str, str, str]: + +def get_test_volume_and_segmentation(folder: str) -> Tuple[str, str, str]: + os.makedirs(folder, exist_ok=True) + + segmentation_path = os.path.join(folder, "segmentation.tif") + resp = requests.get(SEGMENTATION_URL) + resp.raise_for_status() + + with open(segmentation_path, "wb") as f: + f.write(resp.content) + + nuclei = cells3d()[20:40, 1] + segmentation = imageio.imread(segmentation_path) + assert nuclei.shape == segmentation.shape + + image_path = os.path.join(folder, "image.tif") + imageio.imwrite(image_path, nuclei) + + table_path = os.path.join(folder, "default.tsv") + table = _compute_table(segmentation, resolution=0.38) + table.to_csv(table_path, sep="\t", index=False) + + return image_path, segmentation_path, table_path + + +def create_image_data_and_segmentation(folder: str, size: int = 256) -> Tuple[str, str, str]: """Create test data containing an image, a corresponding segmentation and segmentation table. 
Args: diff --git a/test/test_measurements.py b/test/test_measurements.py index 48f1667..cecb305 100644 --- a/test/test_measurements.py +++ b/test/test_measurements.py @@ -6,14 +6,13 @@ import pandas as pd -class TestDataConversion(unittest.TestCase): +class TestMeasurements(unittest.TestCase): folder = "./tmp" def setUp(self): - from flamingo_tools.test_data import create_image_data_and_segmentation + from flamingo_tools.test_data import get_test_volume_and_segmentation - self.image_path, self.seg_path, self.table_path =\ - create_image_data_and_segmentation(self.folder) + self.image_path, self.seg_path, self.table_path = get_test_volume_and_segmentation(self.folder) def tearDown(self): try: From 158afcb9588ddf6cf36074773c40ebb32a233c30 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Tue, 13 May 2025 22:24:03 +0200 Subject: [PATCH 08/13] Update measurement tests --- flamingo_tools/measurements.py | 4 ++-- test/test_measurements.py | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/flamingo_tools/measurements.py b/flamingo_tools/measurements.py index 5e314d1..8114c7b 100644 --- a/flamingo_tools/measurements.py +++ b/flamingo_tools/measurements.py @@ -89,11 +89,11 @@ def intensity_measures(seg_id): return measures seg_ids = table.label_id.values + assert len(seg_ids) > 0, "The segmentation table is empty." n_threads = mp.cpu_count() if n_threads is None else n_threads with futures.ThreadPoolExecutor(n_threads) as pool: measures = list(tqdm( - pool.map(intensity_measures, seg_ids), - total=len(seg_ids), desc="Compute intensity measures" + pool.map(intensity_measures, seg_ids), total=len(seg_ids), desc="Compute intensity measures" )) # Create the result table and save it. diff --git a/test/test_measurements.py b/test/test_measurements.py index cecb305..7ab67e3 100644 --- a/test/test_measurements.py +++ b/test/test_measurements.py @@ -4,6 +4,8 @@ import imageio.v3 as imageio import pandas as pd +import numpy as np +from skimage.measure import regionprops_table class TestMeasurements(unittest.TestCase): @@ -41,6 +43,18 @@ def test_compute_object_measures(self): expected_shape = (n_objects, len(expected_columns)) self.assertEqual(table.shape, expected_shape) + image = imageio.imread(self.image_path) + segmentation = imageio.imread(self.seg_path) + properties = ("label", "intensity_mean", "intensity_std", "intensity_min", "intensity_max") + expected_measures = regionprops_table(segmentation, intensity_image=image, properties=properties) + expected_measures = pd.DataFrame(expected_measures) + + for (col, col_exp) in [ + ("label_id", "label"), ("mean", "intensity_mean"), ("stdev", "intensity_std"), + ("min", "intensity_min"), ("max", "intensity_max"), + ]: + self.assertTrue(np.allclose(table[col].values, expected_measures[col_exp].values)) + if __name__ == "__main__": unittest.main() From 632834bd54b71826f9a641c22ba42ff006642d8e Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Tue, 13 May 2025 22:30:57 +0200 Subject: [PATCH 09/13] Try to fix CI test --- flamingo_tools/measurements.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flamingo_tools/measurements.py b/flamingo_tools/measurements.py index 8114c7b..ac126a6 100644 --- a/flamingo_tools/measurements.py +++ b/flamingo_tools/measurements.py @@ -75,8 +75,8 @@ def intensity_measures(seg_id): "label_id": seg_id, "mean": np.mean(masked_intensity), "stdev": np.std(masked_intensity), - "min": np.min(masked_intensity), - "max": np.max(masked_intensity), + "min": 
np.nanmin(masked_intensity), + "max": np.nanmax(masked_intensity), "median": np.median(masked_intensity), } for percentile in (5, 10, 25, 75, 90, 95): From a4d601ca6b20b9772180c4943f04bf190ef8a727 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Tue, 13 May 2025 22:36:42 +0200 Subject: [PATCH 10/13] Try to debug CLI tests --- flamingo_tools/measurements.py | 9 +++++---- flamingo_tools/segmentation/postprocessing.py | 6 +++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/flamingo_tools/measurements.py b/flamingo_tools/measurements.py index ac126a6..505a814 100644 --- a/flamingo_tools/measurements.py +++ b/flamingo_tools/measurements.py @@ -53,12 +53,12 @@ def intensity_measures(seg_id): bb_min = np.array([ row.bb_min_z.item(), row.bb_min_y.item(), row.bb_min_x.item() - ]) / resolution + ]).astype("float32") / resolution bb_min = np.round(bb_min, 0).astype("uint32") bb_max = np.array([ row.bb_max_z.item(), row.bb_max_y.item(), row.bb_max_x.item() - ]) / resolution + ]).astype("float32") / resolution bb_max = np.round(bb_max, 0).astype("uint32") bb = tuple( @@ -68,6 +68,7 @@ def intensity_measures(seg_id): local_image = image[bb] mask = segmentation[bb] == seg_id + assert mask.sum() > 0, f"Segmentation ID {seg_id} is empty." masked_intensity = local_image[mask] # Do the base intensity measurements. @@ -75,8 +76,8 @@ def intensity_measures(seg_id): "label_id": seg_id, "mean": np.mean(masked_intensity), "stdev": np.std(masked_intensity), - "min": np.nanmin(masked_intensity), - "max": np.nanmax(masked_intensity), + "min": np.min(masked_intensity), + "max": np.max(masked_intensity), "median": np.median(masked_intensity), } for percentile in (5, 10, 25, 75, 90, 95): diff --git a/flamingo_tools/segmentation/postprocessing.py b/flamingo_tools/segmentation/postprocessing.py index a7749a8..7716662 100644 --- a/flamingo_tools/segmentation/postprocessing.py +++ b/flamingo_tools/segmentation/postprocessing.py @@ -118,11 +118,11 @@ def neighbors_in_radius(table: pd.DataFrame, radius: float = 15) -> np.ndarray: def _compute_table(segmentation, resolution): props = measure.regionprops(segmentation) label_ids = np.array([prop.label for prop in props]) - coordinates = np.array([prop.centroid for prop in props]) + coordinates = np.array([prop.centroid for prop in props]).astype("float32") # transform pixel distance to physical units coordinates = coordinates * resolution - bb_min = np.array([prop.bbox[:3] for prop in props]) * resolution - bb_max = np.array([prop.bbox[3:] for prop in props]) * resolution + bb_min = np.array([prop.bbox[:3] for prop in props]).astype("float32") * resolution + bb_max = np.array([prop.bbox[3:] for prop in props]).astype("float32") * resolution sizes = np.array([prop.area for prop in props]) table = pd.DataFrame({ "label_id": label_ids, From b70c121bef56134bb43334fc29ac019d99f79e07 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Wed, 14 May 2025 08:30:00 +0200 Subject: [PATCH 11/13] Add test for segmentation table --- flamingo_tools/measurements.py | 4 +-- flamingo_tools/segmentation/postprocessing.py | 9 +++--- flamingo_tools/test_data.py | 13 ++++++-- test/test_segmentation/test_postprocessing.py | 31 ++++++++++++++++++- 4 files changed, 47 insertions(+), 10 deletions(-) diff --git a/flamingo_tools/measurements.py b/flamingo_tools/measurements.py index 505a814..755c883 100644 --- a/flamingo_tools/measurements.py +++ b/flamingo_tools/measurements.py @@ -9,7 +9,7 @@ from tqdm import tqdm from .file_utils import read_image_data -from 
.segmentation.postprocessing import _compute_table +from .segmentation.postprocessing import compute_table_on_the_fly def _measure_volume_and_surface(mask, resolution): @@ -45,7 +45,7 @@ def compute_object_measures_impl( table: The segmentation table. Will be computed on the fly if it is not given. """ if table is None: - table = _compute_table(segmentation, resolution) + table = compute_table_on_the_fly(segmentation, resolution=resolution) def intensity_measures(seg_id): # Get the bounding box. diff --git a/flamingo_tools/segmentation/postprocessing.py b/flamingo_tools/segmentation/postprocessing.py index 7716662..9b0f6ea 100644 --- a/flamingo_tools/segmentation/postprocessing.py +++ b/flamingo_tools/segmentation/postprocessing.py @@ -115,7 +115,9 @@ def neighbors_in_radius(table: pd.DataFrame, radius: float = 15) -> np.ndarray: # -def _compute_table(segmentation, resolution): +def compute_table_on_the_fly(segmentation: np.typing.ArrayLike, resolution: float) -> pd.DataFrame: + """ + """ props = measure.regionprops(segmentation) label_ids = np.array([prop.label for prop in props]) coordinates = np.array([prop.centroid for prop in props]).astype("float32") @@ -171,10 +173,9 @@ def filter_segmentation( n_ids n_ids_filtered """ - # Compute the table on the fly. - # NOTE: this currently doesn't work for large segmentations. + # Compute the table on the fly. This doesn't work for large segmentations. if table is None: - table = _compute_table(segmentation, resolution=resolution) + table = compute_table_on_the_fly(segmentation, resolution=resolution) n_ids = len(table) # First apply the size filter. diff --git a/flamingo_tools/test_data.py b/flamingo_tools/test_data.py index 4f9f5ef..fec082d 100644 --- a/flamingo_tools/test_data.py +++ b/flamingo_tools/test_data.py @@ -6,12 +6,19 @@ from skimage.data import binary_blobs, cells3d from skimage.measure import label -from .segmentation.postprocessing import _compute_table +from .segmentation.postprocessing import compute_table_on_the_fly SEGMENTATION_URL = "https://owncloud.gwdg.de/index.php/s/kwoGRYiJRRrswgw/download" def get_test_volume_and_segmentation(folder: str) -> Tuple[str, str, str]: + """Download a small volume with nuclei and corresponding segmentation. 
+ + Args: + folder: + + Returns: + """ os.makedirs(folder, exist_ok=True) segmentation_path = os.path.join(folder, "segmentation.tif") @@ -29,7 +36,7 @@ def get_test_volume_and_segmentation(folder: str) -> Tuple[str, str, str]: imageio.imwrite(image_path, nuclei) table_path = os.path.join(folder, "default.tsv") - table = _compute_table(segmentation, resolution=0.38) + table = compute_table_on_the_fly(segmentation, resolution=0.38) table.to_csv(table_path, sep="\t", index=False) return image_path, segmentation_path, table_path @@ -51,7 +58,7 @@ def create_image_data_and_segmentation(folder: str, size: int = 256) -> Tuple[st imageio.imwrite(segmentation_path, seg) table_path = os.path.join(folder, "default.tsv") - table = _compute_table(seg, resolution=0.38) + table = compute_table_on_the_fly(seg, resolution=0.38) table.to_csv(table_path, sep="\t", index=False) return image_path, segmentation_path, table_path diff --git a/test/test_segmentation/test_postprocessing.py b/test/test_segmentation/test_postprocessing.py index 531e2d2..c8be572 100644 --- a/test/test_segmentation/test_postprocessing.py +++ b/test/test_segmentation/test_postprocessing.py @@ -2,9 +2,12 @@ import tempfile import unittest +import imageio.v3 as imageio +import numpy as np +import pandas as pd from elf.io import open_file from skimage.data import binary_blobs -from skimage.measure import label +from skimage.measure import label, regionprops_table class TestPostprocessing(unittest.TestCase): @@ -44,6 +47,32 @@ def test_neighbors_in_radius(self): self._test_postprocessing(neighbors_in_radius, threshold=5) + def test_compute_table_on_the_fly(self): + from flamingo_tools.segmentation.postprocessing import compute_table_on_the_fly + from flamingo_tools.test_data import get_test_volume_and_segmentation + + with tempfile.TemporaryDirectory() as tmp_dir: + _, seg_path, _ = get_test_volume_and_segmentation(tmp_dir) + segmentation = imageio.imread(seg_path) + + resolution = 0.38 + table = compute_table_on_the_fly(segmentation, resolution=resolution) + + properties = ("label", "bbox", "centroid") + expected_table = regionprops_table(segmentation, properties=properties) + expected_table = pd.DataFrame(expected_table) + + for (col, col_exp) in [ + ("label_id", "label"), + ("anchor_x", "centroid-2"), ("anchor_y", "centroid-1"), ("anchor_z", "centroid-0"), + ("bb_min_x", "bbox-2"), ("bb_min_y", "bbox-1"), ("bb_min_z", "bbox-0"), + ("bb_max_x", "bbox-5"), ("bb_max_y", "bbox-4"), ("bb_max_z", "bbox-3"), + ]: + values = table[col].values + if col != "label_id": + values /= resolution + self.assertTrue(np.allclose(values, expected_table[col_exp].values)) + if __name__ == "__main__": unittest.main() From 944f6eebd83d172ff00f37f3899863dbcddf3630 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Wed, 14 May 2025 08:34:32 +0200 Subject: [PATCH 12/13] Try to fix CI --- flamingo_tools/measurements.py | 4 ++-- flamingo_tools/test_data.py | 12 ++++++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/flamingo_tools/measurements.py b/flamingo_tools/measurements.py index 755c883..99b6f95 100644 --- a/flamingo_tools/measurements.py +++ b/flamingo_tools/measurements.py @@ -54,12 +54,12 @@ def intensity_measures(seg_id): bb_min = np.array([ row.bb_min_z.item(), row.bb_min_y.item(), row.bb_min_x.item() ]).astype("float32") / resolution - bb_min = np.round(bb_min, 0).astype("uint32") + bb_min = np.round(bb_min, 0).astype("int32") bb_max = np.array([ row.bb_max_z.item(), row.bb_max_y.item(), row.bb_max_x.item() ]).astype("float32") / 
resolution - bb_max = np.round(bb_max, 0).astype("uint32") + bb_max = np.round(bb_max, 0).astype("int32") bb = tuple( slice(max(bmin - 1, 0), min(bmax + 1, sh)) diff --git a/flamingo_tools/test_data.py b/flamingo_tools/test_data.py index fec082d..7450a3b 100644 --- a/flamingo_tools/test_data.py +++ b/flamingo_tools/test_data.py @@ -15,9 +15,12 @@ def get_test_volume_and_segmentation(folder: str) -> Tuple[str, str, str]: """Download a small volume with nuclei and corresponding segmentation. Args: - folder: + folder: The test data folder. The data will be downloaded to this folder. Returns: + The path to the image, stored as tif. + The path to the segmentation, stored as tif. + The path to the segmentation table, stored as tsv. """ os.makedirs(folder, exist_ok=True) @@ -46,7 +49,12 @@ def create_image_data_and_segmentation(folder: str, size: int = 256) -> Tuple[st """Create test data containing an image, a corresponding segmentation and segmentation table. Args: - folder: The test data folder. + folder: The test data folder. The data will be written to this folder. + + Returns: + The path to the image, stored as tif. + The path to the segmentation, stored as tif. + The path to the segmentation table, stored as tsv. """ os.makedirs(folder, exist_ok=True) data = binary_blobs(size, n_dim=3).astype("uint8") * 255 From f2ecef02de0c6a4395d22cd705cea8d7928af4b5 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Wed, 14 May 2025 08:39:15 +0200 Subject: [PATCH 13/13] Update doc strings --- flamingo_tools/measurements.py | 3 +++ flamingo_tools/segmentation/postprocessing.py | 18 ++++++++++++++---- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/flamingo_tools/measurements.py b/flamingo_tools/measurements.py index 99b6f95..442b4fe 100644 --- a/flamingo_tools/measurements.py +++ b/flamingo_tools/measurements.py @@ -43,6 +43,9 @@ def compute_object_measures_impl( n_threads: The number of threads to use for computation. resolution: The resolution / voxel size of the data. table: The segmentation table. Will be computed on the fly if it is not given. + + Returns: + The table with per object measurements. """ if table is None: table = compute_table_on_the_fly(segmentation, resolution=resolution) diff --git a/flamingo_tools/segmentation/postprocessing.py b/flamingo_tools/segmentation/postprocessing.py index 9b0f6ea..3b24ce9 100644 --- a/flamingo_tools/segmentation/postprocessing.py +++ b/flamingo_tools/segmentation/postprocessing.py @@ -116,7 +116,17 @@ def neighbors_in_radius(table: pd.DataFrame, radius: float = 15) -> np.ndarray: def compute_table_on_the_fly(segmentation: np.typing.ArrayLike, resolution: float) -> pd.DataFrame: - """ + """Compute a segmentation table compatible with MoBIE. + + The table contains information about the number of pixels per object, + the anchor (= centroid) and the bounding box. Anchor and bounding box are given in physical coordinates. + + Args: + segmentation: The segmentation for which to compute the table. + resolution: The physical voxel spacing of the data. + + Returns: + The segmentation table. 
""" props = measure.regionprops(segmentation) label_ids = np.array([prop.label for prop in props]) @@ -128,7 +138,6 @@ def compute_table_on_the_fly(segmentation: np.typing.ArrayLike, resolution: floa sizes = np.array([prop.area for prop in props]) table = pd.DataFrame({ "label_id": label_ids, - "n_pixels": sizes, "anchor_x": coordinates[:, 2], "anchor_y": coordinates[:, 1], "anchor_z": coordinates[:, 0], @@ -138,6 +147,7 @@ def compute_table_on_the_fly(segmentation: np.typing.ArrayLike, resolution: floa "bb_max_x": bb_max[:, 2], "bb_max_y": bb_max[:, 1], "bb_max_z": bb_max[:, 0], + "n_pixels": sizes, }) return table @@ -170,8 +180,8 @@ def filter_segmentation( spatial_statistics_kwargs: Arguments for spatial statistics function Returns: - n_ids - n_ids_filtered + The number of objects before filtering. + The number of objects after filtering. """ # Compute the table on the fly. This doesn't work for large segmentations. if table is None: