|
| 1 | +import os |
| 2 | +from typing import List |
| 3 | + |
| 4 | +import h5py |
| 5 | +import pandas as pd |
| 6 | +import numpy as np |
| 7 | + |
| 8 | +from elf.evaluation.matching import _compute_scores, _compute_tps |
| 9 | +from elf.evaluation import dice_score |
| 10 | +from skimage.measure import label |
| 11 | +from tqdm import tqdm |
| 12 | + |
| 13 | + |
| 14 | +def _postprocess(data, apply_cc, min_component_size): |
| 15 | + if apply_cc: |
| 16 | + data = label(data) |
| 17 | + ids, sizes = np.unique(data, return_counts=True) |
| 18 | + filter_ids = ids[sizes < min_component_size] |
| 19 | + data[np.isin(data, filter_ids)] = 0 |
| 20 | + return data |
| 21 | + |
| 22 | + |
def _single_az_evaluation(seg, gt, apply_cc, min_component_size):
    """Evaluate a single segmentation against its ground-truth annotation.

    Both arrays are post-processed identically before scoring. The result
    contains the foreground dice score as well as IoU-based true positive,
    false positive and false negative counts at a matching threshold of 0.5.
    """
    assert seg.shape == gt.shape, f"{seg.shape}, {gt.shape}"

    seg = _postprocess(seg, apply_cc, min_component_size)
    gt = _postprocess(gt, apply_cc, min_component_size)

    # Semantic overlap of the binarized foreground masks.
    dice = dice_score(seg > 0, gt > 0)

    # Instance-level matching: count instance pairs with IoU >= 0.5 as true positives.
    n_true, n_matched, n_pred, scores = _compute_scores(seg, gt, criterion="iou", ignore_label=0)
    true_positives = _compute_tps(scores, n_matched, threshold=0.5)

    return {
        "tp": true_positives,
        "fp": n_pred - true_positives,
        "fn": n_true - true_positives,
        "dice": dice,
    }
| 36 | + |
| 37 | + |
# TODO further post-processing?
def az_evaluation(
    seg_paths: List[str],
    gt_paths: List[str],
    seg_key: str,
    gt_key: str,
    apply_cc: bool = True,
    min_component_size: int = 100,  # TODO find a good default value
) -> pd.DataFrame:
    """Evaluate active zone segmentations against ground-truth annotations.

    Args:
        seg_paths: The filepaths to the segmentations, stored as hdf5 files.
        gt_paths: The filepaths to the ground-truth annotations, stored as hdf5 files.
        seg_key: The internal path to the data in the segmentation hdf5 file.
        gt_key: The internal path to the data in the ground-truth hdf5 file.
        apply_cc: Whether to apply connected components before evaluation.
        min_component_size: Minimum component size for filtering the segmentation and annotations before evaluation.

    Returns:
        A data frame with the evaluation results per tomogram.

    Raises:
        ValueError: If the number of segmentation paths does not match the number of ground-truth paths.
    """
    # Explicit validation instead of assert, so the check survives `python -O`.
    if len(seg_paths) != len(gt_paths):
        raise ValueError(
            f"Expected the same number of segmentation and ground-truth paths, "
            f"got {len(seg_paths)} and {len(gt_paths)}."
        )

    results = {
        "tomo_name": [],
        "tp": [],
        "fp": [],
        "fn": [],
        "dice": [],
    }
    for seg_path, gt_path in tqdm(zip(seg_paths, gt_paths), total=len(seg_paths), desc="Run AZ Eval"):
        with h5py.File(seg_path, "r") as f:
            seg = f[seg_key][:]
        with h5py.File(gt_path, "r") as f:
            gt = f[gt_key][:]

        # TODO expose more post-processing parameters.
        result = _single_az_evaluation(seg, gt, apply_cc, min_component_size)
        # Use the file name as the tomogram identifier.
        results["tomo_name"].append(os.path.basename(seg_path))
        for metric in ("tp", "fp", "fn", "dice"):
            results[metric].append(result[metric])

    return pd.DataFrame(results)
0 commit comments