Commit d966ea3

minor things for analysis; 1st implementation of surface dice for eval

1 parent 7b83139

5 files changed: +129 / -13 lines

.gitignore

Lines changed: 1 addition & 0 deletions

@@ -15,3 +15,4 @@ scripts/cooper/training/find_rec_testset.py
 synapse-net-models/
 scripts/portal/upscale_tomo.py
 analysis_results/
+scripts/cooper/revision/evaluation_results/

run_sbatch_revision.sbatch

Lines changed: 5 additions & 5 deletions

@@ -1,15 +1,15 @@
 #! /bin/bash
 #SBATCH -c 4 #4 #8
-#SBATCH --mem 256G #120G #32G #64G #256G
+#SBATCH --mem 120G #120G #32G #64G #256G
 #SBATCH -p grete:shared #grete:shared #grete-h100:shared
 #SBATCH -t 4:00:00 #6:00:00 #48:00:00
 #SBATCH -G A100:1 #V100:1 #2 #A100:1 #gtx1080:2 #v100:1 #H100:1
 #SBATCH --output=/user/muth9/u12095/synapse-net/slurm_revision/slurm-%j.out
-#SBATCH -A nim00007
-#SBATCH --constraint 80gb
+#SBATCH -A nim00007 #SBATCH --constraint 80gb

 source ~/.bashrc
 conda activate synapse-net
 python /user/muth9/u12095/synapse-net/scripts/cooper/revision/updated_data_analysis/run_data_analysis.py \
-  -i /mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/20241102_TOMO_DATA_Imig2014/exported/SNAP25/ \
-  -o /mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/20241102_TOMO_DATA_Imig2014/afterRevision_analysis/boundaryT0_9_constantins_presynapticFiltering --store
+  -i /mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/20241102_TOMO_DATA_Imig2014/final_Imig2014_seg_autoComp/SNAP25/ \
+  -o /mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/20241102_TOMO_DATA_Imig2014/afterRevision_analysis/boundaryT0_9_constantins_presynapticFiltering/full_dataset --store \
+  -s ./analysis_results/full_dataset
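The batch script above is submitted with SLURM's sbatch from the shell. When submissions are driven from Python instead (e.g. to queue several datasets in a loop), a minimal sketch, assuming sbatch is on PATH and the script sits in the current working directory:

import subprocess

# Submit the batch script; on success sbatch prints "Submitted batch job <id>".
result = subprocess.run(
    ["sbatch", "run_sbatch_revision.sbatch"],
    capture_output=True, text=True, check=True,
)
job_id = result.stdout.strip().split()[-1]  # keep the job id for bookkeeping
print(f"Submitted SLURM job {job_id}")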
New file (surface dice evaluation script)

Lines changed: 114 additions & 0 deletions

@@ -0,0 +1,114 @@
import sys
import os

# Add membrain-seg to Python path
MEMBRAIN_SEG_PATH = "/user/muth9/u12095/membrain-seg/src"
if MEMBRAIN_SEG_PATH not in sys.path:
    sys.path.insert(0, MEMBRAIN_SEG_PATH)

import argparse
import h5py
import pandas as pd
from tqdm import tqdm
import numpy as np

from membrain_seg.segmentation.skeletonize import skeletonization
from membrain_seg.benchmark.metrics import masked_surface_dice


def load_segmentation(file_path, key):
    """Load a dataset from an HDF5 file."""
    with h5py.File(file_path, "r") as f:
        data = f[key][:]
    return data


def evaluate_surface_dice(pred, gt, raw, check):
    """Skeletonize predictions and GT, compute surface dice."""
    gt_skeleton = skeletonization(gt == 1, batch_size=100000)
    pred_skeleton = skeletonization(pred, batch_size=100000)
    mask = gt != 2

    if check:
        import napari
        v = napari.Viewer()
        v.add_image(raw)
        v.add_labels(gt, name="gt")
        v.add_labels(gt_skeleton.astype(np.uint16), name="gt_skeleton")
        v.add_labels(pred, name="pred")
        v.add_labels(pred_skeleton.astype(np.uint16), name="pred_skeleton")
        napari.run()

    surf_dice, confusion_dict = masked_surface_dice(
        pred_skeleton, gt_skeleton, pred, gt, mask
    )
    return surf_dice, confusion_dict


def process_file(pred_path, gt_path, seg_key, gt_key, check):
    """Process a single prediction/GT file pair."""
    try:
        pred = load_segmentation(pred_path, seg_key)
        gt = load_segmentation(gt_path, gt_key)
        raw = load_segmentation(gt_path, "raw")
        surf_dice, confusion = evaluate_surface_dice(pred, gt, raw, check)

        result = {
            "tomo_name": os.path.basename(pred_path),
            "surface_dice": surf_dice,
            **confusion,
        }
        return result

    except Exception as e:
        print(f"Error processing {pred_path}: {e}")
        return None


def collect_results(input_folder, gt_folder, version, check=False):
    """Loop through prediction files and compute metrics."""
    results = []
    seg_key = f"predictions/az/seg_v{version}"
    gt_key = "/labels/az_merged"

    for fname in tqdm(os.listdir(input_folder), desc="Processing segmentations"):
        if not fname.endswith(".h5"):
            continue

        pred_path = os.path.join(input_folder, fname)
        gt_path = os.path.join(gt_folder, fname)

        if not os.path.exists(gt_path):
            print(f"Warning: Ground truth file not found for {fname}")
            continue

        result = process_file(pred_path, gt_path, seg_key, gt_key, check)
        if result:
            results.append(result)

    return results


def save_results(results, output_file):
    """Save results as an Excel file."""
    df = pd.DataFrame(results)
    df.to_excel(output_file, index=False)
    print(f"Results saved to {output_file}")


def main():
    parser = argparse.ArgumentParser(description="Compute surface dice for AZ segmentations.")
    parser.add_argument("--input_folder", "-i", required=True, help="Folder with predicted segmentations (.h5)")
    parser.add_argument("--gt_folder", "-gt", required=True, help="Folder with ground truth segmentations (.h5)")
    parser.add_argument("--version", "-v", required=True, help="Version string used in prediction key")
    parser.add_argument("--check", action="store_true", help="Open the volumes in napari to inspect the skeletons before scoring")

    args = parser.parse_args()

    output_file = f"/user/muth9/u12095/synapse-net/scripts/cooper/revision/evaluation_results/v{args.version}_surface_dice.xlsx"
    results = collect_results(args.input_folder, args.gt_folder, args.version, args.check)
    save_results(results, output_file)


if __name__ == "__main__":
    main()
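The heavy lifting in this script is done by membrain-seg's skeletonization and masked_surface_dice. As an illustration of what a skeleton-based surface dice measures, here is a hypothetical NumPy sketch modeled on the clDice formulation; the function name and the exact definition are assumptions, and the real masked_surface_dice additionally returns a confusion dict:

import numpy as np

def surface_dice_sketch(pred_skel, gt_skel, pred_seg, gt_seg, mask):
    # Restrict both skeletons to the evaluation mask (here: gt != 2).
    pred_skel = pred_skel.astype(bool) & mask
    gt_skel = gt_skel.astype(bool) & mask
    # Topology precision: fraction of the predicted skeleton inside the GT segmentation.
    tprec = (pred_skel & (gt_seg == 1)).sum() / max(pred_skel.sum(), 1)
    # Topology sensitivity: fraction of the GT skeleton inside the predicted segmentation.
    tsens = (gt_skel & pred_seg.astype(bool)).sum() / max(gt_skel.sum(), 1)
    # Harmonic mean, analogous to a dice score over skeleton agreement.
    return 2 * tprec * tsens / max(tprec + tsens, 1e-8)

Matching the arrays produced in evaluate_surface_dice, a call would look like surface_dice_sketch(pred_skeleton, gt_skeleton, pred, gt, gt != 2).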

scripts/cooper/revision/updated_data_analysis/analysis_segmentations.py

Lines changed: 8 additions & 7 deletions

@@ -62,8 +62,8 @@ def SV_pred(raw: np.ndarray, SV_model: str, output_path: str = None, store: bool

     use_existing_seg = False
     #checking if segmentation is already in output path and if so, use it
-    if output_path:
-        with h5py.File(output_path, "a") as f:
+    if output_path and os.path.exists(output_path):
+        with h5py.File(output_path, "r") as f:
             if seg_key in f:
                 seg = f[seg_key][:]
                 use_existing_seg = True

@@ -108,10 +108,11 @@ def compartment_pred(raw: np.ndarray, compartment_model: str, output_path: str =

     use_existing_seg = False
     #checking if segmentation is already in output path and if so, use it
-    if output_path:
-        with h5py.File(output_path, "a") as f:
-            if seg_key in f:
+    if output_path and os.path.exists(output_path):
+        with h5py.File(output_path, "r") as f:
+            if seg_key in f and pred_key in f:
                 seg = f[seg_key][:]
+                pred = f[pred_key][:]
                 use_existing_seg = True
                 print(f"Using existing compartment seg in {output_path}")

@@ -152,8 +153,8 @@ def AZ_pred(raw: np.ndarray, AZ_model: str, output_path: str = None, store: bool

     use_existing_seg = False
     #checking if segmentation is already in output path and if so, use it
-    if output_path:
-        with h5py.File(output_path, "a") as f:
+    if output_path and os.path.exists(output_path):
+        with h5py.File(output_path, "r") as f:
             if seg_key in f:
                 seg = f[seg_key][:]
                 use_existing_seg = True
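All three hunks apply the same fix: opening with h5py mode "a" creates an empty HDF5 file as a side effect whenever output_path does not exist yet, so the existence check plus read-only open avoids that. The shared pattern could be factored into one helper; a hypothetical sketch, not part of this commit:

import os
import h5py

def load_cached(output_path, *keys):
    """Return the cached datasets under `keys`, or None if any is missing."""
    if not (output_path and os.path.exists(output_path)):
        return None
    # Read-only open: never creates a file as a side effect, unlike mode "a".
    with h5py.File(output_path, "r") as f:
        if all(k in f for k in keys):
            return tuple(f[k][:] for k in keys)
    return None

SV_pred and AZ_pred would call load_cached(output_path, seg_key), while compartment_pred would pass both seg_key and pred_key, mirroring the second hunk above.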

scripts/cooper/revision/updated_data_analysis/run_data_analysis.py

Lines changed: 1 addition & 1 deletion

@@ -80,7 +80,7 @@ def main():
         run_data_analysis(input_path, output_path, store, resolution, analysis_output)

     elif os.path.isdir(input_path):
-        h5_files = [file for file in os.listdir(input_path) if file.endswith(".h5")]
+        h5_files = sorted([file for file in os.listdir(input_path) if file.endswith(".h5")])
         for file in tqdm(h5_files, desc="Processing files"):
             full_input_path = os.path.join(input_path, file)
             output_path = os.path.join(output_folder, file) if output_folder else None
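os.listdir returns entries in arbitrary, filesystem-dependent order, so sorting makes the processing order (and any order-sensitive logs or aggregated outputs) reproducible across runs and machines. The same idea as a small reusable helper (hypothetical, not in the commit):

import os

def list_h5_files(folder):
    # Sort for a deterministic, platform-independent processing order.
    return sorted(f for f in os.listdir(folder) if f.endswith(".h5"))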
