Implement exclusion of IHCs with zero mapped synapses from the IHC segmentation

constantinpape · constantinpape · commit 3db9e24cd714 · 2025-07-31T16:11:29.000+02:00
diff --git a/flamingo_tools/validation.py b/flamingo_tools/validation.py
@@ -37,6 +37,13 @@ def _parse_annotation_path(annotation_path):
     return cochlea, slice_id
 
 
+def _get_table(fs, cochlea, seg_name):
+    internal_path = os.path.join(BUCKET_NAME, cochlea, "tables",  seg_name, "default.tsv")
+    with fs.open(internal_path, "r") as f:
+        table = pd.read_csv(f, sep="\t")
+    return table
+
+
 def fetch_data_for_evaluation(
     annotation_path: str,
     cache_path: Optional[str] = None,
@@ -45,6 +52,7 @@ def fetch_data_for_evaluation(
     components_for_postprocessing: Optional[List[int]] = None,
     cochlea: Optional[str] = None,
     extra_data: Optional[str] = None,
+    exclude_zero_synapse_count: bool = False,
 ) -> Tuple[np.ndarray, pd.DataFrame]:
     """Fetch segmentation from S3 matching the annotation path for evaluation.
 
@@ -57,6 +65,8 @@ def fetch_data_for_evaluation(
             Choose [1] for the default componentn containing the helix.
         cochlea: Optional name of the cochlea.
         extra_data: Extra data to fetch.
+        exclude_zero_synapse_count: Whether to remove cells with zero mapped synapses from the
+            segmentation, based on the table column 'syn_per_IHC'. Relevant for the IHC evaluation.
 
     Returns:
         The segmentation downloaded from the S3 bucket.
@@ -96,20 +106,28 @@ def fetch_data_for_evaluation(
     with zarr.open(s3_store, mode="r") as f:
         segmentation = f[input_key][roi]
 
+    table = None
     if components_for_postprocessing is not None:
         # Filter the IDs so that only the ones part of 'components_for_postprocessing_remain'.
-
-        # First, we download the MoBIE table for this segmentation.
-        internal_path = os.path.join(BUCKET_NAME, cochlea, "tables",  seg_name, "default.tsv")
-        with fs.open(internal_path, "r") as f:
-            table = pd.read_csv(f, sep="\t")
+        table = _get_table(fs, cochlea, seg_name)
 
         # Then we get the ids for the components and us them to filter the segmentation.
         component_mask = np.isin(table.component_labels.values, components_for_postprocessing)
         keep_label_ids = table.label_id.values[component_mask].astype("int64")
         filter_mask = ~np.isin(segmentation, keep_label_ids)
         segmentation[filter_mask] = 0
 
+        # We also filter the table, so that the synapse filter below only keeps ids from these components.
+        table = table[table.label_id.isin(keep_label_ids)]
+
+    if exclude_zero_synapse_count:
+        if table is None:
+            table = _get_table(fs, cochlea, seg_name)
+
+        keep_label_ids = table.label_id[table.syn_per_IHC > 0].astype("int64")
+        filter_mask = ~np.isin(segmentation, keep_label_ids)
+        segmentation[filter_mask] = 0
+
     segmentation, _, _ = relabel_sequential(segmentation)
 
     # Cache it if required.
diff --git a/scripts/validation/IHCs/run_evaluation.py b/scripts/validation/IHCs/run_evaluation.py
@@ -12,7 +12,7 @@
 ANNOTATION_FOLDERS = ["consensus_annotation"]
 
 
-def run_evaluation(root, annotation_folders, result_file, cache_folder, segmentation_name):
+def run_evaluation(root, annotation_folders, result_file, cache_folder, segmentation_name, exclude):
     results = {
         "annotator": [],
         "cochlea": [],
@@ -38,7 +38,8 @@ def run_evaluation(root, annotation_folders, result_file, cache_folder, segmenta
             segmentation, annotations = fetch_data_for_evaluation(
                 annotation_path, components_for_postprocessing=[component],
                 seg_name=segmentation_name,
-                cache_path=None if cache_folder is None else os.path.join(cache_folder, f"{cochlea}_{slice_id}.tif")
+                cache_path=None if cache_folder is None else os.path.join(cache_folder, f"{cochlea}_{slice_id}.tif"),
+                exclude_zero_synapse_count=exclude,
             )
             scores = compute_scores_for_annotated_slice(segmentation, annotations, matching_tolerance=5)
             results["annotator"].append(annotator)
@@ -59,10 +60,11 @@ def main():
     parser.add_argument("-i", "--input", default=ROOT)
     parser.add_argument("--folders", default=ANNOTATION_FOLDERS)
     parser.add_argument("--result_file", default="results.csv")
-    parser.add_argument("--segmentation_name", default="IHC_v4")
+    parser.add_argument("--segmentation_name", default="IHC_v4c")
     parser.add_argument("--cache_folder")
+    parser.add_argument("--exclude", action="store_true")
     args = parser.parse_args()
-    run_evaluation(args.input, args.folders, args.result_file, args.cache_folder, args.segmentation_name)
+    run_evaluation(args.input, args.folders, args.result_file, args.cache_folder, args.segmentation_name, args.exclude)
 
 
 if __name__ == "__main__":