Merge pull request #60 from computational-cell-analytics/marker_annotation_checkup

schilling40 · web-flow · commit bd03d5e598cd · 2025-08-13T17:13:30.000+02:00
Intensity values for visualizing SGNs can now be read from their object measure table. A warning is issued if the label ID of a segmentation instance is not found.

The merge also includes minor changes to the export of synapses and marker volumes at lower resolutions.
diff --git a/flamingo_tools/measurements.py b/flamingo_tools/measurements.py
@@ -1,5 +1,6 @@
 import multiprocessing as mp
 import os
+import warnings
 from concurrent import futures
 from functools import partial
 from typing import List, Optional, Tuple
@@ -188,6 +189,25 @@ def _regionprops_features(seg_id, table, image, segmentation, resolution, backgr
     return features
 
 
+def get_object_measures_from_table(arr_seg, table):
+    """Return object measurements for label IDs wthin array.
+    """
+    # iterate through segmentation ids in reference mask
+    ref_ids = list(np.unique(arr_seg)[1:])
+    measure_ids = list(table["label_id"])
+    object_ids = [id for id in ref_ids if id in measure_ids]
+    if len(object_ids) < len(ref_ids):
+        warnings.warn(f"Not all IDs were found in measurement table. Using {len(object_ids)}/{len(ref_ids)}.")
+
+    median_values = [table.at[table.index[table["label_id"] == label_id][0], "median"] for label_id in object_ids]
+
+    measures = pd.DataFrame({
+        "label_id": object_ids,
+        "median": median_values,
+    })
+    return measures
+
+
 # Maybe also support:
 # - spherical harmonics
 # - line profiles
diff --git a/scripts/export_lower_resolution.py b/scripts/export_lower_resolution.py
@@ -96,6 +96,31 @@ def filter_cochlea(
                                      dilation_iterations=dilation_iterations)
 
 
+def filter_marker_instances(cochlea, segmentation, seg_name):
+    """Filter segmentation with marker labels.
+    Positive segmentation instances are set to 1, negative to 2.
+    """
+    internal_path = os.path.join(cochlea, "tables",  seg_name, "default.tsv")
+    tsv_path, fs = get_s3_path(internal_path, bucket_name=BUCKET_NAME, service_endpoint=SERVICE_ENDPOINT)
+    with fs.open(tsv_path, "r") as f:
+        table_seg = pd.read_csv(f, sep="\t")
+
+    label_ids_positive = list(table_seg.loc[table_seg["marker_labels"] == 1, "label_id"])
+    label_ids_negative = list(table_seg.loc[table_seg["marker_labels"] == 2, "label_id"])
+
+    label_ids_marker = label_ids_positive + label_ids_negative
+    filter_mask = ~np.isin(segmentation, label_ids_marker)
+    segmentation[filter_mask] = 0
+
+    filter_mask = np.isin(segmentation, label_ids_positive)
+    segmentation[filter_mask] = 1
+    filter_mask = np.isin(segmentation, label_ids_negative)
+    segmentation[filter_mask] = 2
+
+    segmentation = segmentation.astype("uint16")
+    return segmentation
+
+
 def upscale_volume(
     target_data: np.ndarray,
     downscaled_volume: np.ndarray,
@@ -146,6 +171,9 @@ def export_lower_resolution(args):
         input_key = f"s{scale}"
         for channel in args.channels:
             out_path = os.path.join(output_folder, f"{channel}.tif")
+            if args.filter_marker_labels:
+                out_path = os.path.join(output_folder, f"{channel}_marker.tif")
+
             if os.path.exists(out_path):
                 continue
 
@@ -156,7 +184,11 @@ def export_lower_resolution(args):
                 data = f[input_key][:]
             print("Data shape", data.shape)
             if args.filter_by_components is not None:
+                print(f"Filtering channel {channel} by components {args.filter_by_components}.")
                 data = filter_component(fs, data, args.cochlea, channel, args.filter_by_components)
+            if args.filter_marker_labels:
+                print(f"Filtering marker instances for channel {channel}.")
+                data = filter_marker_instances(args.cochlea, data, channel)
             if args.filter_cochlea_channels is not None:
                 us_factor = ds_factor // (2 ** scale)
                 upscaled_filter = upscale_volume(data, filter_volume, upscale_factor=us_factor)
@@ -181,6 +213,7 @@ def main():
     parser.add_argument("--filter_ihc_components", nargs="+", type=int, default=[1])
     parser.add_argument("--binarize", action="store_true")
     parser.add_argument("--filter_cochlea_channels", nargs="+", type=str, default=None)
+    parser.add_argument("--filter_marker_labels", action="store_true")
     parser.add_argument("--filter_dilation_iterations", type=int, default=8)
     args = parser.parse_args()
 
diff --git a/scripts/export_synapse_detections.py b/scripts/export_synapse_detections.py
@@ -1,6 +1,7 @@
 import argparse
 import json
 import os
+from typing import List
 
 import numpy as np
 import pandas as pd
@@ -12,7 +13,30 @@
 from tqdm import tqdm
 
 
-def export_synapse_detections(cochlea, scale, output_folder, synapse_name, reference_ihcs, max_dist, radius, id_offset):
+def export_synapse_detections(
+    cochlea: str,
+    scales: List[int],
+    output_folder: str,
+    synapse_name: str,
+    reference_ihcs: str,
+    max_dist: float,
+    radius: float,
+    id_offset: int,
+    filter_ihc_components: List[int],
+):
+    """Export synapse detections fro lower resolutions.
+
+    Args:
+        cochlea: Cochlea name on S3 bucket.
+        scales: Scale for export of lower resolution.
+        output_folder:
+        synapse_name:
+        reference_ihcs: Name of IHC segmentation.
+        max_dist: Maximal distance of synapse to IHC segmentation.
+        radius:
+        id_offset: Offset of label id of synapse output to have different colours for visualization.
+        filter_ihc_components: Component label(s) for filtering IHC segmentation.
+    """
     s3 = create_s3_target()
 
     content = s3.open(f"{BUCKET_NAME}/{cochlea}/dataset.json", mode="r", encoding="utf-8")
@@ -36,67 +60,69 @@ def export_synapse_detections(cochlea, scale, output_folder, synapse_name, refer
     seg_table = pd.read_csv(seg_table_content, sep="\t")
 
     # Only keep synapses that match to segmented IHCs of the main component.
-    valid_ihcs = seg_table[seg_table.component_labels == 1].label_id
+    valid_ihcs = seg_table[seg_table.component_labels.isin(filter_ihc_components)].label_id
     syn_table = syn_table[syn_table.matched_ihc.isin(valid_ihcs)]
 
-    # Get the reference shape at the given scale level.
-    seg_path = os.path.join(cochlea, reference_seg_info["imageData"]["ome.zarr"]["relativePath"])
-    s3_store, _ = get_s3_path(seg_path, bucket_name=BUCKET_NAME, service_endpoint=SERVICE_ENDPOINT)
-    input_key = f"s{scale}"
-    with zarr.open(s3_store, mode="r") as f:
-        shape = f[input_key].shape
-
-    # Scale the coordinates according to the scale level.
-    resolution = 0.38
-    coordinates = syn_table[["z", "y", "x"]].values
-    coordinates /= resolution
-    coordinates /= (2 ** scale)
-    coordinates = np.round(coordinates, 0).astype("int")
-
-    ihc_ids = syn_table["matched_ihc"].values
-
-    # Create the output.
-    output = np.zeros(shape, dtype="uint16")
-    mask = ball(radius).astype(bool)
-
-    for coord, matched_ihc in tqdm(
-        zip(coordinates, ihc_ids), total=len(coordinates), desc="Writing synapses to volume"
-    ):
-        bb = tuple(slice(c - radius, c + radius + 1) for c in coord)
-        try:
-            output[bb][mask] = matched_ihc + id_offset
-        except IndexError:
-            print("Index error for", coord)
-            continue
-
-    # Write the output.
-    out_folder = os.path.join(output_folder, cochlea, f"scale{scale}")
-    os.makedirs(out_folder, exist_ok=True)
-    if id_offset != 0:
-        out_path = os.path.join(out_folder, f"{synapse_name}_offset{id_offset}.tif")
-    else:
-        out_path = os.path.join(out_folder, f"{synapse_name}.tif")
-    print("Writing synapses to", out_path)
-    tifffile.imwrite(out_path, output, bigtiff=True, compression="zlib")
+    for scale in scales:
+        # Get the reference shape at the given scale level.
+        seg_path = os.path.join(cochlea, reference_seg_info["imageData"]["ome.zarr"]["relativePath"])
+        s3_store, _ = get_s3_path(seg_path, bucket_name=BUCKET_NAME, service_endpoint=SERVICE_ENDPOINT)
+        input_key = f"s{scale}"
+        with zarr.open(s3_store, mode="r") as f:
+            shape = f[input_key].shape
+
+        # Scale the coordinates according to the scale level.
+        resolution = 0.38
+        coordinates = syn_table[["z", "y", "x"]].values
+        coordinates /= resolution
+        coordinates /= (2 ** scale)
+        coordinates = np.round(coordinates, 0).astype("int")
+
+        ihc_ids = syn_table["matched_ihc"].values
+
+        # Create the output.
+        output = np.zeros(shape, dtype="uint16")
+        mask = ball(radius).astype(bool)
+
+        for coord, matched_ihc in tqdm(
+            zip(coordinates, ihc_ids), total=len(coordinates), desc="Writing synapses to volume"
+        ):
+            bb = tuple(slice(c - radius, c + radius + 1) for c in coord)
+            try:
+                output[bb][mask] = matched_ihc + id_offset
+            except IndexError:
+                print("Index error for", coord)
+                continue
+
+        # Write the output.
+        out_folder = os.path.join(output_folder, cochlea, f"scale{scale}")
+        os.makedirs(out_folder, exist_ok=True)
+        if id_offset != 0:
+            out_path = os.path.join(out_folder, f"{synapse_name}_offset{id_offset}.tif")
+        else:
+            out_path = os.path.join(out_folder, f"{synapse_name}.tif")
+        print("Writing synapses to", out_path)
+        tifffile.imwrite(out_path, output, bigtiff=True, compression="zlib")
 
 
 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument("--cochlea", "-c", required=True)
-    parser.add_argument("--scale", "-s", type=int, required=True)
+    parser.add_argument("--scale", "-s", nargs="+", type=int, required=True)
     parser.add_argument("--output_folder", "-o", required=True)
     parser.add_argument("--synapse_name", default="synapse_v3_ihc_v4b")
     parser.add_argument("--reference_ihcs", default="IHC_v4b")
     parser.add_argument("--max_dist", type=float, default=3.0)
     parser.add_argument("--radius", type=int, default=3)
     parser.add_argument("--id_offset", type=int, default=0)
+    parser.add_argument("--filter_ihc_components", nargs="+", type=int, default=[1])
     args = parser.parse_args()
 
     export_synapse_detections(
         args.cochlea, args.scale, args.output_folder,
         args.synapse_name, args.reference_ihcs,
         args.max_dist, args.radius,
-        args.id_offset,
+        args.id_offset, args.filter_ihc_components,
     )
 
 
diff --git a/scripts/intensity_annotation/gfp_annotation.py b/scripts/intensity_annotation/gfp_annotation.py
@@ -4,6 +4,7 @@
 import imageio.v3 as imageio
 import napari
 import numpy as np
+import pandas as pd
 
 import matplotlib.pyplot as plt
 import seaborn as sns
@@ -14,7 +15,8 @@
 from elf.parallel.distance_transform import distance_transform
 from elf.parallel.seeded_watershed import seeded_watershed
 
-from flamingo_tools.measurements import compute_object_measures_impl
+from flamingo_tools.measurements import compute_object_measures_impl, get_object_measures_from_table
+from flamingo_tools.s3_utils import get_s3_path
 
 
 class HistogramWidget(QWidget):
@@ -132,7 +134,7 @@ def _create_mask(seg_extended, stain):
     return mask
 
 
-def gfp_annotation(prefix, default_stat="median", background_norm=None, is_otof=False):
+def gfp_annotation(prefix, default_stat="median", background_norm=None, is_otof=False, seg_version=None):
     assert background_norm in (None, "division", "subtraction")
 
     direc = os.path.dirname(os.path.abspath(prefix))
@@ -179,9 +181,22 @@ def gfp_annotation(prefix, default_stat="median", background_norm=None, is_otof=
         mask = _create_mask(seg_extended, stain1)
         assert mask.shape == seg_extended.shape
         feature_set = "default_background_norm" if background_norm == "division" else "default_background_subtract"
-    statistics = compute_object_measures_impl(
-        stain1, seg_extended, feature_set=feature_set, background_mask=mask, median_only=True
-    )
+
+    if seg_version is not None:
+        seg_string = "-".join(seg_version.split("_"))
+        cochlea = os.path.basename(prefix).split("_crop_")[0]
+
+        table_measurement_path = f"{cochlea}/tables/{seg_version}/{stain1_name}_{seg_string}_object-measures.tsv"
+        table_path_s3, fs = get_s3_path(table_measurement_path)
+        with fs.open(table_path_s3, "r") as f:
+            table_measurement = pd.read_csv(f, sep="\t")
+
+        statistics = get_object_measures_from_table(seg, table=table_measurement)
+
+    else:
+        statistics = compute_object_measures_impl(
+            stain1, seg_extended, feature_set=feature_set, background_mask=mask, median_only=True
+        )
 
     # Open the napari viewer.
     v = napari.Viewer()
@@ -281,9 +296,11 @@ def main():
     parser.add_argument("--otof", action="store_true",
                         help="Whether to run the annotation tool for otof samples with VGlut3, "
                         "Alphatag and IHC segmentation.")  # noqa
+    parser.add_argument("--seg_version", type=str, default=None,
+                        help="Supply segmentation version, e.g. SGN_v2, to use intensities from object measure table.")
     args = parser.parse_args()
 
-    gfp_annotation(args.prefix, background_norm=args.background_norm, is_otof=args.otof)
+    gfp_annotation(args.prefix, background_norm=args.background_norm, is_otof=args.otof, seg_version=args.seg_version)
 
 
 if __name__ == "__main__":
diff --git a/scripts/measurements/evaluate_marker_annotations.py b/scripts/measurements/evaluate_marker_annotations.py
@@ -107,20 +107,35 @@ def find_thresholds(cochlea_annotations, cochlea, data_seg, table_measurement):
     # loop over all annotated blocks
     for annotated_center in annotated_centers:
         intensities = []
+        annotator_success = []
+        annotator_failure = []
+        annotator_missing = []
         # loop over annotated block from single user
         for annotator_key in annotation_dics.keys():
             if annotated_center not in annotation_dics[annotator_key]["center_strings"]:
+                annotator_missing.append(os.path.basename(annotator_key))
                 continue
             else:
                 median_intensity = annotation_dics[annotator_key][annotated_center]["median_intensity"]
                 if median_intensity is None:
                     print(f"No threshold for {os.path.basename(annotator_key)} and crop {annotated_center}.")
+                    annotator_failure.append(os.path.basename(annotator_key))
                 else:
                     intensities.append(median_intensity)
+                    annotator_success.append(os.path.basename(annotator_key))
+
         if len(intensities) == 0:
             print(f"No viable annotation for cochlea {cochlea} and crop {annotated_center}.")
+            median_int_avg = None
         else:
-            intensity_dic[annotated_center] = {"median_intensity": float(sum(intensities) / len(intensities))}
+            median_int_avg = float(sum(intensities) / len(intensities)),
+
+        intensity_dic[annotated_center] = {
+            "median_intensity": median_int_avg,
+            "annotation_success": annotator_success,
+            "annotation_failure": annotator_failure,
+            "annotation_missing": annotator_missing,
+        }
 
     return intensity_dic