
Commit 2ae7bd1

Apply threshold on marker stain
1 parent 88af4e3 commit 2ae7bd1

2 files changed: +225 -53 lines changed

flamingo_tools/segmentation/chreef_utils.py

Lines changed: 50 additions & 53 deletions
@@ -1,5 +1,4 @@
 import os
-import math
 import multiprocessing as mp
 from concurrent import futures
 from typing import List, Tuple
@@ -9,45 +8,45 @@
 from tqdm import tqdm


-def find_annotations(annotation_dir, cochleae=None) -> dict:
+def coord_from_string(center_str):
+    return tuple([int(c) for c in center_str.split("-")])
+
+
+def find_annotations(annotation_dir, cochlea) -> dict:
     """Create dictionary for analysis of ChReef annotations.
     Annotations should have format positive-negative_<cochlea>_crop_<coord>_allNegativeExcluded_thr<thr>.tif

     Args:
         annotation_dir: Directory containing annotations.
     """

-    def extract_center_crop(cochlea, name):
+    def extract_center_string(cochlea, name):
         # Extract center crop coordinate from file name
         crop_suffix = name.split(f"{cochlea}_crop_")[1]
-        coord_str = crop_suffix.split("_")[0]
-        coord = tuple([int(c) for c in coord_str.split("-")])
-        return coord
-
-    def extract_cochlea_str(name):
-        # Extract cochlea str from annotation file name.
-        cochlea_suffix = name.split("negative_")[1]
-        cochlea = cochlea_suffix.split("_crop")[0]
-        return cochlea
-
-    file_names = [entry.name for entry in os.scandir(annotation_dir)]
-    if cochleae is None:
-        cochleae = list(set([extract_cochlea_str(file_name) for file_name in file_names]))
-
-    annotation_dic = {}
-    for cochlea in cochleae:
-        cochlea_files = [entry.name for entry in os.scandir(annotation_dir) if cochlea in entry.name]
-        dic = {"cochlea": cochlea}
-        dic["cochlea_files"] = cochlea_files
-        center_crops = list(set([extract_center_crop(cochlea, name=file_name) for file_name in cochlea_files]))
-        dic["center_coords"] = center_crops
-        dic["center_str"] = [("-").join([str(c).zfill(4) for center_crop in center_crops for c in center_crop])]
-        for center_str in dic["center_str"]:
-            file_neg = [c for c in cochlea_files if all(x in c for x in [cochlea, center_str, "NegativeExcluded"])][0]
-            file_pos = [c for c in cochlea_files if all(x in c for x in [cochlea, center_str, "WeakPositive"])][0]
-            dic[center_str] = {"file_neg": file_neg, "file_pos": file_pos}
-        annotation_dic[cochlea] = dic
-    return annotation_dic
+        center_str = crop_suffix.split("_")[0]
+        return center_str
+
+    cochlea_files = [entry.name for entry in os.scandir(annotation_dir) if cochlea in entry.name]
+    dic = {"cochlea": cochlea}
+    dic["cochlea_files"] = cochlea_files
+    center_strings = list(set([extract_center_string(cochlea, name=f) for f in cochlea_files]))
+    center_strings.sort()
+    dic["center_strings"] = center_strings
+    remove_strings = []
+    for center_str in center_strings:
+        files_neg = [c for c in cochlea_files if all(x in c for x in [cochlea, center_str, "NegativeExcluded"])]
+        files_pos = [c for c in cochlea_files if all(x in c for x in [cochlea, center_str, "WeakPositive"])]
+        if len(files_neg) != 1 or len(files_pos) != 1:
+            print(f"Skipping crop {center_str} for cochlea {cochlea}. "
+                  f"Missing or multiple annotation files in {annotation_dir}.")
+            remove_strings.append(center_str)
+        else:
+            dic[center_str] = {"file_neg": os.path.join(annotation_dir, files_neg[0]),
+                               "file_pos": os.path.join(annotation_dir, files_pos[0])}
+    for rm_str in remove_strings:
+        dic["center_strings"].remove(rm_str)
+
+    return dic


 def get_roi(coord: tuple, roi_halo: tuple, resolution: float = 0.38) -> Tuple[int]:
@@ -106,7 +105,7 @@ def check_overlap(ref_id):
         return None

     n_threads = min(16, mp.cpu_count())
-    print(f"Parallelizing with {n_threads} Threads.")
+    print(f"Finding overlapping masks with {n_threads} Threads.")
     with futures.ThreadPoolExecutor(n_threads) as pool:
         results = list(tqdm(pool.map(check_overlap, ref_ids), total=len(ref_ids)))

@@ -129,7 +128,7 @@ def find_inbetween_ids(
     # negative annotation == 1, positive annotation == 2
     negexc_negatives = find_overlapping_masks(arr_negexc, roi_seg, label_id_base=1)
     allweak_positives = find_overlapping_masks(arr_allweak, roi_seg, label_id_base=2)
-    inbetween_ids = list(set(negexc_negatives) & set(allweak_positives))
+    inbetween_ids = [int(i) for i in set(negexc_negatives).intersection(set(allweak_positives))]
     return inbetween_ids


@@ -142,26 +141,24 @@ def get_median_intensity(file_negexc, file_allweak, center, data_seg, table):

     roi_seg = data_seg[roi]
     inbetween_ids = find_inbetween_ids(arr_negexc, arr_allweak, roi_seg)
-    intensities = table.loc[table["label_id"].isin(inbetween_ids), table["mean"]]
+    subset = table[table["label_id"].isin(inbetween_ids)]
+    intensities = list(subset["median"])
     return np.median(list(intensities))


-def localize_median_intensities(annotation_dir, cochlea, data_seg, table_measure, table_block=None):
-    annotation_dic = find_annotations(annotation_dir, cochleae=[cochlea])
-    for key in annotation_dic.keys():
-        dic = annotation_dic[key]
-        for center_coord, center_str in zip(dic["center_coords"], dic["center_str"]):
-            file_pos = dic[center_str["file_pos"]]
-            file_neg = dic[center_str["file_neg"]]
-            median_intensity = get_median_intensity(file_neg, file_pos, center_coord, data_seg, table_measure)
-
-            annotation_dic[key][center_str]["median_intensity"] = median_intensity
-            if table_block is not None:
-                block_centers = table_block["crop_centers"]
-                for num, block_center in enumerate(block_centers):
-                    dist = math.dist(tuple(block_centers), center_coord)
-                    if dist < 5:
-                        annotation_dic[key][center_str]["block_index"] = num
-                        annotation_dic[key][center_str]["block_center"] = block_center
-
-    return annotation_dic[cochlea]
+def localize_median_intensities(annotation_dir, cochlea, data_seg, table_measure):
+    """Find median intensities in blocks and assign them to center positions of cropped block.
+    """
+    annotation_dic = find_annotations(annotation_dir, cochlea)
+    # center_keys = [key for key in annotation_dic["center_strings"] if key in annotation_dic.keys()]
+
+    for center_str in annotation_dic["center_strings"]:
+        center_coord = coord_from_string(center_str)
+        print(f"Getting mean intensities for {center_coord}.")
+        file_pos = annotation_dic[center_str]["file_pos"]
+        file_neg = annotation_dic[center_str]["file_neg"]
+        median_intensity = get_median_intensity(file_neg, file_pos, center_coord, data_seg, table_measure)
+
+        annotation_dic[center_str]["median_intensity"] = median_intensity
+
+    return annotation_dic
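The reworked helpers now operate on a single cochlea and return one dictionary keyed by crop-center strings. A minimal usage sketch under assumed inputs (the directory, cochlea identifier, and crop coordinate below are illustrative, not taken from this commit):

# Minimal sketch of the reworked single-cochlea API; all paths and names here are hypothetical.
from flamingo_tools.segmentation.chreef_utils import find_annotations, localize_median_intensities

annotation_dir = "/path/to/annotator_results"   # assumed folder with positive-negative_*_crop_*.tif annotations
cochlea = "M_LR_000000_L"                       # hypothetical cochlea identifier

annotation_dic = find_annotations(annotation_dir, cochlea)
print(annotation_dic["center_strings"])         # e.g. ["0512-0768-0256"], sorted crop-center strings

# With segmentation data (data_seg) and a measurement table (table_measure) loaded elsewhere:
# annotation_dic = localize_median_intensities(annotation_dir, cochlea, data_seg, table_measure)
# annotation_dic["0512-0768-0256"]["median_intensity"] then holds the per-crop threshold.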
Lines changed: 175 additions & 0 deletions
@@ -0,0 +1,175 @@
+import argparse
+import os
+from typing import List, Optional
+
+import pandas as pd
+
+from flamingo_tools.s3_utils import get_s3_path
+from flamingo_tools.file_utils import read_image_data
+from flamingo_tools.segmentation.chreef_utils import localize_median_intensities, find_annotations
+
+MARKER_DIR = "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/ChReef_PV-GFP/2025-07_PV_GFP_SGN"
+
+
+def get_length_fraction_from_center(table, center_str):
+    """ Get 'length_fraction' parameter for center coordinate by averaging nearby segmentation instances.
+    """
+    center_coord = tuple([int(c) for c in center_str.split("-")])
+    (cx, cy, cz) = center_coord
+    offset = 20
+    subset = table[
+        (cx - offset < table["anchor_x"]) &
+        (table["anchor_x"] < cx + offset) &
+        (cy - offset < table["anchor_y"]) &
+        (table["anchor_y"] < cy + offset) &
+        (cz - offset < table["anchor_z"]) &
+        (table["anchor_z"] < cz + offset)
+    ]
+    length_fraction = list(subset["length_fraction"])
+    length_fraction = float(sum(length_fraction) / len(length_fraction))
+    return length_fraction
+
+
+def apply_nearest_threshold(intensity_dic, table_seg, table_measurement):
+    """Apply threshold to nearest segmentation instances.
+    Crop centers are transformed into the 'length fraction' parameter of the segmentation table.
+    This avoids issues with the spiral shape of the cochlea and maps the assignment onto the Rosenthal's canal.
+    """
+    # assign crop centers to length fraction of Rosenthal's canal
+    lf_intensity = {}
+    for key in intensity_dic.keys():
+        length_fraction = get_length_fraction_from_center(table_seg, key)
+        intensity_dic[key]["length_fraction"] = length_fraction
+        lf_intensity[length_fraction] = {"threshold": intensity_dic[key]["median_intensity"]}
+
+    # get limits for checking marker thresholds
+    lf_intensity = dict(sorted(lf_intensity.items()))
+    lf_fractions = list(lf_intensity.keys())
+    # start of cochlea
+    lf_limits = [0]
+    # half distance between block centers
+    for i in range(len(lf_fractions) - 1):
+        lf_limits.append((lf_fractions[i] + lf_fractions[i+1]) / 2)
+    # end of cochlea
+    lf_limits.append(1)
+
+    marker_labels = [0 for _ in range(len(table_seg))]
+    table_seg.loc[:, "marker_labels"] = marker_labels
+    for num, fraction in enumerate(lf_fractions):
+        subset_seg = table_seg[
+            (table_seg["length_fraction"] > lf_limits[num]) &
+            (table_seg["length_fraction"] < lf_limits[num + 1])
+        ]
+        # assign values based on limits
+        threshold = lf_intensity[fraction]["threshold"]
+        label_ids_seg = subset_seg["label_id"]
+
+        subset_measurement = table_measurement[table_measurement["label_id"].isin(label_ids_seg)]
+        subset_positive = subset_measurement[subset_measurement["median"] >= threshold]
+        subset_negative = subset_measurement[subset_measurement["median"] < threshold]
+        label_ids_pos = list(subset_positive["label_id"])
+        label_ids_neg = list(subset_negative["label_id"])
+
+        table_seg.loc[table_seg["label_id"].isin(label_ids_pos), "marker_labels"] = 1
+        table_seg.loc[table_seg["label_id"].isin(label_ids_neg), "marker_labels"] = 2
+
+    return table_seg
+
+
+def evaluate_marker_annotation(
+    cochleae,
+    output_dir: str,
+    annotation_dirs: Optional[List[str]] = None,
+    seg_name: str = "SGN_v2",
+    marker_name: str = "GFP",
+):
+    """Evaluate marker annotations of a single or multiple annotators.
+    Segmentation instances are assigned a positive (1) or negative label (2)
+    in form of the "marker_label" component of the output segmentation table.
+    The assignment is based on the median intensity supplied by a measurement table.
+    Instances not considered for the assignment are labeled as 0.
+
+    Args:
+        cochleae: List of cochlea
+        output_dir: Output directory for segmentation table with 'marker_label' in format <cochlea>_<marker>_<seg>.tsv
+        annotation_dirs: List of directories containing marker annotations by annotator(s).
+        seg_name: Identifier for segmentation.
+        marker_name: Identifier for marker stain.
+    """
+    input_key = "s0"
+
+    if annotation_dirs is None:
+        if "MARKER_DIR" in globals():
+            marker_dir = MARKER_DIR
+        annotation_dirs = [entry.path for entry in os.scandir(marker_dir)
+                           if os.path.isdir(entry) and "Results" in entry.name]
+
+    for cochlea in cochleae:
+        cochlea_annotations = [a for a in annotation_dirs if len(find_annotations(a, cochlea)["center_strings"]) != 0]
+        print(f"Evaluating data for cochlea {cochlea} in {cochlea_annotations}.")
+
+        # get segmentation data
+        input_path = f"{cochlea}/images/ome-zarr/{seg_name}.ome.zarr"
+        input_path, fs = get_s3_path(input_path)
+        data_seg = read_image_data(input_path, input_key)
+
+        table_seg_path = f"{cochlea}/tables/{seg_name}/default.tsv"
+        table_path_s3, fs = get_s3_path(table_seg_path)
+        with fs.open(table_path_s3, "r") as f:
+            table_seg = pd.read_csv(f, sep="\t")
+
+        seg_string = "-".join(seg_name.split("_"))
+        table_measurement_path = f"{cochlea}/tables/{seg_name}/{marker_name}_{seg_string}_object-measures.tsv"
+        table_path_s3, fs = get_s3_path(table_measurement_path)
+        with fs.open(table_path_s3, "r") as f:
+            table_measurement = pd.read_csv(f, sep="\t")
+
+        # find median intensities by averaging all individual annotations for specific crops
+        annotation_dics = {}
+        annotated_centers = []
+        for annotation_dir in cochlea_annotations:
+
+            annotation_dic = localize_median_intensities(annotation_dir, cochlea, data_seg, table_measurement)
+            annotated_centers.extend(annotation_dic["center_strings"])
+            annotation_dics[annotation_dir] = annotation_dic
+
+        annotated_centers = list(set(annotated_centers))
+        intensity_dic = {}
+        # loop over all annotated blocks
+        for annotated_center in annotated_centers:
+            intensities = []
+            # loop over annotated block from single user
+            for annotator_key in annotation_dics.keys():
+                if annotated_center not in annotation_dics[annotator_key]["center_strings"]:
+                    continue
+                else:
+                    intensities.append(annotation_dics[annotator_key][annotated_center]["median_intensity"])
+            intensity_dic[annotated_center] = {"median_intensity": float(sum(intensities) / len(intensities))}
+
+        table_seg = apply_nearest_threshold(intensity_dic, table_seg, table_measurement)
+        cochlea_str = "-".join(cochlea.split("_"))
+        out_path = os.path.join(output_dir, f"{cochlea_str}_{marker_name}_{seg_string}.tsv")
+        table_seg.to_csv(out_path, sep="\t", index=False)
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Assign each segmentation instance a marker based on annotation thresholds.")
+
+    parser.add_argument('-c', "--cochlea", type=str, nargs="+", required=True,
+                        help="Cochlea(e) to process.")
+    parser.add_argument('-o', "--output", type=str, required=True, help="Output directory.")
+
+    parser.add_argument('-a', '--annotation_dirs', type=str, nargs="+", default=None,
+                        help="Directories containing marker annotations.")
+
+    args = parser.parse_args()
+
+    evaluate_marker_annotation(
+        args.cochlea, args.output, args.annotation_dirs,
+    )
+
+
+if __name__ == "__main__":
+
+    main()
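The half-distance limits in apply_nearest_threshold partition the cochlea's length fraction into one bin per annotated crop, and each bin is thresholded with that crop's median intensity. A small worked example of the binning (fractions are made up), followed by a hypothetical call of the new entry point (the script's module name is not shown on this page, and the cochlea identifier and directories are illustrative only):

# Worked example of the length-fraction binning with made-up fractions.
lf_fractions = [0.2, 0.5, 0.9]                  # sorted length fractions of three annotated crops
lf_limits = [0]                                 # start of the cochlea
for i in range(len(lf_fractions) - 1):
    lf_limits.append((lf_fractions[i] + lf_fractions[i + 1]) / 2)
lf_limits.append(1)                             # end of the cochlea
print(lf_limits)                                # [0, 0.35, 0.7, 1]: each crop thresholds one bin

# Hypothetical call of the new entry point; names and paths are assumptions for illustration.
evaluate_marker_annotation(
    cochleae=["M_LR_000000_L"],
    output_dir="./marker_tables",
    annotation_dirs=["/path/to/AnnotatorA_Results", "/path/to/AnnotatorB_Results"],
)
# Writes <cochlea>_<marker>_<seg>.tsv (e.g. ..._GFP_SGN-v2.tsv with the defaults) containing a
# "marker_labels" column: 1 = positive, 2 = negative, 0 = not assigned to an annotated block.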

0 commit comments