Export table with annotation parameters

schilling40 · schilling40 · commit d742c76bcf9d · 2025-11-03T17:51:10.000+01:00
diff --git a/flamingo_tools/segmentation/chreef_utils.py b/flamingo_tools/segmentation/chreef_utils.py
@@ -121,7 +121,7 @@ def check_overlap(ref_id):
     with futures.ThreadPoolExecutor(n_threads) as pool:
         results = list(tqdm(pool.map(check_overlap, ref_ids), total=len(ref_ids)))
 
-    matching_ids = {r for r in results if r is not None}
+    matching_ids = [r for r in results if r is not None]
     return matching_ids
 
 
@@ -141,39 +141,64 @@ def find_inbetween_ids(
         A list of the ids that are in between the respective thresholds.
     """
     # negative annotation == 1, positive annotation == 2
-    negexc_negatives = find_overlapping_masks(arr_negexc, roi_seg, label_id_base=1)
-    allweak_positives = find_overlapping_masks(arr_allweak, roi_seg, label_id_base=2)
-    inbetween_ids = [int(i) for i in set(negexc_negatives).intersection(set(allweak_positives))]
-    return inbetween_ids, allweak_positives, negexc_negatives
+    negexc_neg = find_overlapping_masks(arr_negexc, roi_seg, label_id_base=1)
+    allweak_pos = find_overlapping_masks(arr_allweak, roi_seg, label_id_base=2)
 
+    negexc_pos = find_overlapping_masks(arr_negexc, roi_seg, label_id_base=2)
+    allweak_neg = find_overlapping_masks(arr_allweak, roi_seg, label_id_base=1)
+    inbetween_ids = [int(i) for i in set(negexc_neg).intersection(set(allweak_pos))]
+    return inbetween_ids, allweak_pos, negexc_neg, allweak_neg, negexc_pos
 
-def get_median_intensity(file_negexc, file_allweak, center, data_seg, table, column="median",
+
+def get_crop_parameters(file_negexc, file_allweak, center, data_seg, table, column="median",
                          resolution=0.38):
+    """Compute the annotation parameters and intensity threshold of one annotated crop.
+
+    Returns a dictionary with the label ids found by `find_inbetween_ids`
+    ("inbetween_ids", "allweak_pos", "allweak_neg", "negexc_neg", "negexc_pos"),
+    the mean of `column` per id group ("<group>_mean", NaN for an empty group) and
+    "median_intensity": the median of `column` over the in-between ids, else the average
+    of the lowest positive and highest negative value, else None.
+    """
     arr_negexc = tifffile.imread(file_negexc)
     arr_allweak = tifffile.imread(file_allweak)
 
     roi_halo = tuple([r // 2 for r in arr_negexc.shape])
     roi = get_roi(center, roi_halo, resolution=resolution)
 
     roi_seg = data_seg[roi]
-    inbetween_ids, allweak_positives, negexc_negatives = find_inbetween_ids(arr_negexc, arr_allweak, roi_seg)
+    inbetween_ids, allweak_pos, negexc_neg, allweak_neg, negexc_pos = find_inbetween_ids(
+        arr_negexc, arr_allweak, roi_seg
+    )
+    id_groups = {"allweak_pos": allweak_pos, "allweak_neg": allweak_neg,
+                 "negexc_neg": negexc_neg, "negexc_pos": negexc_pos}
+    param_dic = {"inbetween_ids": inbetween_ids, **id_groups}
+    # Filter the table once per id group; the subsets are reused for the threshold below.
+    subsets = {name: table[table["label_id"].isin(ids)] for name, ids in id_groups.items()}
+    for name, subset in subsets.items():
+        param_dic[f"{name}_mean"] = float(subset[column].mean())
+
     if len(inbetween_ids) == 0:
-        if len(allweak_positives) == 0 and len(negexc_negatives) == 0:
-            return None
+        if len(allweak_pos) == 0 and len(negexc_neg) == 0:
+            param_dic["median_intensity"] = None
+            return param_dic
 
-        subset_positive = table[table["label_id"].isin(allweak_positives)]
-        subset_negative = table[table["label_id"].isin(negexc_negatives)]
+        subset_positive = subsets["allweak_pos"]
+        subset_negative = subsets["negexc_neg"]
         lowest_positive = float(subset_positive[column].min())
         highest_negative = float(subset_negative[column].max())
         if np.isnan(lowest_positive) or np.isnan(highest_negative):
-            return None
+            param_dic["median_intensity"] = None
+            return param_dic
 
-        return np.average([lowest_positive, highest_negative])
+        param_dic["median_intensity"] = np.average([lowest_positive, highest_negative])
+        return param_dic
 
     subset = table[table["label_id"].isin(inbetween_ids)]
     intensities = list(subset[column])
+    param_dic["median_intensity"] = np.median(intensities)
 
-    return np.median(list(intensities))
+    return param_dic
 
 
 def localize_median_intensities(annotation_dir, cochlea, data_seg, table_measure, column="median", pattern=None,
@@ -188,12 +213,14 @@ def localize_median_intensities(annotation_dir, cochlea, data_seg, table_measure
         print(f"Getting median intensities for {center_coord}.")
         file_pos = annotation_dic[center_str]["file_pos"]
         file_neg = annotation_dic[center_str]["file_neg"]
-        median_intensity = get_median_intensity(file_neg, file_pos, center_coord, data_seg,
+        param_dic = get_crop_parameters(file_neg, file_pos, center_coord, data_seg,
                                                 table_measure, column=column, resolution=resolution)
 
+        median_intensity = param_dic["median_intensity"]
         if median_intensity is None:
             print(f"No threshold identified for {center_str}.")
 
-        annotation_dic[center_str]["median_intensity"] = median_intensity
+        for key in param_dic.keys():
+            annotation_dic[center_str][key] = param_dic[key]
 
     return annotation_dic
diff --git a/scripts/measurements/evaluate_marker_annotations_subtype.py b/scripts/measurements/evaluate_marker_annotations_subtype.py
@@ -13,14 +13,15 @@
 # The cochlea for the CHReef analysis.
 
 COCHLEAE = {
+    "M_LR_000099_L": {"seg_data": "PV_SGN_v2", "subtype": ["Calb1", "Lypd1"], "intensity": "ratio"},
     "M_AMD_N180_L": {"seg_data": "SGN_merged", "subtype": ["CR", "Lypd1", "Ntng1"], "intensity": "absolute"},
     "M_AMD_N180_R": {"seg_data": "SGN_merged", "subtype": ["CR", "Ntng1"], "intensity": "absolute"},
-    "M_LR_000099_L": {"seg_data": "PV_SGN_v2", "subtype": ["Calb1", "Lypd1"], "intensity": "ratio"},
+    "M_LR_000098_L": {"seg_data": "SGN_v2", "subtype": ["CR", "Ntng1"], "intensity": "ratio"},
     "M_LR_000184_L": {"seg_data": "SGN_v2", "subtype": ["Prph"], "output_seg": "SGN_v2b", "intensity": "ratio"},
     "M_LR_000184_R": {"seg_data": "SGN_v2", "subtype": ["Prph"], "output_seg": "SGN_v2b", "intensity": "ratio"},
     "M_LR_000214_L": {"seg_data": "PV_SGN_v2", "subtype": ["Calb1"], "intensity": "ratio"},
     "M_LR_000260_L": {"seg_data": "SGN_v2", "subtype": ["Prph", "Tuj1"], "intensity": "ratio"},
-
+    "M_LR_N152_L": {"seg_data": "SGN_v2", "subtype": ["CR", "Ntng1"], "intensity": "ratio"},
 }
 
 
@@ -135,7 +136,34 @@ def find_thresholds(cochlea_annotations, cochlea, data_seg, table_measurement, c
             "annotation_missing": annotator_missing,
         }
 
-    return intensity_dic
+    return intensity_dic, annotation_dics
+
+
+def get_annotation_table(annotation_dics, subtype):
+    """Build one table row per annotator and annotated crop for a subtype.
+
+    Args:
+        annotation_dics: Mapping of annotation directory to its annotation dictionary.
+        subtype: Subtype name written into the "subtype" column of every row.
+
+    Returns:
+        DataFrame holding the median intensity, the id counts and the group means per crop.
+    """
+    count_columns = ("inbetween_ids", "allweak_pos", "allweak_neg", "negexc_pos", "negexc_neg")
+    mean_columns = ("allweak_pos_mean", "allweak_neg_mean", "negexc_pos_mean", "negexc_neg_mean")
+    records = []
+    for annotation_dir, annotation_dic in annotation_dics.items():
+        # The annotator is the second "_"-separated token of the directory name.
+        annotator = os.path.basename(annotation_dir).split("_")[1]
+        for center_str in annotation_dic["center_strings"]:
+            params = annotation_dic[center_str]
+            record = {"annotator": annotator, "subtype": subtype, "center_str": center_str,
+                      "median_intensity": params["median_intensity"]}
+            # Id lists are exported as counts, the mean values are copied unchanged.
+            record.update({name: len(params[name]) for name in count_columns})
+            record.update({name: params[name] for name in mean_columns})
+            records.append(record)
+    return pd.DataFrame(records)
 
 
 def evaluate_marker_annotation(
@@ -181,7 +209,8 @@ def evaluate_marker_annotation(
         subtypes = COCHLEAE[cochlea]["subtype"]
         subtype_str = "_".join(subtypes)
         out_path = os.path.join(output_dir, f"{cochlea_str}_{subtype_str}_{seg_string}.tsv")
-        if os.path.exists(out_path) and not force:
+        annot_out = os.path.join(output_dir, f"{cochlea_str}_{subtype_str}_{seg_string}_annotations.tsv")
+        if os.path.exists(out_path) and os.path.exists(annot_out) and not force:
             continue
 
         # Get the segmentation data and table.
@@ -198,6 +227,7 @@ def evaluate_marker_annotation(
         intensity_mode = COCHLEAE[cochlea]["intensity"]
 
         # iterate through subtypes
+        annot_table = None
         for subtype in subtypes:
             pattern = subtype
             if intensity_mode == "ratio":
@@ -218,8 +248,14 @@ def evaluate_marker_annotation(
             print(f"Evaluating data for cochlea {cochlea} in {cochlea_annotations}.")
 
             # Find the thresholds from the annotated blocks and save them if specified.
-            intensity_dic = find_thresholds(cochlea_annotations, cochlea, data_seg,
+            intensity_dic, annot_dic = find_thresholds(cochlea_annotations, cochlea, data_seg,
                                             table_measurement, column=column, pattern=pattern)
+
+            if annot_table is None:
+                annot_table = get_annotation_table(annot_dic, subtype)
+            else:
+                annot_table = pd.concat([annot_table, get_annotation_table(annot_dic, subtype)], ignore_index=True)
+
             if threshold_save_dir is not None:
                 os.makedirs(threshold_save_dir, exist_ok=True)
                 threshold_out_path = os.path.join(threshold_save_dir, f"{cochlea_str}_{subtype}_{seg_string}.json")
@@ -241,7 +277,11 @@ def evaluate_marker_annotation(
 
         # Save the table with positives / negatives for all SGNs.
         os.makedirs(output_dir, exist_ok=True)
-        table_seg.to_csv(out_path, sep="\t", index=False)
+
+        if not os.path.exists(out_path) or force:
+            table_seg.to_csv(out_path, sep="\t", index=False)
+        if not os.path.exists(annot_out) or force:
+            annot_table.to_csv(annot_out, sep="\t", index=False)
 
 
 def main():