Commit e17d4f4

Merge pull request #61 from computational-cell-analytics/marker_export
This branch adds a script that exports marker annotations as individual binary masks (positive or negative) or as a combined mask. It also improves the intensity-threshold calculation for marker annotations: when there are no IDs shared between the "including all weak positives" and "excluding all negatives" annotations, the average of the lowest positive and the highest negative median intensity is used as the threshold.
2 parents bd03d5e + d070dd7 commit e17d4f4
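
To illustrate the new fallback threshold, here is a minimal sketch with made-up values (not part of the commit); the "label_id" and "median" columns mirror the measurement table used by get_median_intensity:

import numpy as np
import pandas as pd

# Hypothetical measurement table with per-instance median intensities.
table = pd.DataFrame({
    "label_id": [10, 11, 20, 21],
    "median": [120.0, 95.0, 60.0, 40.0],
})

# Assume these IDs overlap the "all weak positives" / "exclude negatives"
# annotations, but the two sets share no IDs (no in-between instances).
allweak_positives = [10, 11]
negexc_negatives = [20, 21]

lowest_positive = float(table[table["label_id"].isin(allweak_positives)]["median"].min())   # 95.0
highest_negative = float(table[table["label_id"].isin(negexc_negatives)]["median"].max())   # 60.0

threshold = np.average([lowest_positive, highest_negative])  # (95.0 + 60.0) / 2 = 77.5
print(threshold)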

3 files changed: +105 −33 lines

flamingo_tools/segmentation/chreef_utils.py

Lines changed: 14 additions & 4 deletions
@@ -137,7 +137,7 @@ def find_inbetween_ids(
     negexc_negatives = find_overlapping_masks(arr_negexc, roi_seg, label_id_base=1)
     allweak_positives = find_overlapping_masks(arr_allweak, roi_seg, label_id_base=2)
     inbetween_ids = [int(i) for i in set(negexc_negatives).intersection(set(allweak_positives))]
-    return inbetween_ids
+    return inbetween_ids, allweak_positives, negexc_negatives


 def get_median_intensity(file_negexc, file_allweak, center, data_seg, table):
@@ -148,9 +148,19 @@ def get_median_intensity(file_negexc, file_allweak, center, data_seg, table):
     roi = get_roi(center, roi_halo)

     roi_seg = data_seg[roi]
-    inbetween_ids = find_inbetween_ids(arr_negexc, arr_allweak, roi_seg)
+    inbetween_ids, allweak_positives, negexc_negatives = find_inbetween_ids(arr_negexc, arr_allweak, roi_seg)
     if len(inbetween_ids) == 0:
-        return None
+        if len(allweak_positives) == 0 and len(negexc_negatives) == 0:
+            return None
+
+        subset_positive = table[table["label_id"].isin(allweak_positives)]
+        subset_negative = table[table["label_id"].isin(negexc_negatives)]
+        lowest_positive = float(subset_positive["median"].min())
+        highest_negative = float(subset_negative["median"].max())
+        if np.isnan(lowest_positive) or np.isnan(highest_negative):
+            return None
+
+        return np.average([lowest_positive, highest_negative])

     subset = table[table["label_id"].isin(inbetween_ids)]
     intensities = list(subset["median"])
@@ -172,7 +182,7 @@ def localize_median_intensities(annotation_dir, cochlea, data_seg, table_measure
         median_intensity = get_median_intensity(file_neg, file_pos, center_coord, data_seg, table_measure)

         if median_intensity is None:
-            print(f"No inbetween IDs found for {center_str}.")
+            print(f"No threshold identified for {center_str}.")

         annotation_dic[center_str]["median_intensity"] = median_intensity

scripts/export_lower_resolution.py

Lines changed: 0 additions & 29 deletions
@@ -96,31 +96,6 @@ def filter_cochlea(
         dilation_iterations=dilation_iterations)


-def filter_marker_instances(cochlea, segmentation, seg_name):
-    """Filter segmentation with marker labels.
-    Positive segmentation instances are set to 1, negative to 2.
-    """
-    internal_path = os.path.join(cochlea, "tables", seg_name, "default.tsv")
-    tsv_path, fs = get_s3_path(internal_path, bucket_name=BUCKET_NAME, service_endpoint=SERVICE_ENDPOINT)
-    with fs.open(tsv_path, "r") as f:
-        table_seg = pd.read_csv(f, sep="\t")
-
-    label_ids_positive = list(table_seg.loc[table_seg["marker_labels"] == 1, "label_id"])
-    label_ids_negative = list(table_seg.loc[table_seg["marker_labels"] == 2, "label_id"])
-
-    label_ids_marker = label_ids_positive + label_ids_negative
-    filter_mask = ~np.isin(segmentation, label_ids_marker)
-    segmentation[filter_mask] = 0
-
-    filter_mask = np.isin(segmentation, label_ids_positive)
-    segmentation[filter_mask] = 1
-    filter_mask = np.isin(segmentation, label_ids_negative)
-    segmentation[filter_mask] = 2
-
-    segmentation = segmentation.astype("uint16")
-    return segmentation
-
-
 def upscale_volume(
     target_data: np.ndarray,
     downscaled_volume: np.ndarray,
@@ -186,9 +161,6 @@ def export_lower_resolution(args):
         if args.filter_by_components is not None:
             print(f"Filtering channel {channel} by components {args.filter_by_components}.")
             data = filter_component(fs, data, args.cochlea, channel, args.filter_by_components)
-        if args.filter_marker_labels:
-            print(f"Filtering marker instances for channel {channel}.")
-            data = filter_marker_instances(args.cochlea, data, channel)
         if args.filter_cochlea_channels is not None:
             us_factor = ds_factor // (2 ** scale)
             upscaled_filter = upscale_volume(data, filter_volume, upscale_factor=us_factor)
@@ -213,7 +185,6 @@ def main():
     parser.add_argument("--filter_ihc_components", nargs="+", type=int, default=[1])
     parser.add_argument("--binarize", action="store_true")
     parser.add_argument("--filter_cochlea_channels", nargs="+", type=str, default=None)
-    parser.add_argument("--filter_marker_labels", action="store_true")
     parser.add_argument("--filter_dilation_iterations", type=int, default=8)
     args = parser.parse_args()

Lines changed: 91 additions & 0 deletions
@@ -0,0 +1,91 @@
+import argparse
+import os
+
+import numpy as np
+import pandas as pd
+import tifffile
+import zarr
+
+from flamingo_tools.s3_utils import get_s3_path, BUCKET_NAME, SERVICE_ENDPOINT
+# from skimage.segmentation import relabel_sequential
+
+
+def filter_marker_instances(cochlea, segmentation, seg_name, group=None):
+    """Filter segmentation with marker labels.
+    Positive segmentation instances are set to 1, negative to 2.
+    """
+    internal_path = os.path.join(cochlea, "tables", seg_name, "default.tsv")
+    tsv_path, fs = get_s3_path(internal_path, bucket_name=BUCKET_NAME, service_endpoint=SERVICE_ENDPOINT)
+    with fs.open(tsv_path, "r") as f:
+        table_seg = pd.read_csv(f, sep="\t")
+
+    label_ids_positive = list(table_seg.loc[table_seg["marker_labels"] == 1, "label_id"])
+    label_ids_negative = list(table_seg.loc[table_seg["marker_labels"] == 2, "label_id"])
+
+    if group is None:
+        label_ids_marker = label_ids_positive + label_ids_negative
+        filter_mask = ~np.isin(segmentation, label_ids_marker)
+        segmentation[filter_mask] = 0
+
+        filter_mask = np.isin(segmentation, label_ids_positive)
+        segmentation[filter_mask] = 1
+        filter_mask = np.isin(segmentation, label_ids_negative)
+        segmentation[filter_mask] = 2
+    elif group == "positive":
+        filter_mask = ~np.isin(segmentation, label_ids_positive)
+        segmentation[filter_mask] = 0
+        filter_mask = np.isin(segmentation, label_ids_positive)
+        segmentation[filter_mask] = 1
+    elif group == "negative":
+        filter_mask = ~np.isin(segmentation, label_ids_negative)
+        segmentation[filter_mask] = 0
+        filter_mask = np.isin(segmentation, label_ids_negative)
+        segmentation[filter_mask] = 2
+    else:
+        raise ValueError("Choose either 'positive' or 'negative' as group value.")
+
+    segmentation = segmentation.astype("uint16")
+    return segmentation
+
+
+def export_lower_resolution(args):
+
+    # iterate through exporting lower resolutions
+    for scale in args.scale:
+        output_folder = os.path.join(args.output_folder, args.cochlea, f"scale{scale}")
+        os.makedirs(output_folder, exist_ok=True)
+
+        for group in ["positive", "negative"]:
+
+            input_key = f"s{scale}"
+            for channel in args.channels:
+
+                out_path = os.path.join(output_folder, f"{channel}_marker_{group}.tif")
+                if os.path.exists(out_path):
+                    continue
+
+                print("Exporting channel", channel)
+                internal_path = os.path.join(args.cochlea, "images", "ome-zarr", f"{channel}.ome.zarr")
+                s3_store, fs = get_s3_path(internal_path, bucket_name=BUCKET_NAME, service_endpoint=SERVICE_ENDPOINT)
+                with zarr.open(s3_store, mode="r") as f:
+                    data = f[input_key][:]
+                print("Data shape", data.shape)
+
+                print(f"Filtering {group} marker instances.")
+                data = filter_marker_instances(args.cochlea, data, channel, group=group)
+                tifffile.imwrite(out_path, data, bigtiff=True, compression="zlib")
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--cochlea", "-c", required=True)
+    parser.add_argument("--scale", "-s", nargs="+", type=int, required=True)
+    parser.add_argument("--output_folder", "-o", required=True)
+    parser.add_argument("--channels", nargs="+", type=str, default=["PV", "VGlut3", "CTBP2"])
+    args = parser.parse_args()
+
+    export_lower_resolution(args)
+
+
+if __name__ == "__main__":
+    main()
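
To make the relabeling performed by filter_marker_instances concrete, here is a minimal sketch of the combined-mask case (group=None) with a toy segmentation and hard-coded label IDs; in the script the IDs come from the "marker_labels" column of the S3-hosted table:

import numpy as np

# Toy segmentation with instance IDs 3, 7 and 9 (hypothetical values).
segmentation = np.array([[3, 3, 0], [7, 9, 9]], dtype="uint32")
label_ids_positive = [3]   # hypothetical positive marker labels
label_ids_negative = [9]   # hypothetical negative marker labels

# Keep only marker instances, then map positives to 1 and negatives to 2.
filter_mask = ~np.isin(segmentation, label_ids_positive + label_ids_negative)
segmentation[filter_mask] = 0
segmentation[np.isin(segmentation, label_ids_positive)] = 1
segmentation[np.isin(segmentation, label_ids_negative)] = 2
print(segmentation.astype("uint16"))
# [[1 1 0]
#  [0 2 2]]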
