Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 20 additions & 5 deletions flamingo_tools/segmentation/chreef_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,17 @@ def coord_from_string(center_str):
return tuple([int(c) for c in center_str.split("-")])


def find_annotations(annotation_dir, cochlea) -> dict:
"""Create dictionary for analysis of ChReef annotations.
def find_annotations(annotation_dir: str, cochlea: str) -> dict:
"""Create a dictionary for the analysis of ChReef annotations.

Annotations should have format positive-negative_<cochlea>_crop_<coord>_allNegativeExcluded_thr<thr>.tif

Args:
annotation_dir: Directory containing annotations.
cochlea: The name of the cochlea to analyze.

Returns:
Dictionary with information about the intensity annotations.
"""

def extract_center_string(cochlea, name):
Expand Down Expand Up @@ -58,7 +63,7 @@ def get_roi(coord: tuple, roi_halo: tuple, resolution: float = 0.38) -> Tuple[in
resolution: Resolution of array in µm.

Returns:
region of interest
The region of interest.
"""
coords = list(coord)
# reverse dimensions for correct extraction
Expand Down Expand Up @@ -123,7 +128,10 @@ def find_inbetween_ids(
Args:
arr_negexc: Array with all negatives excluded.
arr_allweak: Array with all weak positives.
roi_sgn: Region of interest of segmentation.
roi_seg: Region of interest of segmentation.

Returns:
A list of the ids that are in between the respective thresholds.
"""
# negative annotation == 1, positive annotation == 2
negexc_negatives = find_overlapping_masks(arr_negexc, roi_seg, label_id_base=1)
Expand All @@ -141,8 +149,12 @@ def get_median_intensity(file_negexc, file_allweak, center, data_seg, table):

roi_seg = data_seg[roi]
inbetween_ids = find_inbetween_ids(arr_negexc, arr_allweak, roi_seg)
if len(inbetween_ids) == 0:
return None

subset = table[table["label_id"].isin(inbetween_ids)]
intensities = list(subset["median"])

return np.median(list(intensities))


Expand All @@ -154,11 +166,14 @@ def localize_median_intensities(annotation_dir, cochlea, data_seg, table_measure

for center_str in annotation_dic["center_strings"]:
center_coord = coord_from_string(center_str)
print(f"Getting mean intensities for {center_coord}.")
print(f"Getting median intensities for {center_coord}.")
file_pos = annotation_dic[center_str]["file_pos"]
file_neg = annotation_dic[center_str]["file_neg"]
median_intensity = get_median_intensity(file_neg, file_pos, center_coord, data_seg, table_measure)

if median_intensity is None:
print(f"No inbetween IDs found for {center_str}.")

annotation_dic[center_str]["median_intensity"] = median_intensity

return annotation_dic
87 changes: 87 additions & 0 deletions scripts/figures/chreef_analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
import json
import os
import pickle

# import matplotlib.pyplot as plt
# import numpy as np
import pandas as pd
# import tifffile
# import zarr
# from matplotlib import cm, colors

from flamingo_tools.s3_utils import BUCKET_NAME, create_s3_target

# Folder that holds the per-cochlea GFP intensity measurement tables.
INTENSITY_ROOT = "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/mobie_project/cochlea-lightsheet/tables/measurements"  # noqa
# The cochleae for the ChReef analysis: the left ('L') and then the right ('R')
# sample for each animal id.
COCHLEAE = [
    f"M_LR_{animal_id}_{side}"
    for side in ("L", "R")
    for animal_id in ("000143", "000144", "000145", "000153", "000155", "000189")
]


def download_data():
    """Download the ChReef measurement tables for all cochleae in COCHLEAE.

    The result is cached in './chreef_data.pkl'; if the cache exists it is
    loaded and returned without any S3 access.

    Returns:
        Dictionary mapping the cochlea name to a dataframe with the label ids,
        length, frequency and marker labels of the segmentation, merged with
        the corresponding GFP intensity measurements.
    """
    # Check the cache first, so that we don't open an S3 connection unnecessarily.
    cache_path = "./chreef_data.pkl"
    if os.path.exists(cache_path):
        with open(cache_path, "rb") as f:
            return pickle.load(f)

    s3 = create_s3_target()
    source_name = "SGN_v2"

    chreef_data = {}
    for cochlea in COCHLEAE:
        print("Processsing cochlea:", cochlea)
        content = s3.open(f"{BUCKET_NAME}/{cochlea}/dataset.json", mode="r", encoding="utf-8")
        info = json.loads(content.read())
        sources = info["sources"]

        # Load the seg table and filter the compartments.
        source = sources[source_name]["segmentation"]
        rel_path = source["tableData"]["tsv"]["relativePath"]
        # Build the S3 key with an f-string (consistent with the dataset.json key
        # above; os.path.join would break on non-posix separators).
        table_content = s3.open(f"{BUCKET_NAME}/{cochlea}/{rel_path}/default.tsv", mode="rb")
        table = pd.read_csv(table_content, sep="\t")

        # May need to be adjusted for some cochleae.
        table = table[table.component_labels == 1]
        # The relevant values for analysis.
        try:
            values = table[["label_id", "length[µm]", "frequency[kHz]", "marker_labels"]]
        except KeyError:
            print("Could not find the values for", cochlea, "it will be skippped.")
            continue

        # Merge in the GFP intensity measurements for this cochlea.
        fname = f"{cochlea.replace('_', '-')}_GFP_SGN-v2_object-measures.tsv"
        intensity_file = os.path.join(INTENSITY_ROOT, fname)
        # Raise explicitly instead of assert, which is stripped under 'python -O'.
        if not os.path.exists(intensity_file):
            raise FileNotFoundError(intensity_file)
        intensity_table = pd.read_csv(intensity_file, sep="\t")
        values = values.merge(intensity_table, on="label_id")

        chreef_data[cochlea] = values

    # Write the cache. NOTE: pickle.dump returns None, so its result must not be
    # assigned back to chreef_data (this previously made the function return None).
    with open(cache_path, "wb") as f:
        pickle.dump(chreef_data, f)
    return chreef_data


def analyze_transduction(chreef_data):
    """Analyze the ChReef transduction based on the per-cochlea measurement tables.

    Args:
        chreef_data: Dictionary mapping the cochlea name to its measurement dataframe,
            as returned by download_data.
    """
    # NOTE(review): removed a leftover 'breakpoint()' call, which dropped every
    # run into the debugger.
    # TODO: implement the transduction analysis.
    pass


def main():
    """Entry point: fetch the ChReef data and run the transduction analysis."""
    analyze_transduction(download_data())


if __name__ == "__main__":
    main()
Loading
Loading