|
| 1 | +import json |
| 2 | +import os |
| 3 | +import pickle |
| 4 | + |
| 5 | +# import matplotlib.pyplot as plt |
| 6 | +# import numpy as np |
| 7 | +import pandas as pd |
| 8 | +# import tifffile |
| 9 | +# import zarr |
| 10 | +# from matplotlib import cm, colors |
| 11 | + |
| 12 | +from flamingo_tools.s3_utils import BUCKET_NAME, create_s3_target |
| 13 | + |
# Local mount holding the precomputed per-object GFP intensity measurement tables.
INTENSITY_ROOT = "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/mobie_project/cochlea-lightsheet/tables/measurements" # noqa
# The cochleae (left and right per animal) for the CHReef analysis.
COCHLEAE = [
    "M_LR_000143_L",
    "M_LR_000144_L",
    "M_LR_000145_L",
    "M_LR_000153_L",
    "M_LR_000155_L",
    "M_LR_000189_L",
    "M_LR_000143_R",
    "M_LR_000144_R",
    "M_LR_000145_R",
    "M_LR_000153_R",
    "M_LR_000155_R",
    "M_LR_000189_R",
]
| 30 | + |
| 31 | + |
def download_data():
    """Fetch the SGN segmentation tables and GFP intensity measurements for all CHReef cochleae.

    Results are cached in ``./chreef_data.pkl``; if the cache exists it is
    loaded and returned without touching S3.

    Returns:
        dict: Maps each cochlea name in ``COCHLEAE`` to a pandas DataFrame with
        the columns ``label_id``, ``length[µm]``, ``frequency[kHz]``,
        ``marker_labels`` merged (on ``label_id``) with the per-object
        intensity measurements. Cochleae whose table lacks the required
        columns are skipped.

    Raises:
        FileNotFoundError: If the local intensity table for a cochlea is missing.
    """
    # Check the cache first so a cached run does not need an S3 connection.
    cache_path = "./chreef_data.pkl"
    if os.path.exists(cache_path):
        with open(cache_path, "rb") as f:
            return pickle.load(f)

    s3 = create_s3_target()
    source_name = "SGN_v2"

    chreef_data = {}
    for cochlea in COCHLEAE:
        print("Processing cochlea:", cochlea)
        # Read the MoBIE dataset metadata to locate the segmentation table.
        with s3.open(f"{BUCKET_NAME}/{cochlea}/dataset.json", mode="r", encoding="utf-8") as content:
            info = json.loads(content.read())
        sources = info["sources"]

        # Load the segmentation table and filter the compartments.
        source = sources[source_name]["segmentation"]
        rel_path = source["tableData"]["tsv"]["relativePath"]
        with s3.open(os.path.join(BUCKET_NAME, cochlea, rel_path, "default.tsv"), mode="rb") as table_content:
            table = pd.read_csv(table_content, sep="\t")

        # May need to be adjusted for some cochleae.
        table = table[table.component_labels == 1]
        # The relevant values for the analysis; pandas raises KeyError if any
        # column is missing, in which case this cochlea is skipped.
        try:
            values = table[["label_id", "length[µm]", "frequency[kHz]", "marker_labels"]]
        except KeyError:
            print("Could not find the values for", cochlea, "it will be skipped.")
            continue

        # Merge in the per-object GFP intensity measurements from the local table.
        fname = f"{cochlea.replace('_', '-')}_GFP_SGN-v2_object-measures.tsv"
        intensity_file = os.path.join(INTENSITY_ROOT, fname)
        # Explicit raise instead of assert: asserts are stripped under `python -O`.
        if not os.path.exists(intensity_file):
            raise FileNotFoundError(f"Missing intensity table: {intensity_file}")
        intensity_table = pd.read_csv(intensity_file, sep="\t")
        values = values.merge(intensity_table, on="label_id")

        chreef_data[cochlea] = values

    # BUG FIX: the original did `chreef_data = pickle.dump(chreef_data, f)`;
    # pickle.dump returns None, so every cache-miss call returned None.
    with open(cache_path, "wb") as f:
        pickle.dump(chreef_data, f)
    return chreef_data
| 74 | + |
| 75 | + |
def analyze_transduction(chreef_data):
    """Analyze GFP transduction across the CHReef cochleae.

    Args:
        chreef_data: Dict mapping cochlea name to its per-SGN measurement
            DataFrame (as produced by ``download_data``).

    Returns:
        None. Placeholder — the analysis is not implemented yet.
    """
    # BUG FIX: removed a leftover `breakpoint()` debugging call that dropped
    # every run (including non-interactive ones) into the debugger.
    # TODO: implement the transduction analysis.
    pass
| 79 | + |
| 80 | + |
def main():
    """Entry point: fetch the cached CHReef tables, then run the transduction analysis."""
    analyze_transduction(download_data())
| 84 | + |
| 85 | + |
# Run the pipeline only when executed as a script, not on import.
if __name__ == "__main__":
    main()
0 commit comments