Skip to content

Commit 2a7b66e

Browse files
Update SGN subtype measurements
1 parent 0ebea67 commit 2a7b66e

File tree

2 files changed

+180
-11
lines changed

2 files changed

+180
-11
lines changed
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
import json
2+
import os
3+
4+
import numpy as np
5+
import pandas as pd
6+
from flamingo_tools.s3_utils import create_s3_target, BUCKET_NAME
7+
8+
# Cochlea datasets to process.
COCHLEAE = ["G_EK_000233_L"]
# Per-cochlea lists of component ids that count as valid segmentation
# components; datasets without an entry fall back to component [1]
# (see the .get(dataset, [1]) lookups below).
SGN_COMPONENTS = {}
IHC_COMPONENTS = {"G_EK_000233_L": [1, 2, 3, 4, 5, 8]}
11+
12+
13+
def open_json(fs, path):
    """Load a JSON document from the project bucket.

    Args:
        fs: Filesystem object used to open the remote file.
        path: Path of the JSON file relative to the bucket root.

    Returns:
        The parsed JSON content.
    """
    full_path = os.path.join(BUCKET_NAME, path)
    with fs.open(full_path, "r") as handle:
        return json.load(handle)
18+
19+
20+
def open_tsv(fs, path):
    """Load a tab-separated table from the project bucket.

    Args:
        fs: Filesystem object used to open the remote file.
        path: Path of the TSV file relative to the bucket root.

    Returns:
        The table as a pandas DataFrame.
    """
    full_path = os.path.join(BUCKET_NAME, path)
    with fs.open(full_path, "r") as handle:
        return pd.read_csv(handle, sep="\t")
25+
26+
27+
def measure_sgns(fs):
    """Count the SGNs in the valid segmentation components of each cochlea.

    Args:
        fs: Filesystem object used to access the project bucket.

    Raises:
        KeyError: If a cochlea's dataset does not contain the SGN segmentation source.
    """
    print("SGNs:")
    seg_name = "SGN_v2"
    for dataset in COCHLEAE:
        print("Cochlea:", dataset)
        dataset_info = open_json(fs, os.path.join(dataset, "dataset.json"))
        sources = dataset_info["sources"]
        # Fail loudly instead of via `assert`, which is stripped under `python -O`.
        if seg_name not in sources:
            raise KeyError(f"Missing source {seg_name} for cochlea {dataset}.")

        source_info = sources[seg_name]["segmentation"]
        table_path = source_info["tableData"]["tsv"]["relativePath"]
        table = open_tsv(fs, os.path.join(dataset, table_path, "default.tsv"))

        # Count only objects that belong to the valid components for this dataset.
        component_labels = table.component_labels.values
        component_ids = SGN_COMPONENTS.get(dataset, [1])
        n_sgns = np.isin(component_labels, component_ids).sum()
        print("N-SGNs:", n_sgns)
44+
45+
46+
def measure_ihcs(fs):
    """Count the IHCs in the valid segmentation components of each cochlea.

    Args:
        fs: Filesystem object used to access the project bucket.

    Raises:
        KeyError: If a cochlea's dataset does not contain the IHC segmentation source.
    """
    print("IHCs:")
    seg_name = "IHC_v5"
    for dataset in COCHLEAE:
        print("Cochlea:", dataset)
        dataset_info = open_json(fs, os.path.join(dataset, "dataset.json"))
        sources = dataset_info["sources"]
        # Fail loudly instead of via `assert`, which is stripped under `python -O`.
        if seg_name not in sources:
            raise KeyError(f"Missing source {seg_name} for cochlea {dataset}.")

        source_info = sources[seg_name]["segmentation"]
        table_path = source_info["tableData"]["tsv"]["relativePath"]
        table = open_tsv(fs, os.path.join(dataset, table_path, "default.tsv"))

        # Count only objects that belong to the valid components for this dataset.
        component_labels = table.component_labels.values
        component_ids = IHC_COMPONENTS.get(dataset, [1])
        n_ihcs = np.isin(component_labels, component_ids).sum()
        print("N-IHCs:", n_ihcs)
63+
64+
65+
def measure_synapses(fs):
    """Report the average number of matched synapses per IHC for each cochlea.

    Args:
        fs: Filesystem object used to access the project bucket.

    Raises:
        KeyError: If a cochlea's dataset does not contain the synapse spot source
            or the IHC segmentation source.
    """
    print("Synapses:")
    spot_name = "synapses_v3_IHC_v5"
    seg_name = "IHC_v5"
    for dataset in COCHLEAE:
        print("Cochlea:", dataset)
        dataset_info = open_json(fs, os.path.join(dataset, "dataset.json"))
        sources = dataset_info["sources"]
        # Fail loudly instead of via `assert`, which is stripped under `python -O`.
        if spot_name not in sources:
            raise KeyError(f"Missing source {spot_name} for cochlea {dataset}.")

        # The synapse detections are stored as a spot source.
        source_info = sources[spot_name]["spots"]
        table_path = source_info["tableData"]["tsv"]["relativePath"]
        table = open_tsv(fs, os.path.join(dataset, table_path, "default.tsv"))

        # The IHC table is needed to restrict to valid segmentation components.
        source_info = sources[seg_name]["segmentation"]
        table_path = source_info["tableData"]["tsv"]["relativePath"]
        ihc_table = open_tsv(fs, os.path.join(dataset, table_path, "default.tsv"))

        # Keep only synapses matched to IHCs in the valid components.
        ihc_components = IHC_COMPONENTS.get(dataset, [1])
        valid_ihcs = ihc_table.label_id[ihc_table.component_labels.isin(ihc_components)]
        table = table[table.matched_ihc.isin(valid_ihcs)]

        # NOTE(review): IHCs without any matched synapse do not appear in
        # `matched_ihc` and so are excluded from the average — confirm this
        # is the intended statistic.
        _, syn_count = np.unique(table.matched_ihc.values, return_counts=True)
        print("Avg Syn. per IHC:")
        if syn_count.size == 0:
            # Avoid nan / RuntimeWarning from np.mean and np.std on empty input.
            print("No matched synapses found.")
        else:
            print(np.mean(syn_count), "+-", np.std(syn_count))
90+
91+
92+
def main():
    """Run all measurements (SGNs, IHCs, synapses) against the project bucket."""
    fs = create_s3_target()
    for measure in (measure_sgns, measure_ihcs, measure_synapses):
        measure(fs)
97+
98+
99+
# Entry point: run the measurements when executed as a script.
if __name__ == "__main__":
    main()

scripts/measurements/sgn_subtypes.py

Lines changed: 80 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,20 +3,22 @@
33

44
import pandas as pd
55
from skimage.filters import threshold_otsu
6-
from flamingo_tools.s3_utils import BUCKET_NAME, create_s3_target
6+
7+
from flamingo_tools.s3_utils import BUCKET_NAME, create_s3_target, get_s3_path
8+
from flamingo_tools.measurements import compute_object_measures
79

810
# Map from cochlea names to channels
911
COCHLEAE_FOR_SUBTYPES = {
1012
"M_LR_000099_L": ["PV", "Calb1", "Lypd1"],
1113
"M_LR_000214_L": ["PV", "CR", "Calb1"],
1214
"M_AMD_N62_L": ["PV", "CR", "Calb1"],
15+
"M_AMD_N180_R": ["CR", "Ntng1", "CTBP2"],
1316
# Mutant / some stuff is weird.
1417
# "M_AMD_Runx1_L": ["PV", "Lypd1", "Calb1"],
1518
# This one still has to be stitched:
1619
# "M_LR_000184_R": {"PV", "Prph"},
1720
# We don't have PV here, so we exclude these two for now.
1821
# "M_AMD_00N180_L": {"CR", "Ntng1", "Lypd1"},
19-
# "M_AMD_00N180_R": {"CR", "Ntng1", "CTBP2"},
2022
}
2123

2224
# Map from channels to subtypes.
@@ -31,14 +33,14 @@
3133
"Calb1": "Type-Ib",
3234
"Lypd1": "Type-Ic",
3335
"Prph": "Type-II",
36+
"Ntng1": "Type-Ib/c",
3437
}
3538

36-
# TODO
39+
# For custom thresholds.
3740
THRESHOLDS = {
3841
"M_LR_000214_L": {
3942
},
4043
"M_AMD_N62_L": {
41-
"Calb1": 380,
4244
},
4345
}
4446

@@ -54,6 +56,8 @@ def check_processing_status():
5456
# print(name)
5557
# breakpoint()
5658

59+
missing_tables = {}
60+
5761
for cochlea, channels in COCHLEAE_FOR_SUBTYPES.items():
5862
try:
5963
content = s3.open(f"{BUCKET_NAME}/{cochlea}/dataset.json", mode="r", encoding="utf-8")
@@ -74,24 +78,87 @@ def check_processing_status():
7478

7579
if "SGN_v2" in sources:
7680
print("SGN segmentation is present with name SGN_v2")
81+
seg_name = "SGN-v2"
82+
table_folder = "tables/SGN_v2"
7783
elif "PV_SGN_v2" in sources:
7884
print("SGN segmentation is present with name PV_SGN_v2")
85+
seg_name = "PV-SGN-v2"
86+
table_folder = "tables/PV_SGN_v2"
87+
elif "CR_SGN_v2" in sources:
88+
print("SGN segmentation is present with name CR_SGN_v2")
89+
seg_name = "CR-SGN-v2"
90+
table_folder = "tables/CR_SGN_v2"
7991
else:
8092
print("SGN segmentation is MISSING")
93+
print()
94+
continue
95+
96+
# Check which tables we have.
97+
expected_tables = [f"{chan}_{seg_name}_object-measures.tsv" for chan in channels]
98+
tables = s3.ls(os.path.join(BUCKET_NAME, cochlea, table_folder))
99+
tables = [os.path.basename(tab) for tab in tables]
100+
101+
this_missing_tables = []
102+
for exp_tab in expected_tables:
103+
if exp_tab not in tables:
104+
print("Missing table:", exp_tab)
105+
this_missing_tables.append(exp_tab)
106+
missing_tables[cochlea] = this_missing_tables
81107
print()
82108

109+
return missing_tables
110+
111+
112+
def require_missing_tables(missing_tables):
    """Compute the object-measure tables reported missing by check_processing_status.

    Args:
        missing_tables: Mapping from cochlea name to the list of missing
            object-measure table filenames
            (``{channel}_{seg_name}_object-measures.tsv``).
    """
    # Results are written locally; upload to S3 is still a TODO (see below).
    output_root = "./object_measurements"

    for cochlea, missing_tabs in missing_tables.items():
        # The SGN segmentation is based on the PV channel when available,
        # otherwise on CR.
        seg_name = "PV_SGN_v2" if "PV" in COCHLEAE_FOR_SUBTYPES[cochlea] else "CR_SGN_v2"
        for missing in missing_tabs:
            # Table filenames start with the channel name, e.g.
            # "Calb1_PV_SGN_v2_object-measures.tsv".
            channel = missing.split("_")[0]
            print(cochlea, channel)

            # Remote locations of the image channel, the segmentation, and
            # its default table.
            img_s3 = f"{cochlea}/images/ome-zarr/{channel}.ome.zarr"
            seg_s3 = f"{cochlea}/images/ome-zarr/{seg_name}.ome.zarr"
            seg_table_s3 = f"{cochlea}/tables/{seg_name}/default.tsv"
            img_path, _ = get_s3_path(img_s3)
            seg_path, _ = get_s3_path(seg_s3)

            output_folder = os.path.join(output_root, cochlea)
            os.makedirs(output_folder, exist_ok=True)
            output_table_path = os.path.join(output_folder, f"{channel}_{seg_name}_object-measures.tsv")
            compute_object_measures(
                image_path=img_path,
                segmentation_path=seg_path,
                segmentation_table_path=seg_table_s3,
                output_table_path=output_table_path,
                image_key="s0",
                segmentation_key="s0",
                s3_flag=True,
                component_list=[1],
                n_threads=8,
            )
            # NOTE(review): this returns after computing the first missing
            # table only — looks like a debugging stop; confirm it is
            # intentional before running over all cochleae.
            return

    # TODO S3 upload
146+
def get_data_for_subtype_analysis():
85147
s3 = create_s3_target()
86-
seg_name = "PV_SGN_v2"
87148

88149
threshold_dict = {}
89150
output_folder = "./subtype_analysis"
90151
os.makedirs(output_folder, exist_ok=True)
91152

92153
for cochlea, channels in COCHLEAE_FOR_SUBTYPES.items():
93-
# Remove the PV channel, which we don't need for analysis.
94-
channels = channels[1:]
154+
if "PV" in channels:
155+
reference_channel = "PV"
156+
seg_name = "PV_SGN_v2"
157+
else:
158+
assert "CR" in channels
159+
reference_channel = "CR"
160+
seg_name = "CR_SGN_v2"
161+
reference_channel, seg_name
95162

96163
content = s3.open(f"{BUCKET_NAME}/{cochlea}/dataset.json", mode="r", encoding="utf-8")
97164
info = json.loads(content.read())
@@ -157,10 +224,12 @@ def analyze_subtypes_intensity_based():
157224
# Double check if this is the right channel. Maybe we try domain adaptation here?
158225
# M_LR_000214_L: PV looks correct, segmentation is not there yet.
159226
# M_AMD_N62_L: PV signal and segmentation look good.
160-
# M_AMD_Runx1_L: PV looks a bit off, but should work. Segmentation is not there yet.
227+
# M_AMD_N180_R: Need SGN segmentation based on CR.
161228
def main():
    # Find which object-measure tables are missing on S3 and compute them.
    missing_tables = check_processing_status()
    require_missing_tables(missing_tables)

    # analyze_subtypes_intensity_based()
164233

165234

166235
if __name__ == "__main__":

0 commit comments

Comments
 (0)