Skip to content

Commit 2a7b66e

Browse files
Update SGN subtype measurements
1 parent 0ebea67 commit 2a7b66e

File tree

2 files changed

+180
-11
lines changed

2 files changed

+180
-11
lines changed
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
import json
2+
import os
3+
4+
import numpy as np
5+
import pandas as pd
6+
from flamingo_tools.s3_utils import create_s3_target, BUCKET_NAME
7+
8+
# Cochlea datasets to process.
COCHLEAE = ["G_EK_000233_L"]
# Per-cochlea lists of component ids that count as valid segmentation
# components; datasets without an entry fall back to component [1]
# (see the .get(dataset, [1]) lookups below).
SGN_COMPONENTS = {}
IHC_COMPONENTS = {"G_EK_000233_L": [1, 2, 3, 4, 5, 8]}
11+
12+
13+
def open_json(fs, path):
    """Load a JSON document from the project bucket.

    Args:
        fs: Filesystem object used to open the remote file.
        path: Path of the JSON file relative to the bucket root.

    Returns:
        The parsed JSON content.
    """
    full_path = os.path.join(BUCKET_NAME, path)
    with fs.open(full_path, "r") as handle:
        return json.load(handle)
18+
19+
20+
def open_tsv(fs, path):
    """Load a tab-separated table from the project bucket.

    Args:
        fs: Filesystem object used to open the remote file.
        path: Path of the TSV file relative to the bucket root.

    Returns:
        The table as a pandas DataFrame.
    """
    full_path = os.path.join(BUCKET_NAME, path)
    with fs.open(full_path, "r") as handle:
        return pd.read_csv(handle, sep="\t")
25+
26+
27+
def measure_sgns(fs):
    """Count the SGNs in the valid segmentation components of each cochlea.

    Args:
        fs: Filesystem object used to access the project bucket.

    Raises:
        KeyError: If a cochlea's dataset does not contain the SGN segmentation source.
    """
    print("SGNs:")
    seg_name = "SGN_v2"
    for dataset in COCHLEAE:
        print("Cochlea:", dataset)
        dataset_info = open_json(fs, os.path.join(dataset, "dataset.json"))
        sources = dataset_info["sources"]
        # Fail loudly instead of via `assert`, which is stripped under `python -O`.
        if seg_name not in sources:
            raise KeyError(f"Missing source {seg_name} for cochlea {dataset}.")

        source_info = sources[seg_name]["segmentation"]
        table_path = source_info["tableData"]["tsv"]["relativePath"]
        table = open_tsv(fs, os.path.join(dataset, table_path, "default.tsv"))

        # Count only objects that belong to the valid components for this dataset.
        component_labels = table.component_labels.values
        component_ids = SGN_COMPONENTS.get(dataset, [1])
        n_sgns = np.isin(component_labels, component_ids).sum()
        print("N-SGNs:", n_sgns)
44+
45+
46+
def measure_ihcs(fs):
    """Count the IHCs in the valid segmentation components of each cochlea.

    Args:
        fs: Filesystem object used to access the project bucket.

    Raises:
        KeyError: If a cochlea's dataset does not contain the IHC segmentation source.
    """
    print("IHCs:")
    seg_name = "IHC_v5"
    for dataset in COCHLEAE:
        print("Cochlea:", dataset)
        dataset_info = open_json(fs, os.path.join(dataset, "dataset.json"))
        sources = dataset_info["sources"]
        # Fail loudly instead of via `assert`, which is stripped under `python -O`.
        if seg_name not in sources:
            raise KeyError(f"Missing source {seg_name} for cochlea {dataset}.")

        source_info = sources[seg_name]["segmentation"]
        table_path = source_info["tableData"]["tsv"]["relativePath"]
        table = open_tsv(fs, os.path.join(dataset, table_path, "default.tsv"))

        # Count only objects that belong to the valid components for this dataset.
        component_labels = table.component_labels.values
        component_ids = IHC_COMPONENTS.get(dataset, [1])
        n_ihcs = np.isin(component_labels, component_ids).sum()
        print("N-IHCs:", n_ihcs)
63+
64+
65+
def measure_synapses(fs):
    """Report the average number of matched synapses per IHC for each cochlea.

    Args:
        fs: Filesystem object used to access the project bucket.

    Raises:
        KeyError: If a cochlea's dataset does not contain the synapse spot source
            or the IHC segmentation source.
    """
    print("Synapses:")
    spot_name = "synapses_v3_IHC_v5"
    seg_name = "IHC_v5"
    for dataset in COCHLEAE:
        print("Cochlea:", dataset)
        dataset_info = open_json(fs, os.path.join(dataset, "dataset.json"))
        sources = dataset_info["sources"]
        # Fail loudly instead of via `assert`, which is stripped under `python -O`.
        if spot_name not in sources:
            raise KeyError(f"Missing source {spot_name} for cochlea {dataset}.")

        # The synapse detections are stored as a spot source.
        source_info = sources[spot_name]["spots"]
        table_path = source_info["tableData"]["tsv"]["relativePath"]
        table = open_tsv(fs, os.path.join(dataset, table_path, "default.tsv"))

        # The IHC table is needed to restrict to valid segmentation components.
        source_info = sources[seg_name]["segmentation"]
        table_path = source_info["tableData"]["tsv"]["relativePath"]
        ihc_table = open_tsv(fs, os.path.join(dataset, table_path, "default.tsv"))

        # Keep only synapses matched to IHCs in the valid components.
        ihc_components = IHC_COMPONENTS.get(dataset, [1])
        valid_ihcs = ihc_table.label_id[ihc_table.component_labels.isin(ihc_components)]
        table = table[table.matched_ihc.isin(valid_ihcs)]

        # NOTE(review): IHCs without any matched synapse do not appear in
        # `matched_ihc` and so are excluded from the average — confirm this
        # is the intended statistic.
        _, syn_count = np.unique(table.matched_ihc.values, return_counts=True)
        print("Avg Syn. per IHC:")
        if syn_count.size == 0:
            # Avoid nan / RuntimeWarning from np.mean and np.std on empty input.
            print("No matched synapses found.")
        else:
            print(np.mean(syn_count), "+-", np.std(syn_count))
90+
91+
92+
def main():
    """Run all measurements (SGNs, IHCs, synapses) against the project bucket."""
    fs = create_s3_target()
    for measure in (measure_sgns, measure_ihcs, measure_synapses):
        measure(fs)
97+
98+
99+
# Entry point: run the measurements when executed as a script.
if __name__ == "__main__":
    main()

scripts/measurements/sgn_subtypes.py

Lines changed: 80 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,20 +3,22 @@
33

44
import pandas as pd
55
from skimage.filters import threshold_otsu
6-
from flamingo_tools.s3_utils import BUCKET_NAME, create_s3_target
6+
7+
from flamingo_tools.s3_utils import BUCKET_NAME, create_s3_target, get_s3_path
8+
from flamingo_tools.measurements import compute_object_measures
79

810
# Map from cochlea names to channels
911
COCHLEAE_FOR_SUBTYPES = {
1012
"M_LR_000099_L": ["PV", "Calb1", "Lypd1"],
1113
"M_LR_000214_L": ["PV", "CR", "Calb1"],
1214
"M_AMD_N62_L": ["PV", "CR", "Calb1"],
15+
"M_AMD_N180_R": ["CR", "Ntng1", "CTBP2"],
1316
# Mutant / some stuff is weird.
1417
# "M_AMD_Runx1_L": ["PV", "Lypd1", "Calb1"],
1518
# This one still has to be stitched:
1619
# "M_LR_000184_R": {"PV", "Prph"},
1720
# We don't have PV here, so we exclude these two for now.
1821
# "M_AMD_00N180_L": {"CR", "Ntng1", "Lypd1"},
19-
# "M_AMD_00N180_R": {"CR", "Ntng1", "CTBP2"},
2022
}
2123

2224
# Map from channels to subtypes.
@@ -31,14 +33,14 @@
3133
"Calb1": "Type-Ib",
3234
"Lypd1": "Type-Ic",
3335
"Prph": "Type-II",
36+
"Ntng1": "Type-Ib/c",
3437
}
3538

36-
# TODO
39+
# For custom thresholds.
3740
THRESHOLDS = {
3841
"M_LR_000214_L": {
3942
},
4043
"M_AMD_N62_L": {
41-
"Calb1": 380,
4244
},
4345
}
4446

@@ -54,6 +56,8 @@ def check_processing_status():
5456
# print(name)
5557
# breakpoint()
5658

59+
missing_tables = {}
60+
5761
for cochlea, channels in COCHLEAE_FOR_SUBTYPES.items():
5862
try:
5963
content = s3.open(f"{BUCKET_NAME}/{cochlea}/dataset.json", mode="r", encoding="utf-8")
@@ -74,24 +78,87 @@ def check_processing_status():
7478

7579
if "SGN_v2" in sources:
7680
print("SGN segmentation is present with name SGN_v2")
81+
seg_name = "SGN-v2"
82+
table_folder = "tables/SGN_v2"
7783
elif "PV_SGN_v2" in sources:
7884
print("SGN segmentation is present with name PV_SGN_v2")
85+
seg_name = "PV-SGN-v2"
86+
table_folder = "tables/PV_SGN_v2"
87+
elif "CR_SGN_v2" in sources:
88+
print("SGN segmentation is present with name CR_SGN_v2")
89+
seg_name = "CR-SGN-v2"
90+
table_folder = "tables/CR_SGN_v2"
7991
else:
8092
print("SGN segmentation is MISSING")
93+
print()
94+
continue
95+
96+
# Check which tables we have.
97+
expected_tables = [f"{chan}_{seg_name}_object-measures.tsv" for chan in channels]
98+
tables = s3.ls(os.path.join(BUCKET_NAME, cochlea, table_folder))
99+
tables = [os.path.basename(tab) for tab in tables]
100+
101+
this_missing_tables = []
102+
for exp_tab in expected_tables:
103+
if exp_tab not in tables:
104+
print("Missing table:", exp_tab)
105+
this_missing_tables.append(exp_tab)
106+
missing_tables[cochlea] = this_missing_tables
81107
print()
82108

109+
return missing_tables
110+
111+
112+
def require_missing_tables(missing_tables):
    """Compute the object-measure tables reported missing by check_processing_status.

    Args:
        missing_tables: Mapping from cochlea name to the list of missing
            object-measure table filenames
            (``{channel}_{seg_name}_object-measures.tsv``).
    """
    # Results are written locally; upload to S3 is still a TODO (see below).
    output_root = "./object_measurements"

    for cochlea, missing_tabs in missing_tables.items():
        # The SGN segmentation is based on the PV channel when available,
        # otherwise on CR.
        seg_name = "PV_SGN_v2" if "PV" in COCHLEAE_FOR_SUBTYPES[cochlea] else "CR_SGN_v2"
        for missing in missing_tabs:
            # Table filenames start with the channel name, e.g.
            # "Calb1_PV_SGN_v2_object-measures.tsv".
            channel = missing.split("_")[0]
            print(cochlea, channel)

            # Remote locations of the image channel, the segmentation, and
            # its default table.
            img_s3 = f"{cochlea}/images/ome-zarr/{channel}.ome.zarr"
            seg_s3 = f"{cochlea}/images/ome-zarr/{seg_name}.ome.zarr"
            seg_table_s3 = f"{cochlea}/tables/{seg_name}/default.tsv"
            img_path, _ = get_s3_path(img_s3)
            seg_path, _ = get_s3_path(seg_s3)

            output_folder = os.path.join(output_root, cochlea)
            os.makedirs(output_folder, exist_ok=True)
            output_table_path = os.path.join(output_folder, f"{channel}_{seg_name}_object-measures.tsv")
            compute_object_measures(
                image_path=img_path,
                segmentation_path=seg_path,
                segmentation_table_path=seg_table_s3,
                output_table_path=output_table_path,
                image_key="s0",
                segmentation_key="s0",
                s3_flag=True,
                component_list=[1],
                n_threads=8,
            )
            # NOTE(review): this returns after computing the first missing
            # table only — looks like a debugging stop; confirm it is
            # intentional before running over all cochleae.
            return

    # TODO S3 upload
146+
def get_data_for_subtype_analysis():
85147
s3 = create_s3_target()
86-
seg_name = "PV_SGN_v2"
87148

88149
threshold_dict = {}
89150
output_folder = "./subtype_analysis"
90151
os.makedirs(output_folder, exist_ok=True)
91152

92153
for cochlea, channels in COCHLEAE_FOR_SUBTYPES.items():
93-
# Remove the PV channel, which we don't need for analysis.
94-
channels = channels[1:]
154+
if "PV" in channels:
155+
reference_channel = "PV"
156+
seg_name = "PV_SGN_v2"
157+
else:
158+
assert "CR" in channels
159+
reference_channel = "CR"
160+
seg_name = "CR_SGN_v2"
161+
reference_channel, seg_name
95162

96163
content = s3.open(f"{BUCKET_NAME}/{cochlea}/dataset.json", mode="r", encoding="utf-8")
97164
info = json.loads(content.read())
@@ -157,10 +224,12 @@ def analyze_subtypes_intensity_based():
157224
# Double check if this is the right channel. Maybe we try domain adaptation here?
158225
# M_LR_000214_L: PV looks correct, segmentation is not there yet.
159226
# M_AMD_N62_L: PV signal and segmentation look good.
160-
# M_AMD_Runx1_L: PV looks a bit off, but should work. Segmentation is not there yet.
227+
# M_AMD_N180_R: Need SGN segmentation based on CR.
161228
def main():
    # Find which object-measure tables are missing on S3 and compute them.
    missing_tables = check_processing_status()
    require_missing_tables(missing_tables)

    # analyze_subtypes_intensity_based()
164233

165234

166235
if __name__ == "__main__":

0 commit comments

Comments
 (0)