Skip to content

Commit 06ac936

Browse files
Update SGN subtype analysis
1 parent 2a7b66e commit 06ac936

File tree

6 files changed

+95
-46
lines changed

6 files changed

+95
-46
lines changed

reproducibility/label_components/repro_label_components.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import pandas as pd
77
from flamingo_tools.s3_utils import get_s3_path
88
from flamingo_tools.segmentation.postprocessing import label_components_sgn, label_components_ihc
9+
from flamingo_tools.segmentation.cochlea_mapping import tonotopic_mapping
910

1011

1112
def repro_label_components(
@@ -14,6 +15,7 @@ def repro_label_components(
1415
s3_credentials: Optional[str] = None,
1516
s3_bucket_name: Optional[str] = None,
1617
s3_service_endpoint: Optional[str] = None,
18+
apply_tonotopic_mapping: bool = False,
1719
):
1820
min_size = 1000
1921
default_threshold_erode = None
@@ -23,7 +25,7 @@ def repro_label_components(
2325
default_cell_type = "sgn"
2426
default_component_list = [1]
2527

26-
with open(ddict, 'r') as myfile:
28+
with open(ddict, "r") as myfile:
2729
data = myfile.read()
2830
param_dicts = json.loads(data)
2931

@@ -39,12 +41,16 @@ def repro_label_components(
3941
cell_type = dic["cell_type"] if "cell_type" in dic else default_cell_type
4042
component_list = dic["component_list"] if "component_list" in dic else default_component_list
4143

42-
table_name = f"{cell_type.upper()}_{unet_version}"
44+
# The table name sometimes has to be over-written.
4345
# table_name = "PV_SGN_V2_DA"
46+
# table_name = "CR_SGN_v2"
47+
48+
table_name = f"{cell_type.upper()}_{unet_version}"
49+
4450
s3_path = os.path.join(f"{cochlea}", "tables", table_name, "default.tsv")
4551
tsv_path, fs = get_s3_path(s3_path, bucket_name=s3_bucket_name,
4652
service_endpoint=s3_service_endpoint, credential_file=s3_credentials)
47-
with fs.open(tsv_path, 'r') as f:
53+
with fs.open(tsv_path, "r") as f:
4854
table = pd.read_csv(f, sep="\t")
4955

5056
if cell_type == "sgn":
@@ -67,8 +73,12 @@ def repro_label_components(
6773
else:
6874
print(f"Custom component(s) have {largest_comp} {cell_type.upper()}s.")
6975

76+
if apply_tonotopic_mapping:
77+
tsv_table = tonotopic_mapping(tsv_table, cell_type=cell_type)
78+
7079
cochlea_str = "-".join(cochlea.split("_"))
7180
table_str = "-".join(table_name.split("_"))
81+
os.makedirs(output_dir, exist_ok=True)
7282
out_path = os.path.join(output_dir, "_".join([cochlea_str, f"{table_str}.tsv"]))
7383

7484
tsv_table.to_csv(out_path, sep="\t", index=False)
@@ -78,8 +88,9 @@ def main():
7888
parser = argparse.ArgumentParser(
7989
description="Script to label segmentation using a segmentation table and graph connected components.")
8090

81-
parser.add_argument('-i', '--input', type=str, required=True, help="Input JSON dictionary.")
82-
parser.add_argument('-o', "--output", type=str, required=True, help="Output directory.")
91+
parser.add_argument("-i", "--input", type=str, required=True, help="Input JSON dictionary.")
92+
parser.add_argument("-o", "--output", type=str, required=True, help="Output directory.")
93+
parser.add_argument("-t", "--tonotopic_mapping", action="store_true", help="Also compute the tonotopic mapping.")
8394

8495
parser.add_argument("--s3_credentials", type=str, default=None,
8596
help="Input file containing S3 credentials. "
@@ -94,6 +105,7 @@ def main():
94105
repro_label_components(
95106
args.input, args.output,
96107
args.s3_credentials, args.s3_bucket_name, args.s3_service_endpoint,
108+
apply_tonotopic_mapping=args.tonotopic_mapping,
97109
)
98110

99111

reproducibility/templates_processing/REAMDE.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ For IHC segmentation run:
1515
After this, run the following to add segmentation to MoBIE, create component labelings and upload to S3:
1616
- templates_transfer/mobie_segmentation_template.sbatch
1717
- templates_transfer/s3_seg_template.sh
18-
- repro_label_components.py
18+
- label_components/repro_label_components.py
1919
- templates_transfer/s3_seg_template.sh
2020

2121
For ribbon synapse detection without associated IHC segmentation run

reproducibility/templates_processing/apply_unet_SGN_template.sbatch

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,8 @@ export MODEL=/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/train
4747

4848
export PREDICTION_INSTANCES=10
4949

50-
# export INPUT_KEY="setup$STAIN_CHANNEL/timepoint0/s0"
51-
export INPUT_KEY="s0"
50+
export INPUT_KEY="setup$STAIN_CHANNEL/timepoint0/s0"
51+
# export INPUT_KEY="s0"
5252

5353
echo "Input directory: ${INPUT}"
5454
echo "Output directory: ${OUTPUT_FOLDER}"

reproducibility/templates_processing/mean_std_SGN_template.sbatch

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,8 @@ export INPUT=/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/"$COC
3333
export OUTPUT_FOLDER=/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/predictions/"$COCHLEA"/"$SEG_NAME"
3434
export SEG_CLASS="sgn"
3535

36-
# export INPUT_KEY="setup$STAIN_CHANNEL/timepoint0/s0"
37-
export INPUT_KEY="s0"
36+
export INPUT_KEY="setup$STAIN_CHANNEL/timepoint0/s0"
37+
# export INPUT_KEY="s0"
3838

3939
if ! [[ -f $OUTPUT_FOLDER ]] ; then
4040
mkdir -p "$OUTPUT_FOLDER"
@@ -51,4 +51,3 @@ cmd_array=( 'import sys,os;'
5151
'output_folder=os.environ["OUTPUT_FOLDER"],seg_class=os.environ["SEG_CLASS"])')
5252
cmd="${cmd_array[*]}"
5353
python -c "$cmd"
54-

scripts/measurements/sgn_subtypes.py

Lines changed: 67 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
import json
22
import os
3+
from glob import glob
4+
from subprocess import run
35

46
import pandas as pd
5-
from skimage.filters import threshold_otsu
67

78
from flamingo_tools.s3_utils import BUCKET_NAME, create_s3_target, get_s3_path
89
from flamingo_tools.measurements import compute_object_measures
@@ -116,7 +117,7 @@ def require_missing_tables(missing_tables):
116117
seg_name = "PV_SGN_v2" if "PV" in COCHLEAE_FOR_SUBTYPES[cochlea] else "CR_SGN_v2"
117118
for missing in missing_tabs:
118119
channel = missing.split("_")[0]
119-
print(cochlea, channel)
120+
print("Computing intensities for:", cochlea, channel)
120121

121122
img_s3 = f"{cochlea}/images/ome-zarr/{channel}.ome.zarr"
122123
seg_s3 = f"{cochlea}/images/ome-zarr/{seg_name}.ome.zarr"
@@ -126,7 +127,9 @@ def require_missing_tables(missing_tables):
126127

127128
output_folder = os.path.join(output_root, cochlea)
128129
os.makedirs(output_folder, exist_ok=True)
129-
output_table_path = os.path.join(output_folder, f"{channel}_{seg_name}_object-measures.tsv")
130+
output_table_path = os.path.join(
131+
output_folder, f"{channel}_{seg_name.replace('_', '-')}_object-measures.tsv"
132+
)
130133
compute_object_measures(
131134
image_path=img_path,
132135
segmentation_path=seg_path,
@@ -136,17 +139,17 @@ def require_missing_tables(missing_tables):
136139
segmentation_key="s0",
137140
s3_flag=True,
138141
component_list=[1],
139-
n_threads=8,
142+
n_threads=16,
140143
)
141-
return
142144

143-
# TODO S3 upload
145+
# S3 upload
146+
run(["rclone", "--progress", "copyto", output_folder,
147+
f"cochlea-lightsheet:cochlea-lightsheet/{cochlea}/tables/{seg_name}"])
144148

145149

146-
def get_data_for_subtype_analysis():
150+
def compile_data_for_subtype_analysis():
147151
s3 = create_s3_target()
148152

149-
threshold_dict = {}
150153
output_folder = "./subtype_analysis"
151154
os.makedirs(output_folder, exist_ok=True)
152155

@@ -176,47 +179,74 @@ def get_data_for_subtype_analysis():
176179
table = table[table.component_labels == 1]
177180
valid_sgns = table.label_id
178181

179-
output_table = {"label_id": table.label_id.values}
180-
threshold_dict[cochlea] = {}
182+
output_table = {"label_id": table.label_id.values, "frequency[kHz]": table["frequency[kHz]"]}
181183

182184
# Analyze the different channels (= different subtypes).
185+
reference_intensity = None
183186
for channel in channels:
184187
# Load the intensity table.
185-
intensity_path = os.path.join(table_folder, f"{channel}_PV-SGN-v2_object-measures.tsv")
186-
try:
187-
table_content = s3.open(intensity_path, mode="rb")
188-
except FileNotFoundError:
189-
print(intensity_path, "is missing")
190-
continue
188+
intensity_path = os.path.join(table_folder, f"{channel}_{seg_name.replace('_', '-')}_object-measures.tsv")
189+
table_content = s3.open(intensity_path, mode="rb")
190+
191191
intensities = pd.read_csv(table_content, sep="\t")
192192
intensities = intensities[intensities.label_id.isin(valid_sgns)]
193193
assert len(table) == len(intensities)
194194
assert (intensities.label_id.values == table.label_id.values).all()
195195

196-
# Intensity based analysis.
197196
medians = intensities["median"].values
198-
199-
# TODO: we need to determine the threshold in a better way / validate it in MoBIE.
200-
intensity_threshold = THRESHOLDS.get(cochlea, {}).get(channel, None)
201-
if intensity_threshold is None:
202-
print("Could not find a threshold for", cochlea, channel, "falling back to OTSU")
203-
intensity_threshold = float(threshold_otsu(medians))
204-
threshold_dict[cochlea][channel] = intensity_threshold
205-
206-
subtype = CHANNEL_TO_TYPE[channel]
207197
output_table[f"{channel}_median"] = medians
208-
output_table[f"is_{subtype}"] = medians > intensity_threshold
209-
210-
# Add the frequency mapping.
211-
# TODO
198+
if channel == reference_channel:
199+
reference_intensity = medians
200+
else:
201+
assert reference_intensity is not None
202+
output_table[f"{channel}_ratio_{reference_channel}"] = medians / reference_intensity
212203

213204
out_path = os.path.join(output_folder, f"{cochlea}_subtype_analysis.tsv")
214205
output_table = pd.DataFrame(output_table)
215-
output_table.to_csv(out_path, sep="\t")
206+
output_table.to_csv(out_path, sep="\t", index=False)
207+
208+
209+
def _plot_histogram(table, column, name, show_plots):
210+
data = table[column].values
211+
212+
# TODO determine automatic threshold
216213

217-
threshold_out = os.path.join(output_folder, "thresholds.json")
218-
with open(threshold_out, "w") as f:
219-
json.dump(threshold_dict, f, sort_keys=True, indent=4)
214+
if show_plots:
215+
pass
216+
else:
217+
pass
218+
219+
220+
# TODO enable over-writing by manual thresholds
221+
def analyze_subtype_data(show_plots=True):
222+
files = sorted(glob("./subtype_analysis/*.tsv"))
223+
224+
for ff in files:
225+
cochlea = os.path.basename(ff)[:-len("_subtype_analysis.tsv")]
226+
print(cochlea)
227+
channels = COCHLEAE_FOR_SUBTYPES[cochlea]
228+
reference_channel = "PV" if "PV" in channels else "CR"
229+
assert channels[0] == reference_channel
230+
231+
tab = pd.read_csv(ff, sep="\t")
232+
breakpoint()
233+
234+
# 1.) Plot simple intensity histograms, including otsu threshold.
235+
for chan in channels:
236+
column = f"{chan}_median"
237+
name = f"{cochlea}_{chan}_histogram.png"
238+
_plot_histogram(tab, column, name, show_plots)
239+
240+
# 2.) Plot ratio histograms, including otsu threshold.
241+
ratios = {}
242+
# TODO ratio based classification and overlay in 2d plot?
243+
for chan in channels[1:]:
244+
column = f"{chan}_median_ratio_{reference_channel}"
245+
name = f"{cochlea}_{chan}_histogram_ratio_{reference_channel}.png"
246+
_plot_histogram(tab, column, name, show_plots)
247+
ratios[f"{chan}_{reference_channel}"] = tab[column].values
248+
249+
# 3.) Plot 2D space of ratios.
220250

221251

222252
# General notes:
@@ -229,7 +259,9 @@ def main():
229259
missing_tables = check_processing_status()
230260
require_missing_tables(missing_tables)
231261

232-
# analyze_subtypes_intensity_based()
262+
# compile_data_for_subtype_analysis()
263+
264+
# analyze_subtype_data()
233265

234266

235267
if __name__ == "__main__":

scripts/more-annotations/extract_sgn_annotations.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,13 +94,19 @@ def downscale_segmentation():
9494
)
9595

9696

97+
# Note: consider different normalization strategy for these cochleae and normalize by local intensity
98+
# rather than by global values.
99+
97100
# Also double check empty positions again and make sure they don't contain SGNs
98101
# Additional positions for LaVision annotations:
99102
# {"position":[2031.0655170248258,1925.206039671767,249.14546086048554],"timepoint":0}
100103
# {"position":[2378.3720460599393,2105.471228531872,303.9285928812524],"timepoint":0}
101104
# {"position":[1619.3251178227529,3444.7351705689553,271.2360278843609],"timepoint":0}
102105
# {"position":[2358.2784398426843,1503.2211953830192,762.7325586759833],"timepoint":0}
103106

107+
# Position in Marmoset:
108+
# {"position":[2462.7875134103206,2818.067344942212,1177.1380214828991],"timepoint":0}
109+
104110

105111
def main():
106112
# download_lavision_crops()

0 commit comments

Comments (0)