Skip to content

Commit 94774a0

Browse files
Update subtype analysis
1 parent 7d92250 commit 94774a0

File tree

1 file changed

+199
-57
lines changed

1 file changed

+199
-57
lines changed

scripts/measurements/sgn_subtypes.py

Lines changed: 199 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
import os
33
import sys
44
from glob import glob
5-
from subprocess import run
65

76
import matplotlib.pyplot as plt
87
import pandas as pd
@@ -31,21 +30,6 @@
3130
"M_LR_000099_L", "M_LR_000214_L", "M_AMD_N62_L", "M_LR_000184_R", "M_LR_000184_L"
3231
]
3332

34-
# Map from channels to subtypes.
35-
# Comment Aleyna:
36-
# The signal will be a gradient between different subtypes:
37-
# For example CR is expressed more, is brigther,
38-
# in type 1a SGNs but exist in type Ib SGNs and to a lesser extent in type 1c.
39-
# Same is also true for other markers so we will need to set a threshold for each.
40-
# Luckily the signal seems less variable compared to GFP.
41-
CHANNEL_TO_TYPE = {
42-
"CR": "Type-Ia",
43-
"Calb1": "Type-Ib",
44-
"Lypd1": "Type-Ic",
45-
"Prph": "Type-II",
46-
"Ntng1": "Type-Ib/c",
47-
}
48-
4933
# For custom thresholds.
5034
THRESHOLDS = {
5135
"M_LR_000214_L": {
@@ -55,29 +39,50 @@
5539
}
5640

5741
# For consistent colors.
58-
ALL_COLORS = ["red", "blue", "orange", "yellow", "cyan", "magenta", "green", "purple"]
42+
ALL_COLORS = ["red", "blue", "orange", "yellow", "cyan", "magenta", "green", "purple", "gray", "black"]
5943
COLORS = {}
6044

6145
PLOT_OUT = "./subtype_plots"
6246

63-
# TODO: updates based on Aleyna's feedback.
64-
# Subtype mapping
6547

66-
# Combined visualization for the cochleae
67-
# Can we visualize the tonotopy in subtypes and not stainings?
68-
# It would also be good to have subtype percentages per cochlea and pooled together as a diagram and tonotopy?
69-
# This would help to see if different staining gives same/similar results.
7048
# Type Ia ; CR+ / Calb1- or Calb1- / Lypd1-
7149
# Type Ib: CR+ / Calb1+ or Calb1+ / Lypd1+
7250
# Type Ic: CR-/Calb1+ - or Calb1- / Lypd1+
7351
# Type II: CR-/Calb1- or Calb1- / Lypd1- or Prph+
52+
def stain_to_type(stain):
53+
# Normalize the staining string.
54+
stains = stain.replace(" ", "").split("/")
55+
assert len(stains) in (1, 2)
7456

75-
# > It's good to see that for the N mice the Ntng1C and Lypd1 separate from CR so well on the thresholds. Can I visualize these samples ones segmentation masks are done to verify the Ntng1C thresholds? As this is a quite clear signal I'm not sure if taking the middle of the histogram would be the best choice.
76-
# The segmentations are in MoBIE already. I need to send you the tables for analyzing the signals. Will send them later.
57+
if len(stains) == 1:
58+
stain_norm = stain
59+
else:
60+
s1, s2 = sorted(stains)
61+
stain_norm = f"{s1}/{s2}"
7762

78-
# > Where are we at PV-Prph segmentation results from MLR184_L and R for SGN type II analysis? This would hopefully give <5% Prph+ cells.
79-
# The cochleae are in MoBIE. Segmentation and Prph signal look good! I will include it in the next analysis.
80-
# Need tonotopic mapping from Martin and then compute the intensities.
63+
stain_to_type = {
64+
# Combinations of Calb1 and CR:
65+
"CR+/Calb1+": "Type Ib",
66+
"CR-/Calb1+": "Type Ib/Ic", # Calb1 is expressed at Ic less than Lypd1 but more then CR
67+
"CR+/Calb1-": "Type Ia",
68+
"CR-/Calb1-": "Type II",
69+
70+
# Combinations of Calb1 and Lypd1:
71+
"Calb1+/Lypd1+": "Type Ib/Ic",
72+
"Calb1+/Lypd1-": "Type Ib",
73+
"Calb1-/Lypd1+": "Type Ic",
74+
"Calb1-/Lypd1-": "inconclusive", # Can be Type Ia or Type II
75+
76+
# Prph is isolated.
77+
"Prph+": "Type II",
78+
"Prph-": "Type I",
79+
}
80+
81+
if stain_norm not in stain_to_type:
82+
breakpoint()
83+
raise ValueError(f"Invalid stain combination: {stain_norm}")
84+
85+
return stain_to_type[stain_norm], stain_norm
8186

8287

8388
def check_processing_status():
@@ -189,8 +194,9 @@ def require_missing_tables(missing_tables):
189194
)
190195

191196
# S3 upload
192-
run(["rclone", "--progress", "copyto", output_folder,
193-
f"cochlea-lightsheet:cochlea-lightsheet/{cochlea}/tables/{seg_name}"])
197+
# from subprocess import run
198+
# run(["rclone", "--progress", "copyto", output_folder,
199+
# f"cochlea-lightsheet:cochlea-lightsheet/{cochlea}/tables/{seg_name}"])
194200

195201

196202
def compile_data_for_subtype_analysis():
@@ -209,14 +215,20 @@ def compile_data_for_subtype_analysis():
209215
assert "CR" in channels
210216
reference_channel = "CR"
211217
seg_name = "CR_SGN_v2"
212-
reference_channel, seg_name
213218

214219
content = s3.open(f"{BUCKET_NAME}/{cochlea}/dataset.json", mode="r", encoding="utf-8")
215220
info = json.loads(content.read())
216221
sources = info["sources"]
217222

218223
# Load the segmentation table.
219-
seg_source = sources[seg_name]
224+
try:
225+
seg_source = sources[seg_name]
226+
except KeyError as e:
227+
if seg_name == "PV_SGN_v2":
228+
seg_source = sources["SGN_v2"]
229+
seg_name = "SGN_v2"
230+
else:
231+
raise e
220232
table_folder = os.path.join(
221233
BUCKET_NAME, cochlea, seg_source["segmentation"]["tableData"]["tsv"]["relativePath"]
222234
)
@@ -232,12 +244,19 @@ def compile_data_for_subtype_analysis():
232244
# Analyze the different channels (= different subtypes).
233245
reference_intensity = None
234246
for channel in channels:
235-
# Load the intensity table.
236-
intensity_path = os.path.join(table_folder, f"{channel}_{seg_name.replace('_', '-')}_object-measures.tsv")
237-
table_content = s3.open(intensity_path, mode="rb")
247+
# Load the intensity table, prefer local.
248+
table_name = f"{channel}_{seg_name.replace('_', '-')}_object-measures.tsv"
249+
intensity_path = os.path.join("object_measurements", cochlea, table_name)
250+
251+
if os.path.exists(intensity_path):
252+
intensities = pd.read_csv(intensity_path, sep="\t")
253+
else:
254+
intensity_path = os.path.join(table_folder, table_name)
255+
table_content = s3.open(intensity_path, mode="rb")
256+
257+
intensities = pd.read_csv(table_content, sep="\t")
258+
intensities = intensities[intensities.label_id.isin(valid_sgns)]
238259

239-
intensities = pd.read_csv(table_content, sep="\t")
240-
intensities = intensities[intensities.label_id.isin(valid_sgns)]
241260
assert len(table) == len(intensities)
242261
assert (intensities.label_id.values == table.label_id.values).all()
243262

@@ -258,11 +277,20 @@ def _plot_histogram(table, column, name, show_plots, class_names=None, apply_thr
258277
data = table[column].values
259278
threshold = threshold_otsu(data)
260279

280+
if class_names is not None:
281+
assert len(class_names) == 2
282+
c0, c1 = class_names
283+
subtype_classification = [c0 if datum < threshold else c1 for datum in data]
284+
261285
fig, ax = plt.subplots(1)
262286
ax.hist(data, bins=24)
263287
if apply_threshold:
264288
ax.axvline(x=threshold, color='red', linestyle='--')
265-
ax.set_title(f"{name}\n threshold: {threshold}")
289+
if class_names is None:
290+
ax.set_title(f"{name}\n threshold: {threshold}")
291+
else:
292+
pos_perc = len([st for st in subtype_classification if st == c1]) / float(len(subtype_classification))
293+
ax.set_title(f"{name}\n threshold: {threshold}\n %{c1}: {pos_perc * 100}")
266294
else:
267295
ax.set_title(name)
268296

@@ -271,11 +299,9 @@ def _plot_histogram(table, column, name, show_plots, class_names=None, apply_thr
271299
else:
272300
os.makedirs(PLOT_OUT, exist_ok=True)
273301
plt.savefig(f"{PLOT_OUT}/{name}.png")
302+
plt.close()
274303

275304
if class_names is not None:
276-
assert len(class_names) == 2
277-
c0, c1 = class_names
278-
subtype_classification = [c0 if datum < threshold else c1 for datum in data]
279305
return subtype_classification
280306

281307

@@ -310,6 +336,7 @@ def _plot_2d(ratios, name, show_plots, classification=None, colors=None):
310336
else:
311337
os.makedirs(PLOT_OUT, exist_ok=True)
312338
plt.savefig(f"{PLOT_OUT}/{name}.png")
339+
plt.close()
313340

314341

315342
def _plot_tonotopic_mapping(freq, classification, name, colors, show_plots):
@@ -324,6 +351,11 @@ def _plot_tonotopic_mapping(freq, classification, name, colors, show_plots):
324351
fig, ax = plt.subplots(figsize=(8, 4))
325352
for cat, vals in frequency_mapped.items():
326353
ax.scatter(x_positions, vals.value, label=cat, color=colors[cat])
354+
355+
main_ticks = range(len(bin_labels))
356+
ax.set_xticks(main_ticks)
357+
ax.set_xticklabels(bin_labels)
358+
ax.set_xlabel("Octave band (kHz)")
327359
ax.legend()
328360
ax.set_title(name)
329361

@@ -334,12 +366,103 @@ def _plot_tonotopic_mapping(freq, classification, name, colors, show_plots):
334366
plt.savefig(f"{PLOT_OUT}/{name}.png")
335367
plt.close()
336368

369+
return frequency_mapped
370+
371+
372+
# Combined visualization for the cochleae
373+
# Can we visualize the tonotopy in subtypes and not stainings?
374+
# It would also be good to have subtype percentages per cochlea and pooled together as a diagram and tonotopy?
375+
# This would help to see if different staining gives same/similar results.
376+
def combined_analysis(results, show_plots):
377+
#
378+
# Create the tonotopic mapping.
379+
#
380+
summary = {}
381+
for cochlea, result in results.items():
382+
if cochlea == "M_LR_000214_L": # One of the signals cannot be analyzed.
383+
continue
384+
mapping = result["tonotopic_mapping"]
385+
summary[cochlea] = mapping
386+
387+
colors = {}
388+
389+
fig, axes = plt.subplots(len(summary), sharey=True, figsize=(8, 8))
390+
for i, (cochlea, frequency_mapped) in enumerate(summary.items()):
391+
ax = axes[i]
392+
393+
result = next(iter(frequency_mapped.values()))
394+
bin_labels = pd.unique(result["octave_band"])
395+
band_to_x = {band: i for i, band in enumerate(bin_labels)}
396+
x_positions = result["octave_band"].map(band_to_x)
397+
398+
for cat, vals in frequency_mapped.items():
399+
values = vals.value
400+
cat = cat[:cat.find(" (")]
401+
if cat not in colors:
402+
current_colors = list(colors.values())
403+
next_color = ALL_COLORS[len(current_colors)]
404+
colors[cat] = next_color
405+
ax.scatter(x_positions, values, label=cat, color=colors[cat])
406+
407+
main_ticks = range(len(bin_labels))
408+
ax.set_xticks(main_ticks)
409+
ax.set_xticklabels(bin_labels)
410+
ax.set_title(cochlea)
411+
ax.legend()
412+
413+
ax.set_xlabel("Octave band (kHz)")
414+
plt.tight_layout()
415+
if show_plots:
416+
plt.show()
417+
else:
418+
plt.savefig("./subtype_plots/overview_tonotopic_mapping.png")
419+
plt.close()
420+
421+
#
422+
# Create the overview figure.
423+
#
424+
summary, types = {}, []
425+
for cochlea, result in results.items():
426+
if cochlea == "M_LR_000214_L": # One of the signals cannot be analyzed.
427+
continue
428+
429+
classification = result["classification"]
430+
classification = [cls[:cls.find(" (")] for cls in classification]
431+
n_tot = len(classification)
432+
433+
this_types = list(set(classification))
434+
types.extend(this_types)
435+
summary[cochlea] = {}
436+
for stype in types:
437+
n_type = len([cls for cls in classification if cls == stype])
438+
type_ratio = float(n_type) / n_tot
439+
summary[cochlea][stype] = type_ratio
440+
441+
types = list(set(types))
442+
df = pd.DataFrame(summary).fillna(0) # missing values → 0
443+
444+
# Transpose → cochleae on x-axis, subtypes stacked
445+
ax = df.T.plot(kind="bar", stacked=True, figsize=(8, 5))
446+
447+
ax.set_ylabel("Fraction")
448+
ax.set_xlabel("Cochlea")
449+
ax.set_title("Subtype Fractions per Cochlea")
450+
plt.xticks(rotation=0)
451+
plt.tight_layout()
452+
453+
if show_plots:
454+
plt.show()
455+
else:
456+
plt.savefig("./subtype_plots/overview.png")
457+
plt.close()
458+
337459

338460
def analyze_subtype_data_regular(show_plots=True):
339461
global PLOT_OUT, COLORS # noqa
340462
PLOT_OUT = "subtype_plots/regular_mice"
341463

342464
files = sorted(glob("./subtype_analysis/*.tsv"))
465+
results = {}
343466

344467
for ff in files:
345468
cochlea = os.path.basename(ff)[:-len("_subtype_analysis.tsv")]
@@ -354,10 +477,10 @@ def analyze_subtype_data_regular(show_plots=True):
354477
tab = pd.read_csv(ff, sep="\t")
355478

356479
# 1.) Plot simple intensity histograms, including otsu threshold.
357-
# for chan in channels:
358-
# column = f"{chan}_median"
359-
# name = f"{cochlea}_{chan}_histogram"
360-
# _plot_histogram(tab, column, name, show_plots, apply_threshold=chan != reference_channel)
480+
for chan in channels:
481+
column = f"{chan}_median"
482+
name = f"{cochlea}_{chan}_histogram"
483+
_plot_histogram(tab, column, name, show_plots, apply_threshold=chan != reference_channel)
361484

362485
# 2.) Plot ratio histograms, including otsu threshold.
363486
ratios = {}
@@ -372,9 +495,18 @@ def analyze_subtype_data_regular(show_plots=True):
372495
ratios[f"{chan}_{reference_channel}"] = tab[column].values
373496

374497
# Unify the classification and assign colors
375-
cls1, cls2 = classification[0], classification[1]
376-
assert len(cls1) == len(cls2)
377-
classification = [f"{c1} / {c2}" for c1, c2 in zip(cls1, cls2)]
498+
assert len(classification) in (1, 2)
499+
if len(classification) == 2:
500+
cls1, cls2 = classification[0], classification[1]
501+
assert len(cls1) == len(cls2)
502+
classification = [f"{c1} / {c2}" for c1, c2 in zip(cls1, cls2)]
503+
show_2d = True
504+
else:
505+
classification = classification[0]
506+
show_2d = False
507+
508+
classification = [stain_to_type(cls) for cls in classification]
509+
classification = [f"{stype} ({stain})" for stype, stain in classification]
378510

379511
unique_labels = set(classification)
380512
for label in unique_labels:
@@ -391,21 +523,31 @@ def analyze_subtype_data_regular(show_plots=True):
391523
freq = tab["frequency[kHz]"].values
392524
assert len(freq) == len(classification)
393525
name = f"{cochlea}_tonotopic_mapping"
394-
_plot_tonotopic_mapping(freq, classification, name=name, colors=COLORS, show_plots=show_plots)
526+
tonotopic_mapping = _plot_tonotopic_mapping(
527+
freq, classification, name=name, colors=COLORS, show_plots=show_plots
528+
)
395529

396530
# 4.) Plot 2D space of ratios.
397-
name = f"{cochlea}_2d"
398-
_plot_2d(ratios, name, show_plots, classification=classification, colors=COLORS)
531+
if show_2d:
532+
name = f"{cochlea}_2d"
533+
_plot_2d(ratios, name, show_plots, classification=classification, colors=COLORS)
399534

535+
results[cochlea] = {"classification": classification, "tonotopic_mapping": tonotopic_mapping}
400536

401-
# General notes:
402-
# See:
537+
combined_analysis(results, show_plots=show_plots)
538+
539+
540+
# More TODO:
541+
# > It's good to see that for the N mice the Ntng1C and Lypd1 separate from CR so well on the thresholds.
542+
# Can I visualize these samples ones segmentation masks are done to verify the Ntng1C thresholds?
543+
# As this is a quite clear signal I'm not sure if taking the middle of the histogram would be the best choice.
544+
# The segmentations are in MoBIE already. I need to send you the tables for analyzing the signals. Will send them later.
403545
def main():
404-
missing_tables = check_processing_status()
405-
require_missing_tables(missing_tables)
406-
compile_data_for_subtype_analysis()
546+
# missing_tables = check_processing_status()
547+
# require_missing_tables(missing_tables)
548+
# compile_data_for_subtype_analysis()
407549

408-
# analyze_subtype_data_regular(show_plots=False)
550+
analyze_subtype_data_regular(show_plots=False)
409551

410552
# TODO
411553
# analyze_subtype_data_N_mice()

0 commit comments

Comments
 (0)