Skip to content

Commit 49fb49f

Browse files
Update SGN Subtype analysis
1 parent 7507d91 commit 49fb49f

File tree

2 files changed

+136
-55
lines changed

2 files changed

+136
-55
lines changed

scripts/figures/util.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ def _get_mapping(animal):
2424
return bin_edges, bin_labels
2525

2626

27-
def frequency_mapping(frequencies, values, animal="mouse", transduction_efficiency=False):
27+
def frequency_mapping(frequencies, values, animal="mouse", transduction_efficiency=False, categorical=False):
2828
# Get the mapping of frequencies to octave bands for the given species.
2929
bin_edges, bin_labels = _get_mapping(animal)
3030

@@ -34,7 +34,18 @@ def frequency_mapping(frequencies, values, animal="mouse", transduction_efficien
3434
df["freq_khz"], bins=bin_edges, labels=bin_labels, right=False
3535
)
3636

37-
if transduction_efficiency: # We compute the transduction efficiency per band.
37+
if categorical:
38+
assert not transduction_efficiency
39+
categories = pd.unique(df.value)
40+
num_tot = df.groupby("octave_band", observed=False).size()
41+
value_by_band = {}
42+
for cat in categories:
43+
pos_cat = df[df.value == cat].groupby("octave_band", observed=False).size()
44+
cat_by_band = (pos_cat / num_tot).reindex(bin_labels)
45+
cat_by_band = cat_by_band.reset_index()
46+
cat_by_band.columns = ["octave_band", "value"]
47+
value_by_band[cat] = cat_by_band
48+
elif transduction_efficiency: # We compute the transduction efficiency per band.
3849
num_pos = df[df["value"] == 1].groupby("octave_band", observed=False).size()
3950
num_tot = df[df["value"].isin([1, 2])].groupby("octave_band", observed=False).size()
4051
value_by_band = (num_pos / num_tot).reindex(bin_labels)

scripts/measurements/sgn_subtypes.py

Lines changed: 123 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import json
22
import os
3+
import sys
34
from glob import glob
45
from subprocess import run
56

@@ -10,18 +11,25 @@
1011
from flamingo_tools.s3_utils import BUCKET_NAME, create_s3_target, get_s3_path
1112
from flamingo_tools.measurements import compute_object_measures
1213

14+
sys.path.append("../figures")
15+
1316
# Map from cochlea names to channels
1417
COCHLEAE_FOR_SUBTYPES = {
1518
"M_LR_000099_L": ["PV", "Calb1", "Lypd1"],
1619
"M_LR_000214_L": ["PV", "CR", "Calb1"],
1720
"M_AMD_N62_L": ["PV", "CR", "Calb1"],
1821
"M_AMD_N180_R": ["CR", "Ntng1", "CTBP2"],
1922
"M_AMD_N180_L": ["CR", "Ntng1", "Lypd1"],
23+
"M_LR_000184_R": ["PV", "Prph"],
24+
"M_LR_000184_L": ["PV", "Prph"],
2025
# Mutant / some stuff is weird.
2126
# "M_AMD_Runx1_L": ["PV", "Lypd1", "Calb1"],
2227
# This one still has to be stitched:
2328
# "M_LR_000184_R": {"PV", "Prph"},
2429
}
30+
REGULAR_COCHLEAE = [
31+
"M_LR_000099_L", "M_LR_000214_L", "M_AMD_N62_L", "M_LR_000184_R", "M_LR_000184_L"
32+
]
2533

2634
# Map from channels to subtypes.
2735
# Comment Aleyna:
@@ -46,8 +54,31 @@
4654
},
4755
}
4856

57+
# For consistent colors.
58+
ALL_COLORS = ["red", "blue", "orange", "yellow", "cyan", "magenta", "green", "purple"]
59+
COLORS = {}
60+
4961
PLOT_OUT = "./subtype_plots"
5062

63+
# TODO: updates based on Aleyna's feedback.
64+
# Subtype mapping
65+
66+
# Combined visualization for the cochleae
67+
# Can we visualize the tonotopy in subtypes and not stainings?
68+
# It would also be good to have subtype percentages per cochlea and pooled together as a diagram and tonotopy?
69+
# This would help to see if different staining gives same/similar results.
70+
# Type Ia ; CR+ / Calb1- or Calb1- / Lypd1-
71+
# Type Ib: CR+ / Calb1+ or Calb1+ / Lypd1+
72+
# Type Ic: CR-/Calb1+ - or Calb1- / Lypd1+
73+
# Type II: CR-/Calb1- or Calb1- / Lypd1- or Prph+
74+
75+
# > It's good to see that for the N mice the Ntng1C and Lypd1 separate from CR so well on the thresholds. Can I visualize these samples ones segmentation masks are done to verify the Ntng1C thresholds? As this is a quite clear signal I'm not sure if taking the middle of the histogram would be the best choice.
76+
# The segmentations are in MoBIE already. I need to send you the tables for analyzing the signals. Will send them later.
77+
78+
# > Where are we at PV-Prph segmentation results from MLR184_L and R for SGN type II analysis? This would hopefully give <5% Prph+ cells.
79+
# The cochleae are in MoBIE. Segmentation and Prph signal look good! I will include it in the next analysis.
80+
# Need tonotopic mapping from Martin and then compute the intensities.
81+
5182

5283
def check_processing_status():
5384
s3 = create_s3_target()
@@ -63,6 +94,8 @@ def check_processing_status():
6394
missing_tables = {}
6495

6596
for cochlea, channels in COCHLEAE_FOR_SUBTYPES.items():
97+
if cochlea not in REGULAR_COCHLEAE:
98+
continue
6699
try:
67100
content = s3.open(f"{BUCKET_NAME}/{cochlea}/dataset.json", mode="r", encoding="utf-8")
68101
except FileNotFoundError:
@@ -125,6 +158,8 @@ def require_missing_tables(missing_tables):
125158
output_root = "./object_measurements"
126159

127160
for cochlea, missing_tabs in missing_tables.items():
161+
if cochlea not in REGULAR_COCHLEAE:
162+
continue
128163
for missing in missing_tabs:
129164
channel = missing.split("_")[0]
130165
seg_name = missing.split("_")[1].replace("-", "_")
@@ -165,6 +200,8 @@ def compile_data_for_subtype_analysis():
165200
os.makedirs(output_folder, exist_ok=True)
166201

167202
for cochlea, channels in COCHLEAE_FOR_SUBTYPES.items():
203+
if cochlea not in REGULAR_COCHLEAE:
204+
continue
168205
if "PV" in channels:
169206
reference_channel = "PV"
170207
seg_name = "PV_SGN_v2"
@@ -217,27 +254,32 @@ def compile_data_for_subtype_analysis():
217254
output_table.to_csv(out_path, sep="\t", index=False)
218255

219256

220-
def _plot_histogram(table, column, name, show_plots, subtype=None):
257+
def _plot_histogram(table, column, name, show_plots, class_names=None, apply_threshold=True):
221258
data = table[column].values
222259
threshold = threshold_otsu(data)
223260

224261
fig, ax = plt.subplots(1)
225262
ax.hist(data, bins=24)
226-
ax.axvline(x=threshold, color='red', linestyle='--')
227-
ax.set_title(f"{name}\n threshold: {threshold}")
263+
if apply_threshold:
264+
ax.axvline(x=threshold, color='red', linestyle='--')
265+
ax.set_title(f"{name}\n threshold: {threshold}")
266+
else:
267+
ax.set_title(name)
228268

229269
if show_plots:
230270
plt.show()
231271
else:
232272
os.makedirs(PLOT_OUT, exist_ok=True)
233273
plt.savefig(f"{PLOT_OUT}/{name}.png")
234274

235-
if subtype is not None:
236-
subtype_classification = [None if datum < threshold else subtype for datum in data]
275+
if class_names is not None:
276+
assert len(class_names) == 2
277+
c0, c1 = class_names
278+
subtype_classification = [c0 if datum < threshold else c1 for datum in data]
237279
return subtype_classification
238280

239281

240-
def _plot_2d(ratios, name, show_plots, classification=None):
282+
def _plot_2d(ratios, name, show_plots, classification=None, colors=None):
241283
fig, ax = plt.subplots(1)
242284
assert len(ratios) == 2
243285
keys = list(ratios.keys())
@@ -247,42 +289,16 @@ def _plot_2d(ratios, name, show_plots, classification=None):
247289
ax.scatter(ratios[k1, k2])
248290

249291
else:
250-
def _combine(a, b):
251-
if a is None and b is None:
252-
return None
253-
elif a is None and b is not None:
254-
return b
255-
elif a is not None and b is None:
256-
return a
257-
else:
258-
return f"{a}-{b}"
259-
260-
classification = [cls for cls in classification if cls is not None]
261-
labels = classification[0].copy()
262-
for cls in classification[1:]:
263-
if cls is None:
264-
continue
265-
labels = [_combine(a, b) for a, b in zip(labels, cls)]
266-
267-
unique_labels = set(ll for ll in labels if ll is not None)
268-
all_colors = ["red", "blue", "orange", "yellow"]
269-
colors = {ll: color for ll, color in zip(unique_labels, all_colors[:len(unique_labels)])}
270-
292+
assert colors is not None
293+
unique_labels = set(classification)
271294
for lbl in unique_labels:
272-
mask = [ll == lbl for ll in labels]
295+
mask = [ll == lbl for ll in classification]
273296
ax.scatter(
274-
[ratios[k1][i] for i in range(len(labels)) if mask[i]],
275-
[ratios[k2][i] for i in range(len(labels)) if mask[i]],
297+
[ratios[k1][i] for i in range(len(classification)) if mask[i]],
298+
[ratios[k2][i] for i in range(len(classification)) if mask[i]],
276299
c=colors[lbl], label=lbl
277300
)
278301

279-
mask_none = [ll is None for ll in labels]
280-
ax.scatter(
281-
[ratios[k1][i] for i in range(len(labels)) if mask_none[i]],
282-
[ratios[k2][i] for i in range(len(labels)) if mask_none[i]],
283-
facecolors="none", edgecolors="black", label="None"
284-
)
285-
286302
ax.legend()
287303

288304
ax.set_xlabel(k1)
@@ -296,52 +312,106 @@ def _combine(a, b):
296312
plt.savefig(f"{PLOT_OUT}/{name}.png")
297313

298314

299-
# TODO enable over-writing by manual thresholds
300-
def analyze_subtype_data(show_plots=True):
315+
def _plot_tonotopic_mapping(freq, classification, name, colors, show_plots):
316+
from util import frequency_mapping
317+
318+
frequency_mapped = frequency_mapping(freq, classification, categorical=True)
319+
result = next(iter(frequency_mapped.values()))
320+
bin_labels = pd.unique(result["octave_band"])
321+
band_to_x = {band: i for i, band in enumerate(bin_labels)}
322+
x_positions = result["octave_band"].map(band_to_x)
323+
324+
fig, ax = plt.subplots(figsize=(8, 4))
325+
for cat, vals in frequency_mapped.items():
326+
ax.scatter(x_positions, vals.value, label=cat, color=colors[cat])
327+
ax.legend()
328+
ax.set_title(name)
329+
330+
if show_plots:
331+
plt.show()
332+
else:
333+
os.makedirs(PLOT_OUT, exist_ok=True)
334+
plt.savefig(f"{PLOT_OUT}/{name}.png")
335+
plt.close()
336+
337+
338+
def analyze_subtype_data_regular(show_plots=True):
339+
global PLOT_OUT, COLORS # noqa
340+
PLOT_OUT = "subtype_plots/regular_mice"
341+
301342
files = sorted(glob("./subtype_analysis/*.tsv"))
302343

303344
for ff in files:
304345
cochlea = os.path.basename(ff)[:-len("_subtype_analysis.tsv")]
346+
if cochlea not in REGULAR_COCHLEAE:
347+
continue
305348
print(cochlea)
306349
channels = COCHLEAE_FOR_SUBTYPES[cochlea]
307-
reference_channel = "PV" if "PV" in channels else "CR"
350+
351+
reference_channel = "PV"
308352
assert channels[0] == reference_channel
309353

310354
tab = pd.read_csv(ff, sep="\t")
311355

312356
# 1.) Plot simple intensity histograms, including otsu threshold.
313-
for chan in channels:
314-
column = f"{chan}_median"
315-
name = f"{cochlea}_{chan}_histogram"
316-
_plot_histogram(tab, column, name, show_plots)
357+
# for chan in channels:
358+
# column = f"{chan}_median"
359+
# name = f"{cochlea}_{chan}_histogram"
360+
# _plot_histogram(tab, column, name, show_plots, apply_threshold=chan != reference_channel)
317361

318362
# 2.) Plot ratio histograms, including otsu threshold.
319-
# TODO ratio based classification and overlay in 2d plot?
320363
ratios = {}
321-
subtype_classification = []
364+
classification = []
322365
for chan in channels[1:]:
323366
column = f"{chan}_ratio_{reference_channel}"
324367
name = f"{cochlea}_{chan}_histogram_ratio_{reference_channel}"
325-
classification = _plot_histogram(
326-
tab, column, name, subtype=CHANNEL_TO_TYPE.get(chan, None), show_plots=show_plots
368+
chan_classification = _plot_histogram(
369+
tab, column, name, class_names=[f"{chan}-", f"{chan}+"], show_plots=show_plots
327370
)
328-
subtype_classification.append(classification)
371+
classification.append(chan_classification)
329372
ratios[f"{chan}_{reference_channel}"] = tab[column].values
330373

331-
# 3.) Plot 2D space of ratios.
374+
# Unify the classification and assign colors
375+
cls1, cls2 = classification[0], classification[1]
376+
assert len(cls1) == len(cls2)
377+
classification = [f"{c1} / {c2}" for c1, c2 in zip(cls1, cls2)]
378+
379+
unique_labels = set(classification)
380+
for label in unique_labels:
381+
if label in COLORS:
382+
continue
383+
if COLORS:
384+
last_color = list(COLORS.values())[-1]
385+
next_color = ALL_COLORS[ALL_COLORS.index(last_color) + 1]
386+
COLORS[label] = next_color
387+
else:
388+
COLORS[label] = ALL_COLORS[0]
389+
390+
# 3.) Plot tonotopic mapping.
391+
freq = tab["frequency[kHz]"].values
392+
assert len(freq) == len(classification)
393+
name = f"{cochlea}_tonotopic_mapping"
394+
_plot_tonotopic_mapping(freq, classification, name=name, colors=COLORS, show_plots=show_plots)
395+
396+
# 4.) Plot 2D space of ratios.
332397
name = f"{cochlea}_2d"
333-
_plot_2d(ratios, name, show_plots, classification=subtype_classification)
398+
_plot_2d(ratios, name, show_plots, classification=classification, colors=COLORS)
334399

335400

336401
# General notes:
337402
# See:
338403
def main():
339404
missing_tables = check_processing_status()
340405
require_missing_tables(missing_tables)
406+
compile_data_for_subtype_analysis()
407+
408+
# analyze_subtype_data_regular(show_plots=False)
341409

342-
# compile_data_for_subtype_analysis()
410+
# TODO
411+
# analyze_subtype_data_N_mice()
343412

344-
# analyze_subtype_data(show_plots=False)
413+
# CTBP2 stain
414+
# analyze_subtype_data_syn_mice()
345415

346416

347417
if __name__ == "__main__":

0 commit comments

Comments
 (0)