11import json
22import os
3+ import sys
34from glob import glob
45from subprocess import run
56
1011from flamingo_tools .s3_utils import BUCKET_NAME , create_s3_target , get_s3_path
1112from flamingo_tools .measurements import compute_object_measures
1213
14+ sys .path .append ("../figures" )
15+
1316# Map from cochlea names to channels
1417COCHLEAE_FOR_SUBTYPES = {
1518 "M_LR_000099_L" : ["PV" , "Calb1" , "Lypd1" ],
1619 "M_LR_000214_L" : ["PV" , "CR" , "Calb1" ],
1720 "M_AMD_N62_L" : ["PV" , "CR" , "Calb1" ],
1821 "M_AMD_N180_R" : ["CR" , "Ntng1" , "CTBP2" ],
1922 "M_AMD_N180_L" : ["CR" , "Ntng1" , "Lypd1" ],
23+ "M_LR_000184_R" : ["PV" , "Prph" ],
24+ "M_LR_000184_L" : ["PV" , "Prph" ],
2025 # Mutant / some stuff is weird.
2126 # "M_AMD_Runx1_L": ["PV", "Lypd1", "Calb1"],
2227 # This one still has to be stitched:
2328 # "M_LR_000184_R": {"PV", "Prph"},
2429}
30+ REGULAR_COCHLEAE = [
31+ "M_LR_000099_L" , "M_LR_000214_L" , "M_AMD_N62_L" , "M_LR_000184_R" , "M_LR_000184_L"
32+ ]
2533
2634# Map from channels to subtypes.
2735# Comment Aleyna:
4654 },
4755}
4856
57+ # For consistent colors.
58+ ALL_COLORS = ["red" , "blue" , "orange" , "yellow" , "cyan" , "magenta" , "green" , "purple" ]
59+ COLORS = {}
60+
4961PLOT_OUT = "./subtype_plots"
5062
63+ # TODO: updates based on Aleyna's feedback.
64+ # Subtype mapping
65+
66+ # Combined visualization for the cochleae
67+ # Can we visualize the tonotopy in subtypes and not stainings?
68+ # It would also be good to have subtype percentages per cochlea and pooled together as a diagram and tonotopy?
69+ # This would help to see if different staining gives same/similar results.
70+ # Type Ia ; CR+ / Calb1- or Calb1- / Lypd1-
71+ # Type Ib: CR+ / Calb1+ or Calb1+ / Lypd1+
72+ # Type Ic: CR-/Calb1+ - or Calb1- / Lypd1+
73+ # Type II: CR-/Calb1- or Calb1- / Lypd1- or Prph+
74+
75+ # > It's good to see that for the N mice the Ntng1C and Lypd1 separate from CR so well on the thresholds. Can I visualize these samples ones segmentation masks are done to verify the Ntng1C thresholds? As this is a quite clear signal I'm not sure if taking the middle of the histogram would be the best choice.
76+ # The segmentations are in MoBIE already. I need to send you the tables for analyzing the signals. Will send them later.
77+
78+ # > Where are we at PV-Prph segmentation results from MLR184_L and R for SGN type II analysis? This would hopefully give <5% Prph+ cells.
79+ # The cochleae are in MoBIE. Segmentation and Prph signal look good! I will include it in the next analysis.
80+ # Need tonotopic mapping from Martin and then compute the intensities.
81+
5182
5283def check_processing_status ():
5384 s3 = create_s3_target ()
@@ -63,6 +94,8 @@ def check_processing_status():
6394 missing_tables = {}
6495
6596 for cochlea , channels in COCHLEAE_FOR_SUBTYPES .items ():
97+ if cochlea not in REGULAR_COCHLEAE :
98+ continue
6699 try :
67100 content = s3 .open (f"{ BUCKET_NAME } /{ cochlea } /dataset.json" , mode = "r" , encoding = "utf-8" )
68101 except FileNotFoundError :
@@ -125,6 +158,8 @@ def require_missing_tables(missing_tables):
125158 output_root = "./object_measurements"
126159
127160 for cochlea , missing_tabs in missing_tables .items ():
161+ if cochlea not in REGULAR_COCHLEAE :
162+ continue
128163 for missing in missing_tabs :
129164 channel = missing .split ("_" )[0 ]
130165 seg_name = missing .split ("_" )[1 ].replace ("-" , "_" )
@@ -165,6 +200,8 @@ def compile_data_for_subtype_analysis():
165200 os .makedirs (output_folder , exist_ok = True )
166201
167202 for cochlea , channels in COCHLEAE_FOR_SUBTYPES .items ():
203+ if cochlea not in REGULAR_COCHLEAE :
204+ continue
168205 if "PV" in channels :
169206 reference_channel = "PV"
170207 seg_name = "PV_SGN_v2"
@@ -217,27 +254,32 @@ def compile_data_for_subtype_analysis():
217254 output_table .to_csv (out_path , sep = "\t " , index = False )
218255
219256
220- def _plot_histogram (table , column , name , show_plots , subtype = None ):
257+ def _plot_histogram (table , column , name , show_plots , class_names = None , apply_threshold = True ):
221258 data = table [column ].values
222259 threshold = threshold_otsu (data )
223260
224261 fig , ax = plt .subplots (1 )
225262 ax .hist (data , bins = 24 )
226- ax .axvline (x = threshold , color = 'red' , linestyle = '--' )
227- ax .set_title (f"{ name } \n threshold: { threshold } " )
263+ if apply_threshold :
264+ ax .axvline (x = threshold , color = 'red' , linestyle = '--' )
265+ ax .set_title (f"{ name } \n threshold: { threshold } " )
266+ else :
267+ ax .set_title (name )
228268
229269 if show_plots :
230270 plt .show ()
231271 else :
232272 os .makedirs (PLOT_OUT , exist_ok = True )
233273 plt .savefig (f"{ PLOT_OUT } /{ name } .png" )
234274
235- if subtype is not None :
236- subtype_classification = [None if datum < threshold else subtype for datum in data ]
275+ if class_names is not None :
276+ assert len (class_names ) == 2
277+ c0 , c1 = class_names
278+ subtype_classification = [c0 if datum < threshold else c1 for datum in data ]
237279 return subtype_classification
238280
239281
240- def _plot_2d (ratios , name , show_plots , classification = None ):
282+ def _plot_2d (ratios , name , show_plots , classification = None , colors = None ):
241283 fig , ax = plt .subplots (1 )
242284 assert len (ratios ) == 2
243285 keys = list (ratios .keys ())
@@ -247,42 +289,16 @@ def _plot_2d(ratios, name, show_plots, classification=None):
247289 ax .scatter (ratios [k1 , k2 ])
248290
249291 else :
250- def _combine (a , b ):
251- if a is None and b is None :
252- return None
253- elif a is None and b is not None :
254- return b
255- elif a is not None and b is None :
256- return a
257- else :
258- return f"{ a } -{ b } "
259-
260- classification = [cls for cls in classification if cls is not None ]
261- labels = classification [0 ].copy ()
262- for cls in classification [1 :]:
263- if cls is None :
264- continue
265- labels = [_combine (a , b ) for a , b in zip (labels , cls )]
266-
267- unique_labels = set (ll for ll in labels if ll is not None )
268- all_colors = ["red" , "blue" , "orange" , "yellow" ]
269- colors = {ll : color for ll , color in zip (unique_labels , all_colors [:len (unique_labels )])}
270-
292+ assert colors is not None
293+ unique_labels = set (classification )
271294 for lbl in unique_labels :
272- mask = [ll == lbl for ll in labels ]
295+ mask = [ll == lbl for ll in classification ]
273296 ax .scatter (
274- [ratios [k1 ][i ] for i in range (len (labels )) if mask [i ]],
275- [ratios [k2 ][i ] for i in range (len (labels )) if mask [i ]],
297+ [ratios [k1 ][i ] for i in range (len (classification )) if mask [i ]],
298+ [ratios [k2 ][i ] for i in range (len (classification )) if mask [i ]],
276299 c = colors [lbl ], label = lbl
277300 )
278301
279- mask_none = [ll is None for ll in labels ]
280- ax .scatter (
281- [ratios [k1 ][i ] for i in range (len (labels )) if mask_none [i ]],
282- [ratios [k2 ][i ] for i in range (len (labels )) if mask_none [i ]],
283- facecolors = "none" , edgecolors = "black" , label = "None"
284- )
285-
286302 ax .legend ()
287303
288304 ax .set_xlabel (k1 )
@@ -296,52 +312,106 @@ def _combine(a, b):
296312 plt .savefig (f"{ PLOT_OUT } /{ name } .png" )
297313
298314
299- # TODO enable over-writing by manual thresholds
300- def analyze_subtype_data (show_plots = True ):
315+ def _plot_tonotopic_mapping (freq , classification , name , colors , show_plots ):
316+ from util import frequency_mapping
317+
318+ frequency_mapped = frequency_mapping (freq , classification , categorical = True )
319+ result = next (iter (frequency_mapped .values ()))
320+ bin_labels = pd .unique (result ["octave_band" ])
321+ band_to_x = {band : i for i , band in enumerate (bin_labels )}
322+ x_positions = result ["octave_band" ].map (band_to_x )
323+
324+ fig , ax = plt .subplots (figsize = (8 , 4 ))
325+ for cat , vals in frequency_mapped .items ():
326+ ax .scatter (x_positions , vals .value , label = cat , color = colors [cat ])
327+ ax .legend ()
328+ ax .set_title (name )
329+
330+ if show_plots :
331+ plt .show ()
332+ else :
333+ os .makedirs (PLOT_OUT , exist_ok = True )
334+ plt .savefig (f"{ PLOT_OUT } /{ name } .png" )
335+ plt .close ()
336+
337+
338+ def analyze_subtype_data_regular (show_plots = True ):
339+ global PLOT_OUT , COLORS # noqa
340+ PLOT_OUT = "subtype_plots/regular_mice"
341+
301342 files = sorted (glob ("./subtype_analysis/*.tsv" ))
302343
303344 for ff in files :
304345 cochlea = os .path .basename (ff )[:- len ("_subtype_analysis.tsv" )]
346+ if cochlea not in REGULAR_COCHLEAE :
347+ continue
305348 print (cochlea )
306349 channels = COCHLEAE_FOR_SUBTYPES [cochlea ]
307- reference_channel = "PV" if "PV" in channels else "CR"
350+
351+ reference_channel = "PV"
308352 assert channels [0 ] == reference_channel
309353
310354 tab = pd .read_csv (ff , sep = "\t " )
311355
312356 # 1.) Plot simple intensity histograms, including otsu threshold.
313- for chan in channels :
314- column = f"{ chan } _median"
315- name = f"{ cochlea } _{ chan } _histogram"
316- _plot_histogram (tab , column , name , show_plots )
357+ # for chan in channels:
358+ # column = f"{chan}_median"
359+ # name = f"{cochlea}_{chan}_histogram"
360+ # _plot_histogram(tab, column, name, show_plots, apply_threshold=chan != reference_channel )
317361
318362 # 2.) Plot ratio histograms, including otsu threshold.
319- # TODO ratio based classification and overlay in 2d plot?
320363 ratios = {}
321- subtype_classification = []
364+ classification = []
322365 for chan in channels [1 :]:
323366 column = f"{ chan } _ratio_{ reference_channel } "
324367 name = f"{ cochlea } _{ chan } _histogram_ratio_{ reference_channel } "
325- classification = _plot_histogram (
326- tab , column , name , subtype = CHANNEL_TO_TYPE . get ( chan , None ) , show_plots = show_plots
368+ chan_classification = _plot_histogram (
369+ tab , column , name , class_names = [ f" { chan } -" , f" { chan } +" ] , show_plots = show_plots
327370 )
328- subtype_classification .append (classification )
371+ classification .append (chan_classification )
329372 ratios [f"{ chan } _{ reference_channel } " ] = tab [column ].values
330373
331- # 3.) Plot 2D space of ratios.
374+ # Unify the classification and assign colors
375+ cls1 , cls2 = classification [0 ], classification [1 ]
376+ assert len (cls1 ) == len (cls2 )
377+ classification = [f"{ c1 } / { c2 } " for c1 , c2 in zip (cls1 , cls2 )]
378+
379+ unique_labels = set (classification )
380+ for label in unique_labels :
381+ if label in COLORS :
382+ continue
383+ if COLORS :
384+ last_color = list (COLORS .values ())[- 1 ]
385+ next_color = ALL_COLORS [ALL_COLORS .index (last_color ) + 1 ]
386+ COLORS [label ] = next_color
387+ else :
388+ COLORS [label ] = ALL_COLORS [0 ]
389+
390+ # 3.) Plot tonotopic mapping.
391+ freq = tab ["frequency[kHz]" ].values
392+ assert len (freq ) == len (classification )
393+ name = f"{ cochlea } _tonotopic_mapping"
394+ _plot_tonotopic_mapping (freq , classification , name = name , colors = COLORS , show_plots = show_plots )
395+
396+ # 4.) Plot 2D space of ratios.
332397 name = f"{ cochlea } _2d"
333- _plot_2d (ratios , name , show_plots , classification = subtype_classification )
398+ _plot_2d (ratios , name , show_plots , classification = classification , colors = COLORS )
334399
335400
336401# General notes:
337402# See:
338403def main ():
339404 missing_tables = check_processing_status ()
340405 require_missing_tables (missing_tables )
406+ compile_data_for_subtype_analysis ()
407+
408+ # analyze_subtype_data_regular(show_plots=False)
341409
342- # compile_data_for_subtype_analysis()
410+ # TODO
411+ # analyze_subtype_data_N_mice()
343412
344- # analyze_subtype_data(show_plots=False)
413+ # CTBP2 stain
414+ # analyze_subtype_data_syn_mice()
345415
346416
347417if __name__ == "__main__" :
0 commit comments