22import os
33import sys
44from glob import glob
5- from subprocess import run
65
76import matplotlib .pyplot as plt
87import pandas as pd
3130 "M_LR_000099_L" , "M_LR_000214_L" , "M_AMD_N62_L" , "M_LR_000184_R" , "M_LR_000184_L"
3231]
3332
34- # Map from channels to subtypes.
35- # Comment Aleyna:
36- # The signal will be a gradient between different subtypes:
37- # For example CR is expressed more, is brigther,
38- # in type 1a SGNs but exist in type Ib SGNs and to a lesser extent in type 1c.
39- # Same is also true for other markers so we will need to set a threshold for each.
40- # Luckily the signal seems less variable compared to GFP.
41- CHANNEL_TO_TYPE = {
42- "CR" : "Type-Ia" ,
43- "Calb1" : "Type-Ib" ,
44- "Lypd1" : "Type-Ic" ,
45- "Prph" : "Type-II" ,
46- "Ntng1" : "Type-Ib/c" ,
47- }
48-
4933# For custom thresholds.
5034THRESHOLDS = {
5135 "M_LR_000214_L" : {
5539}
5640
5741# For consistent colors.
58- ALL_COLORS = ["red" , "blue" , "orange" , "yellow" , "cyan" , "magenta" , "green" , "purple" ]
42+ ALL_COLORS = ["red" , "blue" , "orange" , "yellow" , "cyan" , "magenta" , "green" , "purple" , "gray" , "black" ]
5943COLORS = {}
6044
6145PLOT_OUT = "./subtype_plots"
6246
63- # TODO: updates based on Aleyna's feedback.
64- # Subtype mapping
6547
66- # Combined visualization for the cochleae
67- # Can we visualize the tonotopy in subtypes and not stainings?
68- # It would also be good to have subtype percentages per cochlea and pooled together as a diagram and tonotopy?
69- # This would help to see if different staining gives same/similar results.
7048# Type Ia ; CR+ / Calb1- or Calb1- / Lypd1-
7149# Type Ib: CR+ / Calb1+ or Calb1+ / Lypd1+
7250# Type Ic: CR-/Calb1+ - or Calb1- / Lypd1+
7351# Type II: CR-/Calb1- or Calb1- / Lypd1- or Prph+
52+ def stain_to_type (stain ):
53+ # Normalize the staining string.
54+ stains = stain .replace (" " , "" ).split ("/" )
55+ assert len (stains ) in (1 , 2 )
7456
75- # > It's good to see that for the N mice the Ntng1C and Lypd1 separate from CR so well on the thresholds. Can I visualize these samples ones segmentation masks are done to verify the Ntng1C thresholds? As this is a quite clear signal I'm not sure if taking the middle of the histogram would be the best choice.
76- # The segmentations are in MoBIE already. I need to send you the tables for analyzing the signals. Will send them later.
57+ if len (stains ) == 1 :
58+ stain_norm = stain
59+ else :
60+ s1 , s2 = sorted (stains )
61+ stain_norm = f"{ s1 } /{ s2 } "
7762
78- # > Where are we at PV-Prph segmentation results from MLR184_L and R for SGN type II analysis? This would hopefully give <5% Prph+ cells.
79- # The cochleae are in MoBIE. Segmentation and Prph signal look good! I will include it in the next analysis.
80- # Need tonotopic mapping from Martin and then compute the intensities.
63+ stain_to_type = {
64+ # Combinations of Calb1 and CR:
65+ "CR+/Calb1+" : "Type Ib" ,
66+ "CR-/Calb1+" : "Type Ib/Ic" , # Calb1 is expressed at Ic less than Lypd1 but more then CR
67+ "CR+/Calb1-" : "Type Ia" ,
68+ "CR-/Calb1-" : "Type II" ,
69+
70+ # Combinations of Calb1 and Lypd1:
71+ "Calb1+/Lypd1+" : "Type Ib/Ic" ,
72+ "Calb1+/Lypd1-" : "Type Ib" ,
73+ "Calb1-/Lypd1+" : "Type Ic" ,
74+ "Calb1-/Lypd1-" : "inconclusive" , # Can be Type Ia or Type II
75+
76+ # Prph is isolated.
77+ "Prph+" : "Type II" ,
78+ "Prph-" : "Type I" ,
79+ }
80+
81+ if stain_norm not in stain_to_type :
82+ breakpoint ()
83+ raise ValueError (f"Invalid stain combination: { stain_norm } " )
84+
85+ return stain_to_type [stain_norm ], stain_norm
8186
8287
8388def check_processing_status ():
@@ -189,8 +194,9 @@ def require_missing_tables(missing_tables):
189194 )
190195
191196 # S3 upload
192- run (["rclone" , "--progress" , "copyto" , output_folder ,
193- f"cochlea-lightsheet:cochlea-lightsheet/{ cochlea } /tables/{ seg_name } " ])
197+ # from subprocess import run
198+ # run(["rclone", "--progress", "copyto", output_folder,
199+ # f"cochlea-lightsheet:cochlea-lightsheet/{cochlea}/tables/{seg_name}"])
194200
195201
196202def compile_data_for_subtype_analysis ():
@@ -209,14 +215,20 @@ def compile_data_for_subtype_analysis():
209215 assert "CR" in channels
210216 reference_channel = "CR"
211217 seg_name = "CR_SGN_v2"
212- reference_channel , seg_name
213218
214219 content = s3 .open (f"{ BUCKET_NAME } /{ cochlea } /dataset.json" , mode = "r" , encoding = "utf-8" )
215220 info = json .loads (content .read ())
216221 sources = info ["sources" ]
217222
218223 # Load the segmentation table.
219- seg_source = sources [seg_name ]
224+ try :
225+ seg_source = sources [seg_name ]
226+ except KeyError as e :
227+ if seg_name == "PV_SGN_v2" :
228+ seg_source = sources ["SGN_v2" ]
229+ seg_name = "SGN_v2"
230+ else :
231+ raise e
220232 table_folder = os .path .join (
221233 BUCKET_NAME , cochlea , seg_source ["segmentation" ]["tableData" ]["tsv" ]["relativePath" ]
222234 )
@@ -232,12 +244,19 @@ def compile_data_for_subtype_analysis():
232244 # Analyze the different channels (= different subtypes).
233245 reference_intensity = None
234246 for channel in channels :
235- # Load the intensity table.
236- intensity_path = os .path .join (table_folder , f"{ channel } _{ seg_name .replace ('_' , '-' )} _object-measures.tsv" )
237- table_content = s3 .open (intensity_path , mode = "rb" )
247+ # Load the intensity table, prefer local.
248+ table_name = f"{ channel } _{ seg_name .replace ('_' , '-' )} _object-measures.tsv"
249+ intensity_path = os .path .join ("object_measurements" , cochlea , table_name )
250+
251+ if os .path .exists (intensity_path ):
252+ intensities = pd .read_csv (intensity_path , sep = "\t " )
253+ else :
254+ intensity_path = os .path .join (table_folder , table_name )
255+ table_content = s3 .open (intensity_path , mode = "rb" )
256+
257+ intensities = pd .read_csv (table_content , sep = "\t " )
258+ intensities = intensities [intensities .label_id .isin (valid_sgns )]
238259
239- intensities = pd .read_csv (table_content , sep = "\t " )
240- intensities = intensities [intensities .label_id .isin (valid_sgns )]
241260 assert len (table ) == len (intensities )
242261 assert (intensities .label_id .values == table .label_id .values ).all ()
243262
@@ -258,11 +277,20 @@ def _plot_histogram(table, column, name, show_plots, class_names=None, apply_thr
258277 data = table [column ].values
259278 threshold = threshold_otsu (data )
260279
280+ if class_names is not None :
281+ assert len (class_names ) == 2
282+ c0 , c1 = class_names
283+ subtype_classification = [c0 if datum < threshold else c1 for datum in data ]
284+
261285 fig , ax = plt .subplots (1 )
262286 ax .hist (data , bins = 24 )
263287 if apply_threshold :
264288 ax .axvline (x = threshold , color = 'red' , linestyle = '--' )
265- ax .set_title (f"{ name } \n threshold: { threshold } " )
289+ if class_names is None :
290+ ax .set_title (f"{ name } \n threshold: { threshold } " )
291+ else :
292+ pos_perc = len ([st for st in subtype_classification if st == c1 ]) / float (len (subtype_classification ))
293+ ax .set_title (f"{ name } \n threshold: { threshold } \n %{ c1 } : { pos_perc * 100 } " )
266294 else :
267295 ax .set_title (name )
268296
@@ -271,11 +299,9 @@ def _plot_histogram(table, column, name, show_plots, class_names=None, apply_thr
271299 else :
272300 os .makedirs (PLOT_OUT , exist_ok = True )
273301 plt .savefig (f"{ PLOT_OUT } /{ name } .png" )
302+ plt .close ()
274303
275304 if class_names is not None :
276- assert len (class_names ) == 2
277- c0 , c1 = class_names
278- subtype_classification = [c0 if datum < threshold else c1 for datum in data ]
279305 return subtype_classification
280306
281307
@@ -310,6 +336,7 @@ def _plot_2d(ratios, name, show_plots, classification=None, colors=None):
310336 else :
311337 os .makedirs (PLOT_OUT , exist_ok = True )
312338 plt .savefig (f"{ PLOT_OUT } /{ name } .png" )
339+ plt .close ()
313340
314341
315342def _plot_tonotopic_mapping (freq , classification , name , colors , show_plots ):
@@ -324,6 +351,11 @@ def _plot_tonotopic_mapping(freq, classification, name, colors, show_plots):
324351 fig , ax = plt .subplots (figsize = (8 , 4 ))
325352 for cat , vals in frequency_mapped .items ():
326353 ax .scatter (x_positions , vals .value , label = cat , color = colors [cat ])
354+
355+ main_ticks = range (len (bin_labels ))
356+ ax .set_xticks (main_ticks )
357+ ax .set_xticklabels (bin_labels )
358+ ax .set_xlabel ("Octave band (kHz)" )
327359 ax .legend ()
328360 ax .set_title (name )
329361
@@ -334,12 +366,103 @@ def _plot_tonotopic_mapping(freq, classification, name, colors, show_plots):
334366 plt .savefig (f"{ PLOT_OUT } /{ name } .png" )
335367 plt .close ()
336368
369+ return frequency_mapped
370+
371+
372+ # Combined visualization for the cochleae
373+ # Can we visualize the tonotopy in subtypes and not stainings?
374+ # It would also be good to have subtype percentages per cochlea and pooled together as a diagram and tonotopy?
375+ # This would help to see if different staining gives same/similar results.
376+ def combined_analysis (results , show_plots ):
377+ #
378+ # Create the tonotopic mapping.
379+ #
380+ summary = {}
381+ for cochlea , result in results .items ():
382+ if cochlea == "M_LR_000214_L" : # One of the signals cannot be analyzed.
383+ continue
384+ mapping = result ["tonotopic_mapping" ]
385+ summary [cochlea ] = mapping
386+
387+ colors = {}
388+
389+ fig , axes = plt .subplots (len (summary ), sharey = True , figsize = (8 , 8 ))
390+ for i , (cochlea , frequency_mapped ) in enumerate (summary .items ()):
391+ ax = axes [i ]
392+
393+ result = next (iter (frequency_mapped .values ()))
394+ bin_labels = pd .unique (result ["octave_band" ])
395+ band_to_x = {band : i for i , band in enumerate (bin_labels )}
396+ x_positions = result ["octave_band" ].map (band_to_x )
397+
398+ for cat , vals in frequency_mapped .items ():
399+ values = vals .value
400+ cat = cat [:cat .find (" (" )]
401+ if cat not in colors :
402+ current_colors = list (colors .values ())
403+ next_color = ALL_COLORS [len (current_colors )]
404+ colors [cat ] = next_color
405+ ax .scatter (x_positions , values , label = cat , color = colors [cat ])
406+
407+ main_ticks = range (len (bin_labels ))
408+ ax .set_xticks (main_ticks )
409+ ax .set_xticklabels (bin_labels )
410+ ax .set_title (cochlea )
411+ ax .legend ()
412+
413+ ax .set_xlabel ("Octave band (kHz)" )
414+ plt .tight_layout ()
415+ if show_plots :
416+ plt .show ()
417+ else :
418+ plt .savefig ("./subtype_plots/overview_tonotopic_mapping.png" )
419+ plt .close ()
420+
421+ #
422+ # Create the overview figure.
423+ #
424+ summary , types = {}, []
425+ for cochlea , result in results .items ():
426+ if cochlea == "M_LR_000214_L" : # One of the signals cannot be analyzed.
427+ continue
428+
429+ classification = result ["classification" ]
430+ classification = [cls [:cls .find (" (" )] for cls in classification ]
431+ n_tot = len (classification )
432+
433+ this_types = list (set (classification ))
434+ types .extend (this_types )
435+ summary [cochlea ] = {}
436+ for stype in types :
437+ n_type = len ([cls for cls in classification if cls == stype ])
438+ type_ratio = float (n_type ) / n_tot
439+ summary [cochlea ][stype ] = type_ratio
440+
441+ types = list (set (types ))
442+ df = pd .DataFrame (summary ).fillna (0 ) # missing values → 0
443+
444+ # Transpose → cochleae on x-axis, subtypes stacked
445+ ax = df .T .plot (kind = "bar" , stacked = True , figsize = (8 , 5 ))
446+
447+ ax .set_ylabel ("Fraction" )
448+ ax .set_xlabel ("Cochlea" )
449+ ax .set_title ("Subtype Fractions per Cochlea" )
450+ plt .xticks (rotation = 0 )
451+ plt .tight_layout ()
452+
453+ if show_plots :
454+ plt .show ()
455+ else :
456+ plt .savefig ("./subtype_plots/overview.png" )
457+ plt .close ()
458+
337459
338460def analyze_subtype_data_regular (show_plots = True ):
339461 global PLOT_OUT , COLORS # noqa
340462 PLOT_OUT = "subtype_plots/regular_mice"
341463
342464 files = sorted (glob ("./subtype_analysis/*.tsv" ))
465+ results = {}
343466
344467 for ff in files :
345468 cochlea = os .path .basename (ff )[:- len ("_subtype_analysis.tsv" )]
@@ -354,10 +477,10 @@ def analyze_subtype_data_regular(show_plots=True):
354477 tab = pd .read_csv (ff , sep = "\t " )
355478
356479 # 1.) Plot simple intensity histograms, including otsu threshold.
357- # for chan in channels:
358- # column = f"{chan}_median"
359- # name = f"{cochlea}_{chan}_histogram"
360- # _plot_histogram(tab, column, name, show_plots, apply_threshold=chan != reference_channel)
480+ for chan in channels :
481+ column = f"{ chan } _median"
482+ name = f"{ cochlea } _{ chan } _histogram"
483+ _plot_histogram (tab , column , name , show_plots , apply_threshold = chan != reference_channel )
361484
362485 # 2.) Plot ratio histograms, including otsu threshold.
363486 ratios = {}
@@ -372,9 +495,18 @@ def analyze_subtype_data_regular(show_plots=True):
372495 ratios [f"{ chan } _{ reference_channel } " ] = tab [column ].values
373496
374497 # Unify the classification and assign colors
375- cls1 , cls2 = classification [0 ], classification [1 ]
376- assert len (cls1 ) == len (cls2 )
377- classification = [f"{ c1 } / { c2 } " for c1 , c2 in zip (cls1 , cls2 )]
498+ assert len (classification ) in (1 , 2 )
499+ if len (classification ) == 2 :
500+ cls1 , cls2 = classification [0 ], classification [1 ]
501+ assert len (cls1 ) == len (cls2 )
502+ classification = [f"{ c1 } / { c2 } " for c1 , c2 in zip (cls1 , cls2 )]
503+ show_2d = True
504+ else :
505+ classification = classification [0 ]
506+ show_2d = False
507+
508+ classification = [stain_to_type (cls ) for cls in classification ]
509+ classification = [f"{ stype } ({ stain } )" for stype , stain in classification ]
378510
379511 unique_labels = set (classification )
380512 for label in unique_labels :
@@ -391,21 +523,31 @@ def analyze_subtype_data_regular(show_plots=True):
391523 freq = tab ["frequency[kHz]" ].values
392524 assert len (freq ) == len (classification )
393525 name = f"{ cochlea } _tonotopic_mapping"
394- _plot_tonotopic_mapping (freq , classification , name = name , colors = COLORS , show_plots = show_plots )
526+ tonotopic_mapping = _plot_tonotopic_mapping (
527+ freq , classification , name = name , colors = COLORS , show_plots = show_plots
528+ )
395529
396530 # 4.) Plot 2D space of ratios.
397- name = f"{ cochlea } _2d"
398- _plot_2d (ratios , name , show_plots , classification = classification , colors = COLORS )
531+ if show_2d :
532+ name = f"{ cochlea } _2d"
533+ _plot_2d (ratios , name , show_plots , classification = classification , colors = COLORS )
399534
535+ results [cochlea ] = {"classification" : classification , "tonotopic_mapping" : tonotopic_mapping }
400536
401- # General notes:
402- # See:
537+ combined_analysis (results , show_plots = show_plots )
538+
539+
540+ # More TODO:
541+ # > It's good to see that for the N mice the Ntng1C and Lypd1 separate from CR so well on the thresholds.
542+ # Can I visualize these samples ones segmentation masks are done to verify the Ntng1C thresholds?
543+ # As this is a quite clear signal I'm not sure if taking the middle of the histogram would be the best choice.
544+ # The segmentations are in MoBIE already. I need to send you the tables for analyzing the signals. Will send them later.
403545def main ():
404- missing_tables = check_processing_status ()
405- require_missing_tables (missing_tables )
406- compile_data_for_subtype_analysis ()
546+ # missing_tables = check_processing_status()
547+ # require_missing_tables(missing_tables)
548+ # compile_data_for_subtype_analysis()
407549
408- # analyze_subtype_data_regular(show_plots=False)
550+ analyze_subtype_data_regular (show_plots = False )
409551
410552 # TODO
411553 # analyze_subtype_data_N_mice()
0 commit comments