11import json
22import os
3+ from glob import glob
4+ from subprocess import run
35
46import pandas as pd
5- from skimage .filters import threshold_otsu
67
78from flamingo_tools .s3_utils import BUCKET_NAME , create_s3_target , get_s3_path
89from flamingo_tools .measurements import compute_object_measures
@@ -116,7 +117,7 @@ def require_missing_tables(missing_tables):
116117 seg_name = "PV_SGN_v2" if "PV" in COCHLEAE_FOR_SUBTYPES [cochlea ] else "CR_SGN_v2"
117118 for missing in missing_tabs :
118119 channel = missing .split ("_" )[0 ]
119- print (cochlea , channel )
120+ print ("Computing intensities for:" , cochlea , channel )
120121
121122 img_s3 = f"{ cochlea } /images/ome-zarr/{ channel } .ome.zarr"
122123 seg_s3 = f"{ cochlea } /images/ome-zarr/{ seg_name } .ome.zarr"
@@ -126,7 +127,9 @@ def require_missing_tables(missing_tables):
126127
127128 output_folder = os .path .join (output_root , cochlea )
128129 os .makedirs (output_folder , exist_ok = True )
129- output_table_path = os .path .join (output_folder , f"{ channel } _{ seg_name } _object-measures.tsv" )
130+ output_table_path = os .path .join (
131+ output_folder , f"{ channel } _{ seg_name .replace ('_' , '-' )} _object-measures.tsv"
132+ )
130133 compute_object_measures (
131134 image_path = img_path ,
132135 segmentation_path = seg_path ,
@@ -136,17 +139,17 @@ def require_missing_tables(missing_tables):
136139 segmentation_key = "s0" ,
137140 s3_flag = True ,
138141 component_list = [1 ],
139- n_threads = 8 ,
142+ n_threads = 16 ,
140143 )
141- return
142144
143- # TODO S3 upload
145+ # S3 upload
146+ run (["rclone" , "--progress" , "copyto" , output_folder ,
147+ f"cochlea-lightsheet:cochlea-lightsheet/{ cochlea } /tables/{ seg_name } " ])
144148
145149
146- def get_data_for_subtype_analysis ():
150+ def compile_data_for_subtype_analysis ():
147151 s3 = create_s3_target ()
148152
149- threshold_dict = {}
150153 output_folder = "./subtype_analysis"
151154 os .makedirs (output_folder , exist_ok = True )
152155
@@ -176,47 +179,74 @@ def get_data_for_subtype_analysis():
176179 table = table [table .component_labels == 1 ]
177180 valid_sgns = table .label_id
178181
179- output_table = {"label_id" : table .label_id .values }
180- threshold_dict [cochlea ] = {}
182+ output_table = {"label_id" : table .label_id .values , "frequency[kHz]" : table ["frequency[kHz]" ]}
181183
182184 # Analyze the different channels (= different subtypes).
185+ reference_intensity = None
183186 for channel in channels :
184187 # Load the intensity table.
185- intensity_path = os .path .join (table_folder , f"{ channel } _PV-SGN-v2_object-measures.tsv" )
186- try :
187- table_content = s3 .open (intensity_path , mode = "rb" )
188- except FileNotFoundError :
189- print (intensity_path , "is missing" )
190- continue
188+ intensity_path = os .path .join (table_folder , f"{ channel } _{ seg_name .replace ('_' , '-' )} _object-measures.tsv" )
189+ table_content = s3 .open (intensity_path , mode = "rb" )
190+
191191 intensities = pd .read_csv (table_content , sep = "\t " )
192192 intensities = intensities [intensities .label_id .isin (valid_sgns )]
193193 assert len (table ) == len (intensities )
194194 assert (intensities .label_id .values == table .label_id .values ).all ()
195195
196- # Intensity based analysis.
197196 medians = intensities ["median" ].values
198-
199- # TODO: we need to determine the threshold in a better way / validate it in MoBIE.
200- intensity_threshold = THRESHOLDS .get (cochlea , {}).get (channel , None )
201- if intensity_threshold is None :
202- print ("Could not find a threshold for" , cochlea , channel , "falling back to OTSU" )
203- intensity_threshold = float (threshold_otsu (medians ))
204- threshold_dict [cochlea ][channel ] = intensity_threshold
205-
206- subtype = CHANNEL_TO_TYPE [channel ]
207197 output_table [f"{ channel } _median" ] = medians
208- output_table [f"is_{ subtype } " ] = medians > intensity_threshold
209-
210- # Add the frequency mapping.
211- # TODO
198+ if channel == reference_channel :
199+ reference_intensity = medians
200+ else :
201+ assert reference_intensity is not None
202+ output_table [f"{ channel } _ratio_{ reference_channel } " ] = medians / reference_intensity
212203
213204 out_path = os .path .join (output_folder , f"{ cochlea } _subtype_analysis.tsv" )
214205 output_table = pd .DataFrame (output_table )
215- output_table .to_csv (out_path , sep = "\t " )
206+ output_table .to_csv (out_path , sep = "\t " , index = False )
207+
208+
def _plot_histogram(table, column, name, show_plots):
    """Placeholder for plotting the histogram of one column of a table.

    At the moment this only looks up the requested column; nothing is drawn
    or saved yet and both branches of the ``show_plots`` switch are empty.

    Args:
        table: Object supporting ``table[column].values``; the visible caller
            passes a pandas DataFrame.
        column: Name of the column to read from ``table``.
        name: Currently unused; the visible caller passes a ``.png`` file
            name, so presumably the intended output name (TODO confirm).
        show_plots: Selects between two output branches, neither of which is
            implemented yet.
    """
    # Looked up eagerly, so a missing column already fails here.
    values = table[column].values  # noqa: F841 - kept for the upcoming plotting code

    # TODO determine automatic threshold

    if not show_plots:
        pass
    else:
        pass
218+
219+
220+ # TODO enable over-writing by manual thresholds
def analyze_subtype_data(show_plots=True):
    """Plot intensity and intensity-ratio histograms for each compiled table.

    Iterates over all ``*.tsv`` files in ``./subtype_analysis`` (the folder
    ``compile_data_for_subtype_analysis`` writes its per-cochlea tables to),
    derives the cochlea name from each file name and passes the per-channel
    columns to ``_plot_histogram``.

    Args:
        show_plots: Passed through unchanged to ``_plot_histogram``.

    Raises:
        KeyError: If a cochlea name derived from a file name is not a key of
            ``COCHLEAE_FOR_SUBTYPES``, or an expected column is missing from
            the table.
        AssertionError: If the reference channel is not the first channel
            listed for the cochlea.
    """
    suffix = "_subtype_analysis.tsv"
    files = sorted(glob("./subtype_analysis/*.tsv"))

    for ff in files:
        # File names follow "<cochlea>_subtype_analysis.tsv".
        cochlea = os.path.basename(ff)[:-len(suffix)]
        print(cochlea)
        channels = COCHLEAE_FOR_SUBTYPES[cochlea]
        reference_channel = "PV" if "PV" in channels else "CR"
        # The ratio loop below skips channels[0], so it has to be the reference.
        assert channels[0] == reference_channel

        tab = pd.read_csv(ff, sep="\t")

        # 1.) Plot simple intensity histograms, including otsu threshold.
        for chan in channels:
            column = f"{chan}_median"
            name = f"{cochlea}_{chan}_histogram.png"
            _plot_histogram(tab, column, name, show_plots)

        # 2.) Plot ratio histograms, including otsu threshold.
        ratios = {}
        # TODO ratio based classification and overlay in 2d plot?
        for chan in channels[1:]:
            # Must match the column written by compile_data_for_subtype_analysis:
            # "<channel>_ratio_<reference_channel>" (no "_median" infix).
            column = f"{chan}_ratio_{reference_channel}"
            name = f"{cochlea}_{chan}_histogram_ratio_{reference_channel}.png"
            _plot_histogram(tab, column, name, show_plots)
            ratios[f"{chan}_{reference_channel}"] = tab[column].values

        # 3.) Plot 2D space of ratios.
220250
221251
222252# General notes:
@@ -229,7 +259,9 @@ def main():
229259 missing_tables = check_processing_status ()
230260 require_missing_tables (missing_tables )
231261
232- # analyze_subtypes_intensity_based()
262+ # compile_data_for_subtype_analysis()
263+
264+ # analyze_subtype_data()
233265
234266
235267if __name__ == "__main__" :
0 commit comments