 
 import pandas as pd
 from skimage.filters import threshold_otsu
-from flamingo_tools.s3_utils import BUCKET_NAME, create_s3_target
+
+from flamingo_tools.s3_utils import BUCKET_NAME, create_s3_target, get_s3_path
+from flamingo_tools.measurements import compute_object_measures
 
 # Map from cochlea names to channels
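+# Each cochlea is listed with its stained channels; PV (or CR when PV is absent) serves as the
+# reference channel whose SGN segmentation the measurements are based on.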
 COCHLEAE_FOR_SUBTYPES = {
     "M_LR_000099_L": ["PV", "Calb1", "Lypd1"],
     "M_LR_000214_L": ["PV", "CR", "Calb1"],
     "M_AMD_N62_L": ["PV", "CR", "Calb1"],
+    "M_AMD_N180_R": ["CR", "Ntng1", "CTBP2"],
     # Mutant / some stuff is weird.
     # "M_AMD_Runx1_L": ["PV", "Lypd1", "Calb1"],
     # This one still has to be stitched:
     # "M_LR_000184_R": {"PV", "Prph"},
     # We don't have PV here, so we exclude these two for now.
     # "M_AMD_00N180_L": {"CR", "Ntng1", "Lypd1"},
-    # "M_AMD_00N180_R": {"CR", "Ntng1", "CTBP2"},
 }
 
 # Map from channels to subtypes.
3133 "Calb1" : "Type-Ib" ,
3234 "Lypd1" : "Type-Ic" ,
3335 "Prph" : "Type-II" ,
36+ "Ntng1" : "Type-Ib/c" ,
3437}
3538
36- # TODO
39+ # For custom thresholds.
3740THRESHOLDS = {
3841 "M_LR_000214_L" : {
3942 },
4043 "M_AMD_N62_L" : {
41- "Calb1" : 380 ,
4244 },
4345}
4446
@@ -54,6 +56,8 @@ def check_processing_status():
         # print(name)
         # breakpoint()
 
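+    # Map cochlea name -> list of object-measure tables that still have to be computed.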
+    missing_tables = {}
+
     for cochlea, channels in COCHLEAE_FOR_SUBTYPES.items():
         try:
             content = s3.open(f"{BUCKET_NAME}/{cochlea}/dataset.json", mode="r", encoding="utf-8")
@@ -74,24 +78,87 @@ def check_processing_status():
 
         if "SGN_v2" in sources:
             print("SGN segmentation is present with name SGN_v2")
+            seg_name = "SGN-v2"
+            table_folder = "tables/SGN_v2"
         elif "PV_SGN_v2" in sources:
             print("SGN segmentation is present with name PV_SGN_v2")
+            seg_name = "PV-SGN-v2"
+            table_folder = "tables/PV_SGN_v2"
+        elif "CR_SGN_v2" in sources:
+            print("SGN segmentation is present with name CR_SGN_v2")
+            seg_name = "CR-SGN-v2"
+            table_folder = "tables/CR_SGN_v2"
         else:
             print("SGN segmentation is MISSING")
+            print()
+            continue
+
+        # Check which tables we have.
+        expected_tables = [f"{chan}_{seg_name}_object-measures.tsv" for chan in channels]
+        tables = s3.ls(os.path.join(BUCKET_NAME, cochlea, table_folder))
+        tables = [os.path.basename(tab) for tab in tables]
+
+        this_missing_tables = []
+        for exp_tab in expected_tables:
+            if exp_tab not in tables:
+                print("Missing table:", exp_tab)
+                this_missing_tables.append(exp_tab)
+        missing_tables[cochlea] = this_missing_tables
         print()
 
+    return missing_tables
+
+
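+# Compute the missing object-measure tables locally; uploading them to S3 is still a TODO.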
+def require_missing_tables(missing_tables):
+    output_root = "./object_measurements"
 
-def analyze_subtypes_intensity_based():
+    for cochlea, missing_tabs in missing_tables.items():
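+        # The segmentation is named after the reference channel: PV if it was imaged, otherwise CR.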
+        seg_name = "PV_SGN_v2" if "PV" in COCHLEAE_FOR_SUBTYPES[cochlea] else "CR_SGN_v2"
+        for missing in missing_tabs:
+            channel = missing.split("_")[0]
+            print(cochlea, channel)
+
+            img_s3 = f"{cochlea}/images/ome-zarr/{channel}.ome.zarr"
+            seg_s3 = f"{cochlea}/images/ome-zarr/{seg_name}.ome.zarr"
+            seg_table_s3 = f"{cochlea}/tables/{seg_name}/default.tsv"
+            img_path, _ = get_s3_path(img_s3)
+            seg_path, _ = get_s3_path(seg_s3)
+
+            output_folder = os.path.join(output_root, cochlea)
+            os.makedirs(output_folder, exist_ok=True)
+            output_table_path = os.path.join(output_folder, f"{channel}_{seg_name}_object-measures.tsv")
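+            # Compute the object measures for this channel on the SGN segmentation
+            # (component 1 only) and write them to a local TSV.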
+            compute_object_measures(
+                image_path=img_path,
+                segmentation_path=seg_path,
+                segmentation_table_path=seg_table_s3,
+                output_table_path=output_table_path,
+                image_key="s0",
+                segmentation_key="s0",
+                s3_flag=True,
+                component_list=[1],
+                n_threads=8,
+            )
+    return
+
+    # TODO S3 upload
+
+
+def get_data_for_subtype_analysis():
     s3 = create_s3_target()
-    seg_name = "PV_SGN_v2"
 
     threshold_dict = {}
     output_folder = "./subtype_analysis"
     os.makedirs(output_folder, exist_ok=True)
 
     for cochlea, channels in COCHLEAE_FOR_SUBTYPES.items():
-        # Remove the PV channel, which we don't need for analysis.
-        channels = channels[1:]
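+        # Pick the reference channel (PV if present, otherwise CR) and the matching segmentation name.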
+        if "PV" in channels:
+            reference_channel = "PV"
+            seg_name = "PV_SGN_v2"
+        else:
+            assert "CR" in channels
+            reference_channel = "CR"
+            seg_name = "CR_SGN_v2"
+        reference_channel, seg_name
 
         content = s3.open(f"{BUCKET_NAME}/{cochlea}/dataset.json", mode="r", encoding="utf-8")
         info = json.loads(content.read())
@@ -157,10 +224,12 @@ def analyze_subtypes_intensity_based():
 # Double check if this is the right channel. Maybe we try domain adaptation here?
 # M_LR_000214_L: PV looks correct, segmentation is not there yet.
 # M_AMD_N62_L: PV signal and segmentation look good.
-# M_AMD_Runx1_L: PV looks a bit off, but should work. Segmentation is not there yet.
+# M_AMD_N180_R: Need SGN segmentation based on CR.
 def main():
-    # check_processing_status()
-    analyze_subtypes_intensity_based()
+    missing_tables = check_processing_status()
+    require_missing_tables(missing_tables)
+
+    # analyze_subtypes_intensity_based()
 
 
 if __name__ == "__main__":