55import numpy as np
66from tqdm import tqdm
77from skimage .transform import resize
8+ from skimage .measure import label
9+ from scipy .ndimage import binary_closing
810
# Storage locations used by this script.
# NOTE(review): only OUTPUT_ROOT and STEM_INPUT are referenced by the code visible
# in this part of the file; ROOT and INTER_ROOT are presumably used by the other
# assort_*/crop_* helpers -- confirm before changing them.
ROOT = "/mnt/ceph-hdd/cold/nim00007/AZ_data/training_data"
INTER_ROOT = "/mnt/ceph-hdd/cold/nim00007/AZ_predictions"
# Parent folder under which crop_stem() creates its output folder.
OUTPUT_ROOT = "/mnt/ceph-hdd/cold/nim00007/new_AZ_train_data"
# Parent folder that crop_stem() reads its input .h5 files from.
STEM_INPUT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/for_revison/postprocessed_AZ"
1215
1316
1417def _check_data (files , label_folder , check_thinned ):
@@ -195,6 +198,70 @@ def crop_wichmann():
195198 f .create_dataset ("labels/az" , data = az , compression = "lzf" )
196199 f .create_dataset ("labels/az_thin" , data = az , compression = "lzf" )
197200
def _padded_bounding_box(mask, min_shape, full_shape):
    """Return the bounding box of `mask` as slices, padded towards `min_shape`.

    Along every axis where the tight bounding box is shorter than `min_shape`,
    half of the missing extent (rounded down) is added on each side and the
    result is clipped to `full_shape`. The returned box can therefore still be
    smaller than `min_shape` when the deficit is odd or when the object lies
    close to the volume border.
    """
    coords = np.where(mask)
    tight = tuple(slice(int(c.min()), int(c.max()) + 1) for c in coords)
    pad_width = [
        max(target - (b.stop - b.start), 0) // 2 for b, target in zip(tight, min_shape)
    ]
    return tuple(
        slice(max(b.start - pad, 0), min(b.stop + pad, extent))
        for b, pad, extent in zip(tight, pad_width, full_shape)
    )


def crop_stem(
    input_name="04_hoi_stem_examples_minusSVseg",
    output_name="stem_cropped2",
    min_shape=(32, 512, 512),
    min_size=2000,
    visualize=True,
):
    """Write one cropped training file per large AZ component.

    For every ``*.h5`` file in ``STEM_INPUT/input_name`` the ``labels/az``
    volume is split into connected components, components with at most
    `min_size` voxels are discarded, and the remaining annotation is closed
    morphologically. For each kept component a crop of the raw data and of the
    cleaned annotation is written to
    ``OUTPUT_ROOT/output_name/<input stem>_crop<i>.h5`` with the datasets
    ``raw`` and ``labels/az``. Output files that already exist are skipped.

    Args:
        input_name: Sub-folder of ``STEM_INPUT`` holding the input files.
        output_name: Sub-folder of ``OUTPUT_ROOT`` receiving the crops.
        min_shape: Crop extent per axis that the bounding box of a component
            is padded towards (see `_padded_bounding_box`).
        min_size: Components must have strictly more voxels than this to be
            kept.
        visualize: If True, show every crop in a napari viewer before it is
            saved. The viewer blocks until its window is closed, so pass
            False for unattended runs.
    """
    input_folder = os.path.join(STEM_INPUT, input_name)
    output_folder = os.path.join(OUTPUT_ROOT, output_name)
    os.makedirs(output_folder, exist_ok=True)
    files = glob(os.path.join(input_folder, "*.h5"))

    if visualize:
        # Imported once and only on demand so that headless runs do not need napari.
        import napari

    for ff in tqdm(files):
        with h5py.File(ff, "r") as f:
            az = f["labels/az"][:]
            raw_full = f["raw"][:]

        # Label connected components in the az volume.
        labeled = label(az)
        ids, sizes = np.unique(labeled, return_counts=True)

        # Exclude the background (id 0) and small artifacts. The background is
        # filtered by id rather than by position: a volume without any
        # background voxel has no id 0, and dropping the first entry would
        # discard a real component instead.
        keep_labels = ids[(ids != 0) & (sizes > min_size)]

        # Clean up the az annotation: keep only the large components and fill
        # small gaps; the logical_or keeps every voxel that was set before closing.
        az = np.isin(labeled, keep_labels).astype("uint8")
        az = np.logical_or(az, binary_closing(az, iterations=4)).astype("uint8")

        # splitext only strips the trailing extension, also for names that
        # contain ".h5" somewhere else.
        stem = os.path.splitext(os.path.basename(ff))[0]

        for crop_id, label_id in enumerate(keep_labels, start=1):
            output_path = os.path.join(output_folder, f"{stem}_crop{crop_id}.h5")
            if os.path.exists(output_path):
                print(f"Skipping existing file: {output_path}")
                continue

            # Every id in keep_labels comes from `labeled`, so the mask is never empty.
            bb = _padded_bounding_box(labeled == label_id, min_shape, az.shape)
            az_crop = az[bb]
            raw_crop = raw_full[bb]

            if visualize:
                v = napari.Viewer()
                v.add_image(raw_crop)
                v.add_labels(az_crop)
                napari.run()

            with h5py.File(output_path, "a") as f:
                f.create_dataset("raw", data=raw_crop, compression="lzf")
                f.create_dataset("labels/az", data=az_crop, compression="lzf")
198265
199266def main ():
200267 # assort_tem()
@@ -203,7 +270,9 @@ def main():
203270 # assort_stem()
204271
205272 # assort_wichmann()
206- crop_wichmann ()
273+ #crop_wichmann()
274+
275+ crop_stem ()
207276
208277
209278if __name__ == "__main__" :
0 commit comments