1+ import  numpy  as  np 
2+ 
3+ from  synapse_net .file_utils  import  read_mrc 
4+ import  mrcfile 
5+ import  h5py 
6+ import  tifffile 
7+ from  pathlib  import  Path 
8+ import  re 
9+ 
10+ def  apply_ignore_label (h5_path , mask_path , ignore_label : int = - 1 ):
11+     """For supervised training: set masked voxels to -1 (ignore_label).""" 
12+     with  h5py .File (h5_path , "r" ) as  f :
13+         raw  =  f ["raw" ][:]
14+         labels  =  f ["labels/actin" ][:]
15+ 
16+     with  mrcfile .open (mask_path , permissive = True ) as  mrc :
17+         ignore_mask  =  mrc .data .astype (bool )
18+         #ignore_mask = np.flip(ignore_mask, axis=1) 
19+     
20+     labels_masked  =  labels .astype (np .int32 ) # ensure signed int type  
21+     labels_masked [(labels  ==  0 ) &  ignore_mask ] =  ignore_label  
22+ 
23+     out_dir  =  Path (h5_path ).parent  /  "ignore_label" 
24+     out_dir .mkdir (parents = True , exist_ok = True )
25+     fstem  =  Path (h5_path ).stem 
26+     out_path  =  out_dir  /  f"{ fstem }  .h5" 
27+ 
28+     print (f"Writing out h5 file with masked labels to { out_path }  ." )
29+     with  h5py .File (out_path , "w" ) as  f :
30+         f .create_dataset ("raw" , data = raw , compression = "gzip" )
31+         f .create_dataset ("/labels/actin" , data = labels_masked , compression = "gzip" )
32+     
33+ def  convert_tiff2mrc (in_dir , pixel_size , out_dir = None ): 
34+     """Batch convert tiff files to mrc.""" 
35+     in_dir  =  Path (in_dir )
36+ 
37+     if  out_dir  ==  None :
38+         out_dir  =  in_dir 
39+     else :
40+         out_dir  =  Path (out_dir )
41+         out_dir .mkdir (parents = True , exist_ok = True )
42+ 
43+     path_list  =  [str (p ) for  p  in  in_dir .glob ("*.tif" )]
44+     
45+     for  path  in  path_list :
46+         data  =  tifffile .imread (path )
47+         data  =  np .flip (data , axis = 1 )
48+         filename  =  Path (path ).stem 
49+         out_path  =  out_dir  /  f"{ filename }  .mrc" 
50+         
51+         print (f"Writing out mrc file to { out_path }  ." )
52+         with  mrcfile .new (out_path , overwrite = True ) as  mrc :
53+             mrc .set_data (data .astype (np .uint8 ))
54+             mrc .voxel_size  =  (pixel_size , pixel_size , pixel_size )
55+ 
56+ def  h5_split_tomograms (h5_path , z_range ):
57+     """ 
58+     Split paired raw and label data (z,y,x) into 8 non-overlapping subvolumes 
59+     by cutting it in half along each axis. 
60+     """ 
61+     with  h5py .File (h5_path , "r" ) as  f :
62+         z0 , z1  =  z_range 
63+         raw  =  f ["raw" ][z0 :z1 , :, :]
64+         labels  =  f ["labels/actin" ][z0 :z1 , :, :]
65+ 
66+     z , y , x  =  raw .shape 
67+ 
68+     # Compute midpoints 
69+     z_mid , y_mid , x_mid  =  z  //  2 , y  //  2 , x  //  2 
70+ 
71+     # Define ranges for each half 
72+     z_ranges  =  [(0 , z_mid ), (z_mid , z )]
73+     y_ranges  =  [(0 , y_mid ), (y_mid , y )]
74+     x_ranges  =  [(0 , x_mid ), (x_mid , x )]
75+ 
76+     raw_subvols , label_subvols  =  [], []
77+ 
78+     for  zi , (z0 , z1 ) in  enumerate (z_ranges ):
79+         for  yi , (y0 , y1 ) in  enumerate (y_ranges ):
80+             for  xi , (x0 , x1 ) in  enumerate (x_ranges ):
81+                 raw_subvol  =  raw [z0 :z1 , y0 :y1 , x0 :x1 ]
82+                 label_subvol  =  labels [z0 :z1 , y0 :y1 , x0 :x1 ]
83+                 raw_subvols .append (raw_subvol )
84+                 label_subvols .append (label_subvol )
85+ 
86+     return  raw_subvols , label_subvols 
87+ 
88+ def  write_h5 (raw_path , label_path , out_path ):
89+     """Write the raw and labels to an HDF5 file.""" 
90+     if  out_path .exists ():
91+         print (f"File { out_path }   already exists, skipping." )
92+         return 
93+     
94+     raw  =  read_mrc (raw_path )[0 ]
95+     labels  =  read_mrc (label_path )[0 ]
96+ 
97+     print (f"Writing file to { out_path }  ." )
98+     with  h5py .File (out_path , "w" ) as  f :
99+         f .create_dataset ("raw" , data = raw , compression = "gzip" )
100+         f .create_dataset ("/labels/actin" , data = labels , compression = "gzip" )
101+ 
102+ def  write_h5_deepict ():
103+     PARENT_DIR  =  Path ("/mnt/data1/sage/actin-segmentation/data/deepict/deepict_actin/ignore_label" )
104+     TRAIN_DIR  =  PARENT_DIR  /  "train" 
105+     VAL_DIR  =  PARENT_DIR  /  "val" 
106+     TEST_DIR  =  PARENT_DIR  /  "test" 
107+ 
108+     TRAIN_DIR .mkdir (exist_ok = True )
109+     VAL_DIR .mkdir (exist_ok = True )
110+     TEST_DIR .mkdir (exist_ok = True )
111+ 
112+     raw_subvols1 , label_subvols1  =  h5_split_tomograms (
113+         Path (PARENT_DIR  /  "00004_cleaned.h5" ), z_range  =  (326 , 464 )
114+         )
115+     raw_subvols2 , label_subvols2  =  h5_split_tomograms (
116+         Path (PARENT_DIR  /  "00012_cleaned.h5" ), z_range  =  (147 , 349 )
117+         )
118+ 
119+     raw_subvols  =  raw_subvols1  +  raw_subvols2 
120+     label_subvols  =  label_subvols1  +  label_subvols2 
121+ 
122+     # predefined indices for train, val, test (10:2:4) 
123+     train_idx  =  [0 , 3 , 4 , 7 , 8 , 9 , 10 , 11 , 12 , 15 ]
124+     val_idx  =  [6 , 14 ]
125+     test_idx  =  [1 , 2 , 5 , 13 ]
126+ 
127+     def  write_split (idx_list , folder , prefix ):
128+         for  idx  in  idx_list :
129+             raw  =  raw_subvols [idx ]
130+             labels  =  label_subvols [idx ]
131+ 
132+             # tomogram 00004: indices 0-7 -> A 
133+             # tomogram 00012: indices 8-15 -> B 
134+             if  idx  <  8 :
135+                 tag  =  f"A{ idx }  " 
136+             else :
137+                 tag  =  f"B{ idx  -  8 }  "  
138+             out_path  =  folder  /  f"{ prefix }  _{ tag }  .h5" 
139+ 
140+             print (f"Writing file to { out_path }  ." )
141+             with  h5py .File (out_path , "w" ) as  f :
142+                 f .create_dataset ("raw" , data = raw , compression = "gzip" )
143+                 f .create_dataset ("/labels/actin" , data = labels , compression = "gzip" )
144+ 
145+     write_split (train_idx , TRAIN_DIR , "train" )
146+     write_split (val_idx , VAL_DIR , "val" )
147+     write_split (test_idx , TEST_DIR , "test" )
148+     print ("\n  Finished writing all subvolumes." )
149+ 
150+ def  write_h5_optogenetics ():
151+     RAW_DIR  =  Path ("/mnt/data1/sage/actin-segmentation/data/EMPIAR-12292/tomos/" )
152+     LABEL_DIR  =  Path ("/mnt/data1/sage/actin-segmentation/data/EMPIAR-12292/labels/" )
153+     OUT_DIR  =  Path ("/mnt/data1/sage/actin-segmentation/data/EMPIAR-12292/h5/" )
154+ 
155+     raw_paths  =  {re .sub ('_rec' , '' , f .stem ): f  for  f  in  RAW_DIR .glob ("*_rec.mrc" )}
156+     label_paths  =  {re .sub ('_mask' , '' , f .stem ): f  for  f  in  LABEL_DIR .glob ("*_mask.mrc" )}
157+ 
158+     stems  =  raw_paths .keys () |  label_paths .keys ()
159+ 
160+     for  stem  in  stems :
161+         if  stem  not  in   raw_paths :
162+             print (f"Warning: Missing tomo file for { stem }  ." )
163+             continue 
164+ 
165+         if  stem  not  in   label_paths :
166+             print (f"Warning: Missing label file for { stem }  ." )
167+             continue 
168+ 
169+         raw_path  =  raw_paths [stem ]
170+         label_path  =  label_paths [stem ]
171+         out_path  =  OUT_DIR  /  f"{ stem }  .h5" 
172+         write_h5 (raw_path , label_path , out_path )
173+ 
174+ def  main ():
175+     #write_h5_optogenetics() 
176+     #write_h5_deepict() 
177+     #convert_tiff2mrc( 
178+     #    input_dir = "/mnt/data1/sage/actin-segmentation/data/deepict/deepict_actin/background_masks", 
179+     #    pixel_size = 13.48 
180+     #) 
181+ 
182+ 
183+     # apply ignore label for masking background during supervised training  
184+     PARENT_DIR  =  Path ("/mnt/data1/sage/actin-segmentation/data/deepict/deepict_actin/" )
185+     MASK_DIR  =  PARENT_DIR  /  "background_masks" 
186+     h5_paths  =  [PARENT_DIR  /  "00004_cleaned.h5" , PARENT_DIR  /  "00012_cleaned.h5" ]
187+     mask_paths  =  [MASK_DIR  /  "00004.mrc" , MASK_DIR  /  "00012.mrc" ]
188+ 
189+     for  i , (path1 , path2 ) in  enumerate (zip (h5_paths , mask_paths )): 
190+         apply_ignore_label (path1 , path2 )
191+     
192+     write_h5_deepict ()
193+ 
194+ if  __name__  ==  "__main__" :
195+     main ()
0 commit comments