33from pathlib import Path
44
55import h5py
6+ import imageio .v3 as imageio
67import napari
78import numpy as np
89import pandas as pd
@@ -19,34 +20,61 @@ def get_voxel_size(imaris_file):
1920 return vsize
2021
2122
22- def extract_training_data (imaris_file , output_folder , crop = True , scale = True ):
def get_transformation(imaris_file):
    """Return the 4x4 affine that maps Imaris world coordinates to voxel indices.

    The extent (``ExtMin0..2`` / ``ExtMax0..2``) and the voxel counts
    (``X``, ``Y``, ``Z``) are read from the ``DataSetInfo/Image`` attributes
    of the file. The extent divided by the voxel count gives the spacing per
    axis. The returned matrix divides by that spacing and shifts by the extent
    minimum, i.e. ``index = (world - ext_min) / spacing``.

    NOTE(review): this assumes that ExtMin0/1/2 correspond to X/Y/Z in that
    order -- confirm against the Imaris file format description.

    Args:
        imaris_file: Path to the Imaris (.ims) HDF5 file.

    Returns:
        A (4, 4) numpy array with the homogeneous world -> index transform.
    """

    def read_attr(attrs, key):
        # The attribute value is iterated as byte chunks, which are joined
        # and decoded to text before being parsed as a number.
        return b"".join(attrs[key]).decode()

    # Open explicitly read-only: this matches how extract_training_data opens
    # the same file and avoids relying on the h5py default mode.
    with h5py.File(imaris_file, "r") as f:
        info = f["DataSetInfo"]["Image"].attrs
        ext_min = np.array([float(read_attr(info, f"ExtMin{i}")) for i in range(3)])
        ext_max = np.array([float(read_attr(info, f"ExtMax{i}")) for i in range(3)])
        size = np.array([int(read_attr(info, dim)) for dim in ("X", "Y", "Z")])

    spacing = (ext_max - ext_min) / size  # µm / voxel

    # build 4×4 affine: world → index
    T = np.eye(4)
    T[:3, :3] = np.diag(1 / spacing)  # scale
    T[:3, 3] = -ext_min / spacing  # translate

    return T
37+
38+
39+ def extract_training_data (imaris_file , output_folder , tif_file = None , crop = True ):
2340 point_key = "/Scene/Content/Points0/CoordsXYZR"
2441 with h5py .File (imaris_file , "r" ) as f :
2542 if point_key not in f :
2643 print ("Skipping" , imaris_file , "due to missing annotations" )
2744 return
28- data = f ["/DataSet/ResolutionLevel 0/TimePoint 0/Channel 0/Data" ][:]
2945 points = f [point_key ][:]
3046 points = points [:, :- 1 ]
31- points = points [:, ::- 1 ]
3247
33- # TODO crop the data to the original shape.
34- # Can we just crop the zero-padding ?!
48+ g = f ["/DataSet/ResolutionLevel 0/TimePoint 0" ]
49+ # The first channel is ctbp2 / the synapse marker channel.
50+ data = g ["Channel 0/Data" ][:]
51+ # The second channel is vglut / the ihc channel.
52+ if "Channel 1" in g :
53+ ihc_data = g ["Channel 1/Data" ][:]
54+ else :
55+ ihc_data = None
56+
57+ T = get_transformation (imaris_file )
58+ points = (T @ np .c_ [points , np .ones (len (points ))].T ).T [:, :3 ]
59+ points = points [:, ::- 1 ]
60+
3561 if crop :
3662 crop_box = np .where (data != 0 )
3763 crop_box = tuple (slice (0 , int (cb .max () + 1 )) for cb in crop_box )
3864 data = data [crop_box ]
3965
40- # Scale the points to match the image dimensions.
41- voxel_size = get_voxel_size (imaris_file )
42- if scale :
43- points /= voxel_size [None ]
44-
45- print (data .shape , voxel_size )
66+ if tif_file is None :
67+ original_data = None
68+ else :
69+ original_data = imageio .imread (tif_file )
4670
4771 if output_folder is None :
4872 v = napari .Viewer ()
4973 v .add_image (data )
74+ if ihc_data is not None :
75+ v .add_image (ihc_data )
76+ if original_data is not None :
77+ v .add_image (original_data , visible = False )
5078 v .add_points (points )
5179 v .title = os .path .basename (imaris_file )
5280 napari .run ()
@@ -66,6 +94,8 @@ def extract_training_data(imaris_file, output_folder, crop=True, scale=True):
6694
6795 f = zarr .open (image_file , "a" )
6896 f .create_dataset ("raw" , data = data )
97+ if ihc_data is not None :
98+ f .create_dataset ("raw_ihc" , data = ihc_data )
6999
70100
71101# Files that look good for training:
@@ -82,6 +112,21 @@ def process_training_data_v1():
82112 extract_training_data (ff , output_folder = "./training_data" )
83113
84114
def _match_tif(imaris_file):
    """Find the CTBP2 tif file that belongs to an imaris file.

    The tif is expected in the same folder as the imaris file and is named
    ``<COCHLEA>_<region>_CTBP2.tif``, where cochlea and region are the first
    two underscore-separated parts of the imaris file name (the cochlea part
    is upper-cased).

    Args:
        imaris_file: Path to the imaris file.

    Returns:
        The path to the matching tif file.

    Raises:
        AssertionError: If the expected tif file does not exist.
    """
    directory = os.path.dirname(imaris_file)
    name_tokens = os.path.basename(imaris_file).split("_")

    cochlea_id = name_tokens[0].upper()
    region_id = name_tokens[1]

    candidate = os.path.join(directory, f"{cochlea_id}_{region_id}_CTBP2.tif")
    assert os.path.exists(candidate), candidate
    return candidate
128+
129+
85130def process_training_data_v2 (visualize = True ):
86131 input_root = "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/ImageCropsIHC_synapses"
87132
@@ -110,16 +155,46 @@ def process_training_data_v2(visualize=True):
110155
111156 imaris_files = sorted (glob (os .path .join (input_root , folder , "*.ims" )))
112157 for imaris_file in imaris_files :
113- fname = os .path .basename (imaris_file )
114- if fname not in valid_files :
158+ if os .path .basename (imaris_file ) not in valid_files :
159+ continue
160+ extract_training_data (imaris_file , output_folder , tif_file = None , crop = True , scale = True )
161+
162+
# We have fixed the imaris data extraction problem and can use all the crops!
def process_training_data_v3(visualize=True):
    """Extract training and test data from all imaris crops (version 3).

    Iterates over the train and test folders below the input root, skips the
    explicitly excluded files and passes every other ``*.ims`` file to
    ``extract_training_data``.

    Args:
        visualize: If True, no output folder is passed on (``output_folder=None``).
            Otherwise the train / test output folder is created if needed and
            passed on, depending on which folder list the crop comes from.
    """
    input_root = "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/ImageCropsIHC_synapses"

    train_output = "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/training_data/synapses/training_data/v3"  # noqa
    test_output = "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/training_data/synapses/test_data/v3"  # noqa

    train_folders = ["synapse_stains", "M78L_IHC-synapse_crops", "M226R_IHC-synapsecrops"]
    test_folders = ["M226L_IHC-synapse_crops"]

    exclude_names = ["220824_Ex3IL_rbCAST1635_mCtBP2580_chCR488_cell1_CtBP2spots.ims"]

    for folder in train_folders + test_folders:
        if visualize:
            output_folder = None
        else:
            # Train folders go to the train output, everything else to the test output.
            output_folder = train_output if folder in train_folders else test_output
            os.makedirs(output_folder, exist_ok=True)

        pattern = os.path.join(input_root, folder, "*.ims")
        for imaris_file in sorted(glob(pattern)):
            if os.path.basename(imaris_file) in exclude_names:
                print("Skipping", imaris_file)
                continue
            extract_training_data(imaris_file, output_folder, tif_file=None, crop=True)
118192
119193
def main():
    """Run the v3 training data extraction with visualization disabled."""
    # Earlier versions of the processing, kept for reference:
    # process_training_data_v1()
    # process_training_data_v2(visualize=True)
    process_training_data_v3(visualize=False)
123198
124199
125200if __name__ == "__main__" :
0 commit comments