11import multiprocessing as mp
22import os
3+ import re
34
45from glob import glob
56from pathlib import Path
@@ -58,19 +59,15 @@ def _read_start_position_flamingo(path):
5859 return start_position
5960
6061
61- def read_metadata_flamingo (metadata_paths , center_tiles ):
62- start_positions = []
62+ def read_metadata_flamingo (metadata_path , offset = None ):
6363 resolution , unit = None , None
64- for path in metadata_paths :
65- resolution , unit = _read_resolution_and_unit_flamingo (path )
66- start_position = _read_start_position_flamingo (path )
67- start_positions .append (start_position )
6864
69- start_positions = np . array ( start_positions )
70- offset = np . min ( start_positions , axis = 0 ) if center_tiles else np . array ([ 0.0 , 0.0 , 0.0 ] )
65+ resolution , unit = _read_resolution_and_unit_flamingo ( metadata_path )
66+ start_position = _read_start_position_flamingo ( metadata_path )
7167
7268 def _pos_to_trafo (pos ):
73- pos -= offset
69+ if offset is not None :
70+ pos -= offset
7471
7572 # FIXME: dirty hack
7673 # scale = 4
@@ -97,11 +94,9 @@ def _pos_to_trafo(pos):
9794 }
9895 return trafo
9996
100- transformations = [
101- _pos_to_trafo (pos ) for pos in start_positions
102- ]
97+ transformation = _pos_to_trafo (start_position )
10398 # We have to reverse the resolution because pybdv expects ZYX.
104- return resolution [::- 1 ], unit , transformations
99+ return resolution [::- 1 ], unit , transformation
105100
106101
107102# TODO derive the scale factors from the shape rather than hard-coding it to 5 levels
@@ -110,30 +105,15 @@ def derive_scale_factors(shape):
110105 return scale_factors
111106
112107
113- def _to_bdv (
114- data , out_path , scale_factors , n_threads , resolution , unit , channel_id , channel_name , tile_id , tile_transformation
115- ):
116- pybdv .make_bdv (
117- data , out_path ,
118- downscale_factors = scale_factors , downscale_mode = "mean" ,
119- n_threads = n_threads ,
120- resolution = resolution , unit = unit ,
121- attributes = {
122- "channel" : {"id" : channel_id , "name" : channel_name }, "tile" : {"id" : tile_id , "name" : str (tile_id )},
123- "angle" : {"id" : 0 , "name" : "0" }, "illumination" : {"id" : 0 , "name" : "0" }
124- },
125- affine = tile_transformation ,
126- )
127-
108+ def _to_ome_zarr (data , out_path , scale_factors , timepoint , setup_id , attributes , unit , resolution ):
109+ n_threads = mp .cpu_count ()
110+ chunks = (128 , 128 , 128 )
128111
129- def _to_ome_zarr (
130- data , out_path , scale_factors , n_threads , resolution , unit , channel_id , channel_name , tile_id , tile_transformation
131- ):
132112 # Write the base dataset.
133- base_key = f"c { channel_id } -t { tile_id } "
134- chunks = ( 128 , 128 , 128 )
113+ base_key = f"setup { setup_id } /timepoint { timepoint } "
114+
135115 with open_file (out_path , "a" ) as f :
136- ds = f .create_dataset (f"{ base_key } /s0" , shape = data .shape , compression = ' gzip' ,
116+ ds = f .create_dataset (f"{ base_key } /s0" , shape = data .shape , compression = " gzip" ,
137117 chunks = chunks , dtype = data .dtype )
138118 ds .n_threads = n_threads
139119 ds [:] = data
@@ -143,27 +123,70 @@ def _to_ome_zarr(
143123 for level , scale_factor in enumerate (scale_factors , 1 ):
144124 inv_scale = [1.0 / sc for sc in scale_factor ]
145125 data = rescale (data , inv_scale , preserve_range = True ).astype (data .dtype )
146- ds = f .create_dataset (f"{ base_key } /s{ level } " , shape = data .shape , compression = ' gzip' ,
126+ ds = f .create_dataset (f"{ base_key } /s{ level } " , shape = data .shape , compression = " gzip" ,
147127 chunks = chunks , dtype = data .dtype )
148128 ds .n_threads = n_threads
149129 ds [:] = data
150130
131+ g = f [f"setup{ setup_id } " ]
132+ g .attrs .update (attributes )
133+
151134 # Write the ome zarr metadata.
152135 metadata_dict = {"unit" : unit , "resolution" : resolution }
153136 write_format_metadata (
154137 "ome.zarr" , out_path , metadata_dict , scale_factors = scale_factors , prefix = base_key
155138 )
156139
157140
def _find_int_token(filename, prefix, default=0):
    """Return the integer that follows '_<prefix>' in the filename.

    The token must be enclosed by underscores, e.g. '_C01_' for prefix 'C'.
    If the filename does not contain such a token, 'default' is returned.
    """
    match = re.search(rf"_{prefix}(\d+)_", filename)
    return int(match.group(1)) if match else default


def flamingo_filename_parser(file_path, name_mapping=None):
    """Parse the timepoint and the view attributes from a flamingo file name.

    Only the base name of the path is inspected. The following tokens are
    searched, each of which must be enclosed by underscores:
        _t<digits>_ : timepoint
        _C<digits>_ : channel
        _R<digits>_ : tile
        _I<digits>_ : illumination
    Any token that is missing from the file name defaults to 0.

    Args:
        file_path: Path to the image file.
        name_mapping: Optional dictionary that maps the attribute names
            ("channel", "tile", "illumination") to dictionaries that map
            the integer ids to display names. Ids without an entry are named
            by their string representation.

    Returns:
        The timepoint as integer.
        The attribute dictionary, mapping each attribute name to a dictionary
            with the keys "id" and "name".
        The attribute id, a string of the form 'c<channel>-t<tile>-i<illumination>'.
    """
    filename = os.path.basename(file_path)
    if name_mapping is None:
        name_mapping = {}

    # Extract the timepoint.
    timepoint = _find_int_token(filename, "t")

    # Extract the ids of the additional attributes.
    # BDV also supports an angle attribute, but it does not seem to be stored in the filename
    # "angle": {"id": 0, "name": "0"}
    attribute_ids = {
        "channel": _find_int_token(filename, "C"),
        "tile": _find_int_token(filename, "R"),
        "illumination": _find_int_token(filename, "I"),
    }

    # Resolve the display name for each attribute, falling back to the id as string.
    attributes = {}
    for attribute_name, attribute_value in attribute_ids.items():
        id_to_name = name_mapping.get(attribute_name, {})
        attributes[attribute_name] = {
            "id": attribute_value,
            "name": id_to_name.get(attribute_value, str(attribute_value)),
        }

    attribute_id = "c{channel}-t{tile}-i{illumination}".format(**attribute_ids)
    return timepoint, attributes, attribute_id
179+
180+
158181def convert_lightsheet_to_bdv (
159182 root : str ,
160- channel_folders : Dict [str , str ],
161- image_file_name_pattern : str ,
162183 out_path : str ,
184+ attribute_parser : callable = flamingo_filename_parser ,
185+ attribute_names : Optional [Dict [str , Dict [int , str ]]] = None ,
163186 metadata_file_name_pattern : Optional [str ] = None ,
164187 metadata_root : Optional [str ] = None ,
165188 metadata_type : str = "flamingo" ,
166- center_tiles : bool = True ,
189+ center_tiles : bool = False ,
167190 resolution : Optional [List [float ]] = None ,
168191 unit : Optional [str ] = None ,
169192 scale_factors : Optional [List [List [int ]]] = None ,
@@ -174,24 +197,14 @@ def convert_lightsheet_to_bdv(
174197 The data is converted to the bdv-n5 file format and can be opened with BigDataViewer
175198 or BigStitcher. This function is written with data layout and metadata of flamingo
176199 microscopes in mind, but could potentially be adapted to other data formats.
177- We currently don't support multiple timepoints, but support can be added if needed.
178200
179- This function assumes the following input data format:
180- <ROOT>/<CHANNEL1>/<TILE1>.tif
181- /<TILE2>.tif
182- /...
183- /<CHANNEL2>/<TILE1>.tif
184- /<TILE2>.tif
185- /...
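201+ The timepoint and the view attributes of each tif file are derived from its file path by 'attribute_parser'; files for which the parser returns the same attribute id are written to the same setup.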
186202
187203 Args:
188- root: Folder that contains the folders with tifs for each channel.
189- channel_folders: Dictionary that maps the name of each channel to the corresponding folder name
190- underneath the root folder.
191- image_file_name_pattern: The pattern for the names of the tifs that contain the data.
192- This expects a glob pattern (name with '*') to select the corresponding tif files .
193- The simplest pattern that should work in most cases is '*.tif'.
204+ root: Folder that contains the image data stored as tifs.
205+ This function will take into account all tif files in folders beneath this root directory.
194206 out_path: Output path where the converted data is saved.
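207+ attribute_parser: Function that is called with a file path and 'attribute_names' and returns the timepoint, the attribute dictionary and the attribute id for this file. By default 'flamingo_filename_parser' is used.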
195208 metadata_file_name_pattern: The pattern for the names of files that contain the metadata.
196209 For flamingo metadata the following pattern should work: '*_Settings.txt'.
197210 metadata_root: Different root folder for the metadata. By default 'root' is used here as well.
@@ -216,60 +229,81 @@ def convert_lightsheet_to_bdv(
216229
217230 # Make sure we convert to n5, in case no extension is passed.
218231 ext = os .path .splitext (out_path )[1 ]
232+ convert_to_ome_zarr = False
219233 if ext == "" :
220234 out_path = str (Path (out_path ).with_suffix (".n5" ))
221- conversion_function = _to_bdv
222235 elif ext == ".zarr" :
223- conversion_function = _to_ome_zarr
224- else :
225- conversion_function = _to_bdv
236+ convert_to_ome_zarr = True
226237
227- # Iterate over the channels
228- for channel_id , (channel_name , channel_folder ) in enumerate (channel_folders .items ()):
229-
230- # Get all the image file paths for this channel.
231- tile_pattern = os .path .join (root , channel_folder , image_file_name_pattern )
232- file_paths = sorted (glob (tile_pattern ))
233- assert len (file_paths ) > 0 , tile_pattern
238+ files = sorted (glob (os .path .join (root , "**/*.tif" ), recursive = True ))
239+ if metadata_file_name_pattern is None :
240+ metadata_files = [None ] * len (files )
241+ offset = None
242+ else :
243+ metadata_files = sorted (
244+ glob (
245+ os .path .join (root if metadata_root is None else metadata_root , f"**/{ metadata_file_name_pattern } " ),
246+ recursive = True
247+ )
248+ )
249+ assert len (metadata_files ) == len (files )
250+
251+ if center_tiles :
252+ start_positions = []
253+ for mpath in metadata_files :
254+ start_positions .append (_read_start_position_flamingo (mpath ))
255+ offset = np .min (start_positions , axis = 0 )
256+ else :
257+ offset = None
258+
259+ next_setup_id = 0
260+ attrs_to_setups = {}
261+
262+ for file_path , metadata_file in zip (files , metadata_files ):
263+ timepoint , attributes , aid = attribute_parser (file_path , attribute_names )
264+
265+ if aid in attrs_to_setups :
266+ setup_id = attrs_to_setups [aid ]
267+ else :
268+ attrs_to_setups [aid ] = next_setup_id
269+ setup_id = next_setup_id
270+ next_setup_id += 1
234271
235272 # Read the metadata if it was given.
236- if metadata_file_name_pattern is None : # No metadata given.
273+ if metadata_file is None : # No metadata given.
237274 # We don't use any tile transformation.
238- tile_transformations = [ None ] * len ( file_paths )
275+ tile_transformation = None
239276 # Set resolution and unit to their default values if they were not passed.
240277 if resolution is None :
241278 resolution = [1.0 , 1.0 , 1.0 ]
242279 if unit is None :
243280 unit = "pixel"
244281
245282 else : # We have metadata and read it.
246- metadata_pattern = os .path .join (
247- root if metadata_root is None else metadata_root ,
248- channel_folder , metadata_file_name_pattern
249- )
250- metadata_paths = sorted (glob (metadata_pattern ))
251- assert len (metadata_paths ) == len (file_paths )
252- resolution , unit , tile_transformations = read_metadata_flamingo (metadata_paths , center_tiles )
253-
254- if channel_name is None or channel_name .strip () == "" : # channel name is empty, assign channel id as name
255- channel_name = str (channel_id )
256-
257- for tile_id , (file_path , tile_transformation ) in enumerate (zip (file_paths , tile_transformations )):
258-
259- # Try to memmap the data. If that doesn't work fall back to loading it into memory.
260- try :
261- data = tifffile .memmap (file_path , mode = "r" )
262- except ValueError :
263- print (f"Could not memmap the data from { file_path } . Fall back to load it into memory." )
264- data = tifffile .imread (file_path )
265-
266- print ("Converting channel" , channel_id , "tile" , tile_id , "from" , file_path , "with shape" , data .shape )
267- if scale_factors is None :
268- scale_factors = derive_scale_factors (data .shape )
269-
270- conversion_function (
271- data , out_path , scale_factors , n_threads , resolution , unit ,
272- channel_id , channel_name , tile_id , tile_transformation
283+ resolution , unit , tile_transformation = read_metadata_flamingo (metadata_file , offset )
284+
285+ try :
286+ data = tifffile .memmap (file_path , mode = "r" )
287+ except ValueError :
288+ print (f"Could not memmap the data from { file_path } . Fall back to load it into memory." )
289+ data = tifffile .imread (file_path )
290+
291+ print (f"Converting tp={ timepoint } , channel={ attributes ['channel' ]} , tile={ attributes ['tile' ]} " )
292+ if scale_factors is None :
293+ scale_factors = derive_scale_factors (data .shape )
294+
295+ if convert_to_ome_zarr :
296+ _to_ome_zarr (data , out_path , scale_factors , timepoint , setup_id , attributes , unit , resolution )
297+ else :
298+ pybdv .make_bdv (
299+ data , out_path ,
300+ downscale_factors = scale_factors , downscale_mode = "mean" ,
301+ n_threads = n_threads ,
302+ resolution = resolution , unit = unit ,
303+ attributes = attributes ,
304+ affine = tile_transformation ,
305+ timepoint = timepoint ,
306+ setup_id = setup_id ,
273307 )
274308
275309
0 commit comments