Skip to content

Commit 8f601ed

Browse files
Improve flamingo filename parsing
1 parent adbe319 commit 8f601ed

File tree

4 files changed

+123
-74
lines changed

4 files changed

+123
-74
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
synthetic_data/
22
__pycache__/
33
converted/
4+
*.egg-info/

flamingo_tools/data_conversion.py

Lines changed: 112 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import multiprocessing as mp
22
import os
3+
import re
34

45
from glob import glob
56
from pathlib import Path
@@ -54,19 +55,15 @@ def _read_start_position_flamingo(path):
5455
return start_position
5556

5657

57-
def read_metadata_flamingo(metadata_paths, center_tiles):
58-
start_positions = []
58+
def read_metadata_flamingo(metadata_path, offset=None):
5959
resolution, unit = None, None
60-
for path in metadata_paths:
61-
resolution, unit = _read_resolution_and_unit_flamingo(path)
62-
start_position = _read_start_position_flamingo(path)
63-
start_positions.append(start_position)
6460

65-
start_positions = np.array(start_positions)
66-
offset = np.min(start_positions, axis=0) if center_tiles else np.array([0.0, 0.0, 0.0])
61+
resolution, unit = _read_resolution_and_unit_flamingo(metadata_path)
62+
start_position = _read_start_position_flamingo(metadata_path)
6763

6864
def _pos_to_trafo(pos):
69-
pos -= offset
65+
if offset is not None:
66+
pos -= offset
7067

7168
# FIXME: dirty hack
7269
# scale = 4
@@ -93,11 +90,9 @@ def _pos_to_trafo(pos):
9390
}
9491
return trafo
9592

96-
transformations = [
97-
_pos_to_trafo(pos) for pos in start_positions
98-
]
93+
transformation = _pos_to_trafo(start_position)
9994
# We have to reverse the resolution because pybdv expects ZYX.
100-
return resolution[::-1], unit, transformations
95+
return resolution[::-1], unit, transformation
10196

10297

10398
# TODO derive the scale factors from the shape rather than hard-coding it to 5 levels
@@ -106,15 +101,55 @@ def derive_scale_factors(shape):
106101
return scale_factors
107102

108103

104+
def flamingo_filename_parser(file_path, name_mapping=None):
    """Parse the timepoint and the view attributes from a flamingo tif filename.

    Only the basename of the path is inspected. It is searched for the tokens
    '_t<digits>' (timepoint), '_C<digits>' (channel), '_R<digits>' (tile) and
    '_I<digits>' (illumination). A token that is not present yields the value 0.

    Args:
        file_path: Path to the tif file.
        name_mapping: Optional dictionary that maps an attribute name
            ('channel', 'tile' or 'illumination') to a dictionary from the
            integer id to a display name. Ids without an entry are named by
            their decimal string, e.g. channel 1 is named '1'.

    Returns:
        A tuple (timepoint, attributes, attribute_id). 'attributes' maps each
        attribute name to {"id": <int>, "name": <str>} and 'attribute_id' is the
        string 'c<channel>-t<tile>-i<illumination>' that identifies the view.
    """
    filename = os.path.basename(file_path)
    if name_mapping is None:
        name_mapping = {}

    timepoint = _parse_flamingo_token(filename, "t")

    # BDV also supports an angle attribute, but it does not seem to be stored
    # in the filename, so it is not part of the parsed attributes.
    attributes = {}
    for attribute, prefix in (("channel", "C"), ("tile", "R"), ("illumination", "I")):
        value = _parse_flamingo_token(filename, prefix)
        display_names = name_mapping.get(attribute, {})
        attributes[attribute] = {"id": value, "name": display_names.get(value, str(value))}

    channel = attributes["channel"]["id"]
    tile = attributes["tile"]["id"]
    illumination = attributes["illumination"]["id"]
    attribute_id = f"c{channel}-t{tile}-i{illumination}"
    return timepoint, attributes, attribute_id


def _parse_flamingo_token(filename, prefix):
    """Return the integer that follows '_<prefix>' in filename, or 0 if it is absent."""
    # The lookahead accepts a token that is followed by '_', by the file
    # extension or by the end of the name. Requiring a trailing '_' would
    # silently map a token at the end of the stem (e.g. '..._C01.tif') to 0.
    match = re.search(rf"_{prefix}(\d+)(?=[_.]|$)", filename)
    return int(match.group(1)) if match else 0
142+
143+
109144
def convert_lightsheet_to_bdv(
110145
root: str,
111-
channel_folders: Dict[str, str],
112-
image_file_name_pattern: str,
113146
out_path: str,
147+
attribute_parser: callable = flamingo_filename_parser,
148+
attribute_names: Optional[Dict[str, Dict[int, str]]] = None,
114149
metadata_file_name_pattern: Optional[str] = None,
115150
metadata_root: Optional[str] = None,
116151
metadata_type: str = "flamingo",
117-
center_tiles: bool = True,
152+
center_tiles: bool = False,
118153
resolution: Optional[List[float]] = None,
119154
unit: Optional[str] = None,
120155
scale_factors: Optional[List[List[int]]] = None,
@@ -125,24 +160,14 @@ def convert_lightsheet_to_bdv(
125160
The data is converted to the bdv-n5 file format and can be opened with BigDataViewer
126161
or BigStitcher. This function is written with data layout and metadata of flamingo
127162
microscopes in mind, but could potentially be adapted to other data formats.
128-
We currently don't support multiple timepoints, but support can be added if needed.
129163
130-
This function assumes the following input data format:
131-
<ROOT>/<CHANNEL1>/<TILE1>.tif
132-
/<TILE2>.tif
133-
/...
134-
/<CHANNEL2>/<TILE1>.tif
135-
/<TILE2>.tif
136-
/...
164+
TODO explain the attribute parsing.
137165
138166
Args:
139-
root: Folder that contains the folders with tifs for each channel.
140-
channel_folders: Dictionary that maps the name of each channel to the corresponding folder name
141-
underneath the root folder.
142-
image_file_name_pattern: The pattern for the names of the tifs that contain the data.
143-
This expects a glob pattern (name with '*') to select the corresponding tif files .
144-
The simplest pattern that should work in most cases is '*.tif'.
167+
root: Folder that contains the image data stored as tifs.
168+
This function will take into account all tif files in folders beneath this root directory.
145169
out_path: Output path where the converted data is saved.
170+
attribute_parser: TODO
146171
metadata_file_name_pattern: The pattern for the names of files that contain the metadata.
147172
For flamingo metadata the following pattern should work: '*_Settings.txt'.
148173
metadata_root: Different root folder for the metadata. By default 'root' is used here as well.
@@ -170,60 +195,73 @@ def convert_lightsheet_to_bdv(
170195
if ext == "":
171196
out_path = str(Path(out_path).with_suffix(".n5"))
172197

173-
# Iterate over the channels
174-
for channel_id, (channel_name, channel_folder) in enumerate(channel_folders.items()):
175-
176-
# Get all the image file paths for this channel.
177-
tile_pattern = os.path.join(root, channel_folder, image_file_name_pattern)
178-
file_paths = sorted(glob(tile_pattern))
179-
assert len(file_paths) > 0, tile_pattern
198+
files = sorted(glob(os.path.join(root, "**/*.tif"), recursive=True))
199+
if metadata_file_name_pattern is None:
200+
metadata_files = [None] * len(files)
201+
offset = None
202+
else:
203+
metadata_files = sorted(
204+
glob(
205+
os.path.join(root if metadata_root is None else metadata_root, f"**/{metadata_file_name_pattern}"),
206+
recursive=True
207+
)
208+
)
209+
assert len(metadata_files) == len(files)
210+
211+
if center_tiles:
212+
start_positions = []
213+
for mpath in metadata_files:
214+
start_positions.append(_read_start_position_flamingo(mpath))
215+
offset = np.min(start_positions, axis=0)
216+
else:
217+
offset = None
218+
219+
next_setup_id = 0
220+
attrs_to_setups = {}
221+
222+
for file_path, metadata_file in zip(files, metadata_files):
223+
timepoint, attributes, aid = attribute_parser(file_path, attribute_names)
224+
225+
if aid in attrs_to_setups:
226+
setup_id = attrs_to_setups[aid]
227+
else:
228+
attrs_to_setups[aid] = next_setup_id
229+
setup_id = next_setup_id
230+
next_setup_id += 1
180231

181232
# Read the metadata if it was given.
182-
if metadata_file_name_pattern is None: # No metadata given.
233+
if metadata_file is None: # No metadata given.
183234
# We don't use any tile transformation.
184-
tile_transformations = [None] * len(file_paths)
235+
tile_transformation = [None]
185236
# Set resolution and unit to their default values if they were not passed.
186237
if resolution is None:
187238
resolution = [1.0, 1.0, 1.0]
188239
if unit is None:
189240
unit = "pixel"
190241

191242
else: # We have metadata and read it.
192-
metadata_pattern = os.path.join(
193-
root if metadata_root is None else metadata_root,
194-
channel_folder, metadata_file_name_pattern
195-
)
196-
metadata_paths = sorted(glob(metadata_pattern))
197-
assert len(metadata_paths) == len(file_paths)
198-
resolution, unit, tile_transformations = read_metadata_flamingo(metadata_paths, center_tiles)
199-
200-
if channel_name is None or channel_name.strip() == "": #channel name is empty, assign channel id as name
201-
channel_name = str(channel_id)
202-
203-
for tile_id, (file_path, tile_transformation) in enumerate(zip(file_paths, tile_transformations)):
204-
205-
# Try to memmap the data. If that doesn't work fall back to loading it into memory.
206-
try:
207-
data = tifffile.memmap(file_path, mode="r")
208-
except ValueError:
209-
print(f"Could not memmap the data from {file_path}. Fall back to load it into memory.")
210-
data = tifffile.imread(file_path)
211-
212-
print("Converting channel", channel_id, "tile", tile_id, "from", file_path, "with shape", data.shape)
213-
if scale_factors is None:
214-
scale_factors = derive_scale_factors(data.shape)
215-
216-
pybdv.make_bdv(
217-
data, out_path,
218-
downscale_factors=scale_factors, downscale_mode="mean",
219-
n_threads=n_threads,
220-
resolution=resolution, unit=unit,
221-
attributes={
222-
"channel": {"id": channel_id, "name": channel_name}, "tile": {"id": tile_id, "name": str(tile_id)},
223-
"angle": {"id": 0, "name": "0"}, "illumination": {"id": 0, "name": "0"}
224-
},
225-
affine=tile_transformation,
226-
)
243+
resolution, unit, tile_transformation = read_metadata_flamingo(metadata_file, offset)
244+
245+
try:
246+
data = tifffile.memmap(file_path, mode="r")
247+
except ValueError:
248+
print(f"Could not memmap the data from {file_path}. Fall back to load it into memory.")
249+
data = tifffile.imread(file_path)
250+
251+
print(f"Converting tp={timepoint}, channel={attributes['channel']}, tile={attributes['tile']}")
252+
if scale_factors is None:
253+
scale_factors = derive_scale_factors(data.shape)
254+
255+
pybdv.make_bdv(
256+
data, out_path,
257+
downscale_factors=scale_factors, downscale_mode="mean",
258+
n_threads=n_threads,
259+
resolution=resolution, unit=unit,
260+
attributes=attributes,
261+
affine=tile_transformation,
262+
timepoint=timepoint,
263+
setup_id=setup_id,
264+
)
227265

228266

229267
# TODO expose more arguments via CLI.

flamingo_tools/version.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
__version__ = "0.0.1"

setup.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
import runpy
2+
from setuptools import setup, find_packages
3+
4+
# Execute version.py on its own and read the version string from the
# resulting module globals.
_version_globals = runpy.run_path('flamingo_tools/version.py')
version = _version_globals['__version__']

# Package metadata for flamingo_tools; the 'test' package is excluded.
setup(
    name='flamingo_tools',
    version=version,
    author='Constantin Pape',
    license='MIT',
    packages=find_packages(exclude=['test']),
)

0 commit comments

Comments
 (0)