Commit 3588f61

Merge improved parsing and implement conversion code
2 parents 7869ee9 + 0c08cbf commit 3588f61

File tree

10 files changed: +304 -118 lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
@@ -1,3 +1,4 @@
 synthetic_data/
 __pycache__/
 converted/
+*.egg-info/

flamingo_tools/data_conversion.py

Lines changed: 126 additions & 92 deletions
@@ -1,5 +1,6 @@
 import multiprocessing as mp
 import os
+import re
 
 from glob import glob
 from pathlib import Path
@@ -58,19 +59,15 @@ def _read_start_position_flamingo(path):
     return start_position
 
 
-def read_metadata_flamingo(metadata_paths, center_tiles):
-    start_positions = []
+def read_metadata_flamingo(metadata_path, offset=None):
     resolution, unit = None, None
-    for path in metadata_paths:
-        resolution, unit = _read_resolution_and_unit_flamingo(path)
-        start_position = _read_start_position_flamingo(path)
-        start_positions.append(start_position)
 
-    start_positions = np.array(start_positions)
-    offset = np.min(start_positions, axis=0) if center_tiles else np.array([0.0, 0.0, 0.0])
+    resolution, unit = _read_resolution_and_unit_flamingo(metadata_path)
+    start_position = _read_start_position_flamingo(metadata_path)
 
     def _pos_to_trafo(pos):
-        pos -= offset
+        if offset is not None:
+            pos -= offset
 
         # FIXME: dirty hack
         # scale = 4
@@ -97,11 +94,9 @@ def _pos_to_trafo(pos):
         }
         return trafo
 
-    transformations = [
-        _pos_to_trafo(pos) for pos in start_positions
-    ]
+    transformation = _pos_to_trafo(start_position)
     # We have to reverse the resolution because pybdv expects ZYX.
-    return resolution[::-1], unit, transformations
+    return resolution[::-1], unit, transformation
 
 
 # TODO derive the scale factors from the shape rather than hard-coding it to 5 levels
@@ -110,30 +105,15 @@ def derive_scale_factors(shape):
     return scale_factors
 
 
-def _to_bdv(
-    data, out_path, scale_factors, n_threads, resolution, unit, channel_id, channel_name, tile_id, tile_transformation
-):
-    pybdv.make_bdv(
-        data, out_path,
-        downscale_factors=scale_factors, downscale_mode="mean",
-        n_threads=n_threads,
-        resolution=resolution, unit=unit,
-        attributes={
-            "channel": {"id": channel_id, "name": channel_name}, "tile": {"id": tile_id, "name": str(tile_id)},
-            "angle": {"id": 0, "name": "0"}, "illumination": {"id": 0, "name": "0"}
-        },
-        affine=tile_transformation,
-    )
-
+def _to_ome_zarr(data, out_path, scale_factors, timepoint, setup_id, attributes, unit, resolution):
+    n_threads = mp.cpu_count()
+    chunks = (128, 128, 128)
 
-def _to_ome_zarr(
-    data, out_path, scale_factors, n_threads, resolution, unit, channel_id, channel_name, tile_id, tile_transformation
-):
     # Write the base dataset.
-    base_key = f"c{channel_id}-t{tile_id}"
-    chunks = (128, 128, 128)
+    base_key = f"setup{setup_id}/timepoint{timepoint}"
+
     with open_file(out_path, "a") as f:
-        ds = f.create_dataset(f"{base_key}/s0", shape=data.shape, compression='gzip',
+        ds = f.create_dataset(f"{base_key}/s0", shape=data.shape, compression="gzip",
                               chunks=chunks, dtype=data.dtype)
         ds.n_threads = n_threads
         ds[:] = data
@@ -143,27 +123,70 @@ def _to_ome_zarr(
         for level, scale_factor in enumerate(scale_factors, 1):
             inv_scale = [1.0 / sc for sc in scale_factor]
             data = rescale(data, inv_scale, preserve_range=True).astype(data.dtype)
-            ds = f.create_dataset(f"{base_key}/s{level}", shape=data.shape, compression='gzip',
+            ds = f.create_dataset(f"{base_key}/s{level}", shape=data.shape, compression="gzip",
                                   chunks=chunks, dtype=data.dtype)
             ds.n_threads = n_threads
             ds[:] = data
 
+        g = f[f"setup{setup_id}"]
+        g.attrs.update(attributes)
+
     # Write the ome zarr metadata.
     metadata_dict = {"unit": unit, "resolution": resolution}
     write_format_metadata(
         "ome.zarr", out_path, metadata_dict, scale_factors=scale_factors, prefix=base_key
     )
 
 
+def flamingo_filename_parser(file_path, name_mapping):
+    filename = os.path.basename(file_path)
+
+    # Extract the timepoint.
+    match = re.search(r'_t(\d+)_', filename)
+    if match:
+        timepoint = int(match.group(1))
+    else:
+        timepoint = 0
+
+    # Extract the additional attributes.
+    attributes = {}
+    if name_mapping is None:
+        name_mapping = {}
+
+    # Extract the channel.
+    match = re.search(r'_C(\d+)_', filename)
+    channel = int(match.group(1)) if match else 0
+    channel_mapping = name_mapping.get("channel", {})
+    attributes["channel"] = {"id": channel, "name": channel_mapping.get(channel, str(channel))}
+
+    # Extract the tile.
+    match = re.search(r'_R(\d+)_', filename)
+    tile = int(match.group(1)) if match else 0
+    tile_mapping = name_mapping.get("tile", {})
+    attributes["tile"] = {"id": tile, "name": tile_mapping.get(tile, str(tile))}
+
+    # Extract the illumination.
+    match = re.search(r'_I(\d+)_', filename)
+    illumination = int(match.group(1)) if match else 0
+    illumination_mapping = name_mapping.get("illumination", {})
+    attributes["illumination"] = {"id": illumination, "name": illumination_mapping.get(illumination, str(illumination))}
+
+    # BDV also supports an angle attribute, but it does not seem to be stored in the filename.
+    # "angle": {"id": 0, "name": "0"}
+
+    attribute_id = f"c{channel}-t{tile}-i{illumination}"
+    return timepoint, attributes, attribute_id
+
+
 def convert_lightsheet_to_bdv(
     root: str,
-    channel_folders: Dict[str, str],
-    image_file_name_pattern: str,
     out_path: str,
+    attribute_parser: callable = flamingo_filename_parser,
+    attribute_names: Optional[Dict[str, Dict[int, str]]] = None,
     metadata_file_name_pattern: Optional[str] = None,
     metadata_root: Optional[str] = None,
     metadata_type: str = "flamingo",
-    center_tiles: bool = True,
+    center_tiles: bool = False,
     resolution: Optional[List[float]] = None,
     unit: Optional[str] = None,
     scale_factors: Optional[List[List[int]]] = None,
@@ -174,24 +197,14 @@ def convert_lightsheet_to_bdv(
     The data is converted to the bdv-n5 file format and can be opened with BigDataViewer
     or BigStitcher. This function is written with data layout and metadata of flamingo
     microscopes in mind, but could potentially be adapted to other data formats.
-    We currently don't support multiple timepoints, but support can be added if needed.
 
-    This function assumes the following input data format:
-    <ROOT>/<CHANNEL1>/<TILE1>.tif
-          /<TILE2>.tif
-          /...
-          /<CHANNEL2>/<TILE1>.tif
-          /<TILE2>.tif
-          /...
+    The timepoint and attributes (channel, tile, illumination) of each tif are parsed from its filename via the attribute_parser.
 
     Args:
-        root: Folder that contains the folders with tifs for each channel.
-        channel_folders: Dictionary that maps the name of each channel to the corresponding folder name
-            underneath the root folder.
-        image_file_name_pattern: The pattern for the names of the tifs that contain the data.
-            This expects a glob pattern (name with '*') to select the corresponding tif files.
-            The simplest pattern that should work in most cases is '*.tif'.
+        root: Folder that contains the image data stored as tifs.
+            This function will take into account all tif files in folders beneath this root directory.
         out_path: Output path where the converted data is saved.
+        attribute_parser: Function that parses the timepoint, the attributes and a unique attribute id from the filename.
         metadata_file_name_pattern: The pattern for the names of files that contain the metadata.
             For flamingo metadata the following pattern should work: '*_Settings.txt'.
         metadata_root: Different root folder for the metadata. By default 'root' is used here as well.
@@ -216,60 +229,81 @@ def convert_lightsheet_to_bdv(
 
     # Make sure we convert to n5, in case no extension is passed.
    ext = os.path.splitext(out_path)[1]
+    convert_to_ome_zarr = False
     if ext == "":
         out_path = str(Path(out_path).with_suffix(".n5"))
-        conversion_function = _to_bdv
     elif ext == ".zarr":
-        conversion_function = _to_ome_zarr
-    else:
-        conversion_function = _to_bdv
+        convert_to_ome_zarr = True
 
-    # Iterate over the channels
-    for channel_id, (channel_name, channel_folder) in enumerate(channel_folders.items()):
-
-        # Get all the image file paths for this channel.
-        tile_pattern = os.path.join(root, channel_folder, image_file_name_pattern)
-        file_paths = sorted(glob(tile_pattern))
-        assert len(file_paths) > 0, tile_pattern
+    files = sorted(glob(os.path.join(root, "**/*.tif"), recursive=True))
+    if metadata_file_name_pattern is None:
+        metadata_files = [None] * len(files)
+        offset = None
+    else:
+        metadata_files = sorted(
+            glob(
+                os.path.join(root if metadata_root is None else metadata_root, f"**/{metadata_file_name_pattern}"),
+                recursive=True
+            )
+        )
+        assert len(metadata_files) == len(files)
+
+        if center_tiles:
+            start_positions = []
+            for mpath in metadata_files:
+                start_positions.append(_read_start_position_flamingo(mpath))
+            offset = np.min(start_positions, axis=0)
+        else:
+            offset = None
+
+    next_setup_id = 0
+    attrs_to_setups = {}
+
+    for file_path, metadata_file in zip(files, metadata_files):
+        timepoint, attributes, aid = attribute_parser(file_path, attribute_names)
+
+        if aid in attrs_to_setups:
+            setup_id = attrs_to_setups[aid]
+        else:
+            attrs_to_setups[aid] = next_setup_id
+            setup_id = next_setup_id
+            next_setup_id += 1
 
         # Read the metadata if it was given.
-        if metadata_file_name_pattern is None:  # No metadata given.
+        if metadata_file is None:  # No metadata given.
            # We don't use any tile transformation.
-            tile_transformations = [None] * len(file_paths)
+            tile_transformation = None
            # Set resolution and unit to their default values if they were not passed.
             if resolution is None:
                 resolution = [1.0, 1.0, 1.0]
             if unit is None:
                 unit = "pixel"
 
         else:  # We have metadata and read it.
-            metadata_pattern = os.path.join(
-                root if metadata_root is None else metadata_root,
-                channel_folder, metadata_file_name_pattern
-            )
-            metadata_paths = sorted(glob(metadata_pattern))
-            assert len(metadata_paths) == len(file_paths)
-            resolution, unit, tile_transformations = read_metadata_flamingo(metadata_paths, center_tiles)
-
-        if channel_name is None or channel_name.strip() == "":  # channel name is empty, assign channel id as name
-            channel_name = str(channel_id)
-
-        for tile_id, (file_path, tile_transformation) in enumerate(zip(file_paths, tile_transformations)):
-
-            # Try to memmap the data. If that doesn't work fall back to loading it into memory.
-            try:
-                data = tifffile.memmap(file_path, mode="r")
-            except ValueError:
-                print(f"Could not memmap the data from {file_path}. Fall back to load it into memory.")
-                data = tifffile.imread(file_path)
-
-            print("Converting channel", channel_id, "tile", tile_id, "from", file_path, "with shape", data.shape)
-            if scale_factors is None:
-                scale_factors = derive_scale_factors(data.shape)
-
-            conversion_function(
-                data, out_path, scale_factors, n_threads, resolution, unit,
-                channel_id, channel_name, tile_id, tile_transformation
+            resolution, unit, tile_transformation = read_metadata_flamingo(metadata_file, offset)
+
+        try:
+            data = tifffile.memmap(file_path, mode="r")
+        except ValueError:
+            print(f"Could not memmap the data from {file_path}. Falling back to loading it into memory.")
+            data = tifffile.imread(file_path)
+
+        print(f"Converting tp={timepoint}, channel={attributes['channel']}, tile={attributes['tile']}")
+        if scale_factors is None:
+            scale_factors = derive_scale_factors(data.shape)
+
+        if convert_to_ome_zarr:
+            _to_ome_zarr(data, out_path, scale_factors, timepoint, setup_id, attributes, unit, resolution)
+        else:
+            pybdv.make_bdv(
+                data, out_path,
+                downscale_factors=scale_factors, downscale_mode="mean",
+                n_threads=n_threads,
+                resolution=resolution, unit=unit,
+                attributes=attributes,
+                affine=tile_transformation,
+                timepoint=timepoint,
+                setup_id=setup_id,
             )
 

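To make the reworked entry point concrete, here is a minimal usage sketch of the new API (the paths and the channel name mapping are hypothetical; `attribute_names` follows the `Optional[Dict[str, Dict[int, str]]]` signature introduced in this diff):

```python
# Hypothetical usage sketch of the new conversion API; paths and names are made up.
from flamingo_tools.data_conversion import convert_lightsheet_to_bdv

# Optional mapping from parsed attribute ids to display names.
# The keys match the attributes parsed by flamingo_filename_parser:
# "channel", "tile" and "illumination".
attribute_names = {"channel": {0: "GFP", 1: "RFP"}}

convert_lightsheet_to_bdv(
    root="/data/flamingo/run1",                   # all tifs below root are discovered recursively
    out_path="/data/converted/run1.ome.zarr",     # a ".zarr" extension selects the OME-Zarr branch
    attribute_names=attribute_names,
    metadata_file_name_pattern="*_Settings.txt",  # flamingo metadata, parsed for resolution/unit
)
```

With the default BDV output (no extension, converted to `.n5`), the same call writes one setup per unique channel/tile/illumination combination, as grouped by the `attrs_to_setups` bookkeeping above.
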
flamingo_tools/test_data.py

Lines changed: 1 addition & 1 deletion
@@ -7,7 +7,7 @@
 # TODO add metadata
 def create_test_data(root, size=256, n_channels=2, n_tiles=4):
     channel_folders = [f"channel{chan_id}" for chan_id in range(n_channels)]
-    file_name_pattern = "volume_R%i_C%i.tif"
+    file_name_pattern = "volume_R%i_C%i_I0.tif"
     for chan_id, channel_folder in enumerate(channel_folders):
         out_folder = os.path.join(root, channel_folder)
         os.makedirs(out_folder, exist_ok=True)

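For reference, this is what the new test filename yields when run through `flamingo_filename_parser` (a quick sketch; note that the trailing `_I0` sits directly before the file extension, so the `_I(\d+)_` regex does not match it and the illumination falls back to its default of 0):

```python
from flamingo_tools.data_conversion import flamingo_filename_parser

timepoint, attributes, aid = flamingo_filename_parser("volume_R2_C1_I0.tif", None)
print(timepoint)   # 0 (no _t<digits>_ in the name)
print(attributes)  # {'channel': {'id': 1, 'name': '1'}, 'tile': {'id': 2, 'name': '2'},
                   #  'illumination': {'id': 0, 'name': '0'}}
print(aid)         # "c1-t2-i0"
```
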
flamingo_tools/version.py

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+__version__ = "0.0.1"

scripts/data_transfer/.gitignore

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+credentials.json

scripts/data_transfer/README.md

Lines changed: 43 additions & 0 deletions
@@ -0,0 +1,43 @@
+# Data Transfer Moser
+
+## Transfer via smbclient
+
+Current approach to the data transfer:
+- Log in to the SCC login node:
+  $
+- Go to `/scratch1/projects/cca/data/moser`
+- Create a subfolder <NAME> for the cochlea to be copied
+- Log in via
+```
+$ smbclient \\\\wfs-medizin.top.gwdg.de\\ukon-all\$\\ukon100 -U GWDG\\pape41
+```
+- Go to the folder with the cochlea to copy (cd works)
+- Copy the folder via:
+  - recurse ON
+  - prompt OFF
+  - mget *
+- Copy this to HLRN by logging in to it and running
+```
+$ rsync -e "ssh -i ~/.ssh/id_rsa_hlrn" -avz [email protected]:/scratch1/projects/cca/data/moser/<NAME> /mnt/lustre-emmy-hdd/projects/nim00007/data/moser/lightsheet/volumes/<NAME>
+```
+- Remove the data on the SCC
+
+## Next files
+
+- UKON100\archiv\imaging\Lightsheet\Huiskengroup_CTLSM\2024\M171_2R_converted_n5
+  - unclear what the converted data is
+- UKON100\archiv\imaging\Lightsheet\Huiskengroup_CTLSM\2024\155_1L_converted_n5\BDVexport.n5
+  - Copied to the SCC, still needs to be rsynced.
+- UKON100\archiv\imaging\Lightsheet\Huiskengroup_CTLSM\2024\MLR151_2R_converted_n5
+- UKON100\archiv\imaging\Lightsheet\Huiskengroup_CTLSM\2024\G11_1L_converted_n5
+
+## Improvements
+
+Try to automate the transfer via https://github.com/jborean93/smbprotocol; see `sync_smb.py` for ChatGPT's initial version.
+Connecting from HLRN is not possible.
+
+## Transfer Back
+
+For transferring MoBIE results back.
+...

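As a rough starting point for the automation idea under "Improvements", here is a minimal sketch using smbprotocol's high-level `smbclient` module. The server, share path, and credential handling are assumptions (the actual `sync_smb.py` may look quite different); in practice the password would be read from `credentials.json`:

```python
import os
import shutil

import smbclient  # pip install smbprotocol

# Assumed connection details; in practice read these from credentials.json.
smbclient.register_session("wfs-medizin.top.gwdg.de", username="GWDG\\pape41", password="<password>")


def copy_smb_tree(remote_dir, local_dir):
    """Recursively copy a directory from the SMB share to a local folder."""
    os.makedirs(local_dir, exist_ok=True)
    for entry in smbclient.scandir(remote_dir):
        remote_path = remote_dir + "\\" + entry.name
        local_path = os.path.join(local_dir, entry.name)
        if entry.is_dir():
            copy_smb_tree(remote_path, local_path)
        else:
            with smbclient.open_file(remote_path, mode="rb") as src, open(local_path, "wb") as dst:
                shutil.copyfileobj(src, dst)


# Hypothetical source folder; <NAME> is the cochlea to copy.
copy_smb_tree(
    r"\\wfs-medizin.top.gwdg.de\ukon-all$\ukon100\archiv\imaging\Lightsheet\Huiskengroup_CTLSM\2024\<NAME>",
    "/scratch1/projects/cca/data/moser/<NAME>",
)
```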