
Commit 7869ee9

Add initial data conversion scripts
1 parent 4703502 commit 7869ee9

4 files changed: +154 -11 lines changed

flamingo_tools/data_conversion.py

Lines changed: 58 additions & 11 deletions
@@ -9,6 +9,10 @@
 import pybdv
 import tifffile
 
+from cluster_tools.utils.volume_utils import write_format_metadata
+from elf.io import open_file
+from skimage.transform import rescale
+
 
 def _read_resolution_and_unit_flamingo(mdata_path):
     resolution = None
@@ -106,6 +110,51 @@ def derive_scale_factors(shape):
     return scale_factors
 
 
+def _to_bdv(
+    data, out_path, scale_factors, n_threads, resolution, unit, channel_id, channel_name, tile_id, tile_transformation
+):
+    pybdv.make_bdv(
+        data, out_path,
+        downscale_factors=scale_factors, downscale_mode="mean",
+        n_threads=n_threads,
+        resolution=resolution, unit=unit,
+        attributes={
+            "channel": {"id": channel_id, "name": channel_name}, "tile": {"id": tile_id, "name": str(tile_id)},
+            "angle": {"id": 0, "name": "0"}, "illumination": {"id": 0, "name": "0"}
+        },
+        affine=tile_transformation,
+    )
+
+
+def _to_ome_zarr(
+    data, out_path, scale_factors, n_threads, resolution, unit, channel_id, channel_name, tile_id, tile_transformation
+):
+    # Write the base dataset.
+    base_key = f"c{channel_id}-t{tile_id}"
+    chunks = (128, 128, 128)
+    with open_file(out_path, "a") as f:
+        ds = f.create_dataset(f"{base_key}/s0", shape=data.shape, compression='gzip',
+                              chunks=chunks, dtype=data.dtype)
+        ds.n_threads = n_threads
+        ds[:] = data
+
+        # TODO parallelized implementation.
+        # Do downscaling.
+        for level, scale_factor in enumerate(scale_factors, 1):
+            inv_scale = [1.0 / sc for sc in scale_factor]
+            data = rescale(data, inv_scale, preserve_range=True).astype(data.dtype)
+            ds = f.create_dataset(f"{base_key}/s{level}", shape=data.shape, compression='gzip',
+                                  chunks=chunks, dtype=data.dtype)
+            ds.n_threads = n_threads
+            ds[:] = data
+
+    # Write the ome zarr metadata.
+    metadata_dict = {"unit": unit, "resolution": resolution}
+    write_format_metadata(
+        "ome.zarr", out_path, metadata_dict, scale_factors=scale_factors, prefix=base_key
+    )
+
+
 def convert_lightsheet_to_bdv(
     root: str,
     channel_folders: Dict[str, str],
@@ -169,6 +218,11 @@ def convert_lightsheet_to_bdv(
     ext = os.path.splitext(out_path)[1]
     if ext == "":
         out_path = str(Path(out_path).with_suffix(".n5"))
+        conversion_function = _to_bdv
+    elif ext == ".zarr":
+        conversion_function = _to_ome_zarr
+    else:
+        conversion_function = _to_bdv
 
     # Iterate over the channels
     for channel_id, (channel_name, channel_folder) in enumerate(channel_folders.items()):
@@ -197,7 +251,7 @@ def convert_lightsheet_to_bdv(
         assert len(metadata_paths) == len(file_paths)
         resolution, unit, tile_transformations = read_metadata_flamingo(metadata_paths, center_tiles)
 
-        if channel_name is None or channel_name.strip() == "": #channel name is empty, assign channel id as name
+        if channel_name is None or channel_name.strip() == "": # channel name is empty, assign channel id as name
             channel_name = str(channel_id)
 
         for tile_id, (file_path, tile_transformation) in enumerate(zip(file_paths, tile_transformations)):
@@ -213,16 +267,9 @@ def convert_lightsheet_to_bdv(
             if scale_factors is None:
                 scale_factors = derive_scale_factors(data.shape)
 
-            pybdv.make_bdv(
-                data, out_path,
-                downscale_factors=scale_factors, downscale_mode="mean",
-                n_threads=n_threads,
-                resolution=resolution, unit=unit,
-                attributes={
-                    "channel": {"id": channel_id, "name": channel_name}, "tile": {"id": tile_id, "name": str(tile_id)},
-                    "angle": {"id": 0, "name": "0"}, "illumination": {"id": 0, "name": "0"}
-                },
-                affine=tile_transformation,
+            conversion_function(
+                data, out_path, scale_factors, n_threads, resolution, unit,
+                channel_id, channel_name, tile_id, tile_transformation
             )
 
 
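With this change the writer backend is chosen from the extension of out_path: a ".zarr" suffix routes through _to_ome_zarr, anything else (including a missing extension, which is normalized to ".n5") routes through _to_bdv. Below is a minimal usage sketch of that dispatch, assuming a hypothetical Flamingo dataset layout; the input path, output paths, channel name, and file pattern are placeholders, and only the keyword arguments mirror the call made in the new conversion script.

from flamingo_tools.data_conversion import convert_lightsheet_to_bdv

# Hypothetical single-channel layout: key = channel name, value = channel sub-folder
# (the new script passes "" to read images directly from the input root).
input_root = "/data/flamingo/Platynereis-H2B-TL"
channel_folders = {"membrane": ""}

# No extension on out_path -> ".n5" is appended and _to_bdv writes BigDataViewer/n5.
convert_lightsheet_to_bdv(
    input_root, channel_folders,
    image_file_name_pattern="*_t000000_*_C01_I0_*.tif",
    out_path="/data/converted/platy-bdv",
)

# A ".zarr" extension selects _to_ome_zarr and writes the multiscale zarr layout instead.
convert_lightsheet_to_bdv(
    input_root, channel_folders,
    image_file_name_pattern="*_t000000_*_C01_I0_*.tif",
    out_path="/data/converted/platy.ome.zarr",
)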
scripts/ome_challenge/.gitignore

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+credentials.json
Lines changed: 40 additions & 0 deletions
@@ -0,0 +1,40 @@
+import os
+import sys
+
+ROOT = "/mnt/lustre-emmy-hdd/usr/u12086/data/flamingo"
+
+
+def convert_to_ome_zarr_v2(name):
+    sys.path.append("../..")
+    from flamingo_tools.data_conversion import convert_lightsheet_to_bdv
+
+    input_root = os.path.join(ROOT, name)
+    assert os.path.exists(input_root)
+
+    output_root = os.path.join(ROOT, "ngff-v2")
+    os.makedirs(output_root, exist_ok=True)
+
+    output_path = os.path.join(output_root, f"{name}.ome.zarr")
+
+    # Number of timepoints:
+    # ntp = 10
+    ntp = 1  # for testing
+
+    channel_folders = {f"t{tp:02}": "" for tp in range(ntp)}
+    convert_lightsheet_to_bdv(
+        input_root, channel_folders, image_file_name_pattern="*_t000000_*_C01_I0_*.tif",
+        out_path=output_path,
+    )
+
+
+def convert_to_ome_zarr_v3(name):
+    pass
+
+
+def main():
+    name = "Platynereis-H2B-TL"
+    convert_to_ome_zarr_v2(name)
+
+
+if __name__ == "__main__":
+    main()
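_to_ome_zarr stores each channel/tile pair under a group named c{channel_id}-t{tile_id}, with one array per scale level (s0, s1, ...). A short inspection sketch for the output of convert_to_ome_zarr_v2 follows, assuming the zarr package is installed and the conversion ran with the paths above; "c0-t0" is the group expected for channel 0, tile 0.

import os
import zarr

ROOT = "/mnt/lustre-emmy-hdd/usr/u12086/data/flamingo"
path = os.path.join(ROOT, "ngff-v2", "Platynereis-H2B-TL.ome.zarr")

f = zarr.open(path, mode="r")
# One group per channel/tile combination, e.g. "c0-t0".
print("groups:", list(f.keys()))
# Each group holds the scale pyramid written by _to_ome_zarr.
for level, arr in f["c0-t0"].arrays():
    print(level, arr.shape, arr.dtype, arr.chunks)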
Lines changed: 55 additions & 0 deletions
@@ -0,0 +1,55 @@
+import os
+
+from pydrive2.auth import GoogleAuth
+from pydrive2.drive import GoogleDrive
+
+
+IDS = {
+    "Platynereis-H2B-TL": "1jGwaJ62w80GYo5I_Jcb3O_g7y4RKEhjI",
+    "Zebrafish-XSPIM-multiview": "https://drive.google.com/drive/folders/175hZRrUNWM2UzY0wzXPFjuFZ5QKUN-tm?usp=drive_link"  # noqa
+}
+
+# ROOT = "/mnt/lustre-grete/usr/u12086/data/flamingo"
+ROOT = "/mnt/lustre-emmy-hdd/usr/u12086/data/flamingo"
+
+
+def download_folder(drive, name, folder_id=None, destination_folder=None):
+    # Default to the top-level drive folder and local target for this dataset name.
+    destination_folder = destination_folder or os.path.join(ROOT, name)
+    folder_id = folder_id or IDS[name]
+    os.makedirs(destination_folder, exist_ok=True)
+
+    folder_query = f"'{folder_id}' in parents and trashed=false"
+    file_list = drive.ListFile({'q': folder_query}).GetList()
+
+    for file in file_list:
+        if file['mimeType'] == 'application/vnd.google-apps.folder':
+            folder_name = os.path.join(destination_folder, file['title'])
+            # Recurse into the sub-folder, reusing the same drive handle.
+            download_folder(drive, name, folder_id=file['id'], destination_folder=folder_name)
+        else:
+            print(f"Downloading {file['title']} to {destination_folder}")
+            # breakpoint()
+            file.GetContentFile(os.path.join(destination_folder, file['title']))
+
+
+def get_drive():
+    gauth = GoogleAuth()
+    gauth.LoadCredentialsFile("credentials.json")  # Use the saved credentials
+    if gauth.access_token_expired:
+        gauth.Refresh()
+    else:
+        gauth.Authorize()
+    drive = GoogleDrive(gauth)
+    return drive
+
+
+def main():
+    drive = get_drive()
+
+    # download_from_gdrive(name="Zebrafish-XSPIM-multiview")
+    download_folder(drive, name="Platynereis-H2B-TL")
+
+
+if __name__ == "__main__":
+    main()
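get_drive relies on a previously saved credentials.json, which the new .gitignore keeps out of the repository. Below is a one-time bootstrap sketch for creating that file with PyDrive2, assuming a client_secrets.json from the Google API console sits in the working directory; this helper is not part of the commit.

from pydrive2.auth import GoogleAuth


def create_credentials():
    # Runs the browser-based OAuth flow; needs client_secrets.json in the
    # current directory (downloaded from the Google API console).
    gauth = GoogleAuth()
    gauth.LocalWebserverAuth()
    # Save the token so get_drive() can reuse it without re-authenticating.
    gauth.SaveCredentialsFile("credentials.json")


if __name__ == "__main__":
    create_credentials()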

0 commit comments