Skip to content

Commit 5a397cd

Browse files
Update data conversion CLI and the segmentation workflow script
1 parent 36f6694 commit 5a397cd

File tree

2 files changed

+94
-63
lines changed

2 files changed

+94
-63
lines changed

flamingo_tools/data_conversion.py

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,7 @@ def convert_lightsheet_to_bdv(
245245
file_ext: str = ".tif",
246246
attribute_parser: callable = flamingo_filename_parser,
247247
attribute_names: Optional[Dict[str, Dict[int, str]]] = None,
248-
metadata_file_name_pattern: Optional[str] = None,
248+
metadata_file_name_pattern: Optional[str] = "*_Settings.txt",
249249
metadata_root: Optional[str] = None,
250250
metadata_type: str = "flamingo",
251251
center_tiles: bool = False,
@@ -383,36 +383,35 @@ def convert_lightsheet_to_bdv(
383383
_write_missing_views(out_path)
384384

385385

386-
# TODO expose more arguments via CLI.
387386
def convert_lightsheet_to_bdv_cli():
388387
import argparse
389388

390389
parser = argparse.ArgumentParser(
391-
description="Convert lightsheet data to format compatible with BigDataViewer / BigStitcher. "
392-
"Example usage: To convert the synthetic data created via create_synthetic_data.py run: \n"
393-
"python convert_flamingo_data.py -i synthetic_data -c channel0 channel1 -f *.tif -o synthetic.n5"
390+
description="Convert lightsheet data from a flamingo microscope to a format compatible with BigDataViewer / BigStitcher. " # noqa
391+
"For most flamingo data it should be sufficient to run the script like this: \n"
392+
"python convert_flamingo_data.py -i /path/to/flamingo_data -o /path/to/output.n5 \n"
393+
"Here, -i specifies the path to the input folder and -o specifies the path to the output data. \n"
394+
"In order to process flamingo data stored in raw format you also need to pass the argument '-f .raw'." # noqa
394395
)
395396
parser.add_argument(
396-
"--input_root", "-i", required=True,
397-
help="Folder that contains the folders with tifs for each channel."
397+
"--input_root", "-i", required=True, help="Folder that contains the data from the flamingo microscope."
398398
)
399399
parser.add_argument(
400-
"--channel_folders", "-c", nargs="+", required=True,
401-
help="Name of folders with the data for each channel."
400+
"--out_path", "-o", required=True, help="Output path where the converted data will be saved."
402401
)
403402
parser.add_argument(
404-
"--image_file_name_pattern", "-f", required=True,
405-
help="The pattern for the names of the tifs that contain the data. "
406-
"This expects a glob pattern (name with '*') to select the corresponding tif files."
407-
"The simplest pattern that should work in most cases is '*.tif'."
403+
"--file_ext", "-f", default=".tif",
404+
help="The file extension of the image data. By default '.tif' is used, pass '.raw' if your data is stored as raw files." # noqa
408405
)
409406
parser.add_argument(
410-
"--out_path", "-o", required=True,
411-
help="Output path where the converted data is saved."
407+
"--metadata_pattern", default="*_Settings.txt",
408+
help="The file pattern for finding metadata information. The default value works for flamingo data."
412409
)
413410

414411
args = parser.parse_args()
415-
channel_folders = {name: name for name in args.channel_folders}
416412
convert_lightsheet_to_bdv(
417-
args.input_root, channel_folders, args.image_file_name_pattern, args.out_path,
413+
root=args.input_root,
414+
out_path=args.out_path,
415+
file_ext=args.file_ext,
416+
metadata_file_name_pattern=args.metadata_pattern
418417
)

scripts/prediction/segmentation_workflow.py

Lines changed: 78 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import argparse
22
import os
3+
from pathlib import Path
34
from shutil import rmtree
45

56
import pybdv.metadata as bdv_metadata
@@ -9,8 +10,6 @@
910
from flamingo_tools.segmentation import run_unet_prediction, filter_isolated_objects
1011
from flamingo_tools.mobie import add_raw_to_mobie, add_segmentation_to_mobie
1112

12-
MOBIE_ROOT = "/mnt/lustre-emmy-hdd/projects/nim00007/data/moser/lightsheet/mobie"
13-
1413

1514
def postprocess_seg(output_folder):
1615
print("Run segmentation postprocessing ...")
@@ -30,33 +29,23 @@ def postprocess_seg(output_folder):
3029
)
3130

3231

33-
def export_to_mobie(xml_path, segmentation_folder, scale, mobie_dataset, chunks):
34-
# Add to mobie:
35-
36-
# - raw data (if not yet present)
37-
add_raw_to_mobie(
38-
mobie_project=MOBIE_ROOT,
39-
mobie_dataset=mobie_dataset,
40-
source_name="pv-channel",
41-
xml_path=xml_path,
42-
setup_id=0,
43-
)
44-
45-
# TODO enable passing extra channel names
46-
# - additional channels
32+
def export_to_mobie(xml_path, segmentation_folder, output_folder, scale, mobie_dataset, chunks, channel_names):
33+
# Add to mobie: All the channels.
4734
setup_ids = bdv_metadata.get_setup_ids(xml_path)
48-
if len(setup_ids) > 1:
49-
extra_channel_names = ["gfp_channel", "myo_channel"]
50-
for i, setup_id in enumerate(setup_ids[1:]):
51-
add_raw_to_mobie(
52-
mobie_project=MOBIE_ROOT,
53-
mobie_dataset=mobie_dataset,
54-
source_name=extra_channel_names[i],
55-
xml_path=xml_path,
56-
setup_id=setup_id
57-
)
58-
59-
# - segmentation and post-processed segmentation
35+
if channel_names is None:
36+
channel_names = [f"channel-{i}" for i in range(len(setup_ids))]
37+
else:
38+
assert len(channel_names) == len(setup_ids)
39+
for i, setup_id in enumerate(setup_ids):
40+
add_raw_to_mobie(
41+
mobie_project=output_folder,
42+
mobie_dataset=mobie_dataset,
43+
source_name=channel_names[i],
44+
xml_path=xml_path,
45+
setup_id=setup_id
46+
)
47+
48+
# The segmentation and post-processed segmentation results.
6049
seg_path = os.path.join(segmentation_folder, "segmentation.zarr")
6150
seg_resolution = bdv_metadata.get_resolution(xml_path, setup_id=0)
6251
if scale == 1:
@@ -66,7 +55,7 @@ def export_to_mobie(xml_path, segmentation_folder, scale, mobie_dataset, chunks)
6655
seg_key = "segmentation"
6756
seg_name = "nuclei_fullscale" if scale == 0 else "nuclei_downscaled"
6857
add_segmentation_to_mobie(
69-
mobie_project=MOBIE_ROOT,
58+
mobie_project=output_folder,
7059
mobie_dataset=mobie_dataset,
7160
source_name=seg_name,
7261
segmentation_path=seg_path,
@@ -80,7 +69,7 @@ def export_to_mobie(xml_path, segmentation_folder, scale, mobie_dataset, chunks)
8069
seg_key = "segmentation_postprocessed"
8170
seg_name += "_postprocessed"
8271
add_segmentation_to_mobie(
83-
mobie_project=MOBIE_ROOT,
72+
mobie_project=output_folder,
8473
mobie_dataset=mobie_dataset,
8574
source_name=seg_name,
8675
segmentation_path=seg_path,
@@ -93,15 +82,42 @@ def export_to_mobie(xml_path, segmentation_folder, scale, mobie_dataset, chunks)
9382

9483

9584
def main():
96-
parser = argparse.ArgumentParser()
97-
parser.add_argument("-i", "--input", required=True)
98-
parser.add_argument("-o", "--output_folder", required=True)
99-
parser.add_argument("-s", "--scale", required=True, type=int)
100-
parser.add_argument("-m", "--mobie_dataset", required=True)
85+
# Argument parser so that this script can be used from the command line.
86+
parser = argparse.ArgumentParser(
87+
description="Run segmentation and export the segmentation result for a lightsheet volume."
88+
)
89+
parser.add_argument(
90+
"-i", "--input", required=True,
91+
help="Path to the input volume. This should be the path to the xml file obtained after stitching."
92+
)
93+
parser.add_argument(
94+
"-o", "--output", required=True,
95+
help="Path to the output folder. This is where the MoBIE project, with image data and segmentation result, will be stored." # noqa
96+
)
97+
parser.add_argument(
98+
"-s", "--segmentation_folder", required=True,
99+
help="Path to a folder where intermediate results for the segmentation will be stored. "
100+
"The results will be removed after the export to MoBIE."
101+
)
102+
parser.add_argument(
103+
"--mobie_dataset",
104+
help="Internal name of the dataset in MoBIE. If not given this will be derived from the name of the input volume.", # noqa
105+
)
106+
parser.add_argument(
107+
"--setup_id", default=0, type=int,
108+
help="The setup id to use for the segmentation. Choose the setup-id for the channel that contains the data to be used for segmentation." # noqa
109+
" This should be the PV channel for SGN segmentation."
110+
)
111+
parser.add_argument(
112+
"--scale", default=0, type=int,
113+
help="The scale to use for segmentation. By default this will run at the lowest scale (= full resolution)."
114+
)
101115
parser.add_argument("--model")
102-
116+
parser.add_argument("--channel_names", nargs="+", default=None, help="The names of channels in the dataset, in the same order as the setup-ids.") # noqa
103117
args = parser.parse_args()
104118

119+
# This is just some preparation logic to get a good size for filtering
120+
# the nuclei depending on which scale we use for running the segmentation.
105121
scale = args.scale
106122
if scale == 0:
107123
min_size = 1000
@@ -110,18 +126,25 @@ def main():
110126
else:
111127
raise ValueError
112128

129+
# Here we read the path to the data from the xml file and we construct the
130+
# input key (= internal file path in the n5 file with the data),
131+
# that points to the correct setup-id and scale.
113132
xml_path = args.input
114133
assert os.path.splitext(xml_path)[1] == ".xml"
115134
input_path = bdv_metadata.get_data_path(xml_path, return_absolute_path=True)
135+
input_key = f"setup{args.setup_id}/timepoint0/s{scale}"
116136

117-
# TODO need to make sure that PV is always setup 0
118-
input_key = f"setup0/timepoint0/s{scale}"
119-
137+
# This is just some preparation to choose the correct block sizes for running prediction
138+
# depending on having a GPU or not available.
139+
# (You will need a GPU to run this for any larger volume, CPU support is just for testing purposes.)
120140
have_cuda = torch.cuda.is_available()
121141
chunks = z5py.File(input_path, "r")[input_key].chunks
122142
block_shape = tuple([2 * ch for ch in chunks]) if have_cuda else tuple(chunks)
123143
halo = (16, 64, 64) if have_cuda else (8, 32, 32)
124144

145+
# Here we find the path to the model for segmentation.
146+
# If the path is given it should point to the ".pt" file.
147+
# Otherwise, we try to load the model from where the checkpoint was stored on my system.
125148
if args.model is not None:
126149
model = args.model
127150
else:
@@ -130,22 +153,31 @@ def main():
130153
else:
131154
model = "../training/checkpoints/cochlea_distance_unet-train-downsampled"
132155

156+
# These functions run the actual segmentation and the segmentation postprocessing.
133157
run_unet_prediction(
134-
input_path, input_key, args.output_folder, model,
158+
input_path, input_key, args.segmentation_folder, model,
135159
scale=None, min_size=min_size,
136160
block_shape=block_shape, halo=halo,
137161
)
162+
postprocess_seg(args.segmentation_folder)
138163

139-
postprocess_seg(args.output_folder)
140-
141-
export_to_mobie(xml_path, args.output_folder, scale, args.mobie_dataset, chunks)
164+
# This function exports the segmentation and the corresponding channel to MoBIE.
165+
if args.mobie_dataset is None:
166+
mobie_dataset = Path(xml_path).stem
167+
else:
168+
mobie_dataset = args.mobie_dataset
169+
export_to_mobie(
170+
xml_path, args.segmentation_folder, args.output_folder, scale, mobie_dataset, chunks,
171+
channel_names=args.channel_names
172+
)
142173

143-
# clean up: remove segmentation folders
174+
# Finally, we clean up the intermediate segmentation results, that are not needed anymore
175+
# because everything was exported to MoBIE.
144176
print("Cleaning up intermediate segmentation results")
145177
print("This may take a while, but everything else is done.")
146178
print("You can check the results in the MoBIE project already at:")
147-
print(f"{MOBIE_ROOT}:{args.mobie_dataset}")
148-
rmtree(args.output_folder)
179+
print(f"{args.output_folder}:{mobie_dataset}")
180+
rmtree(args.segmentation_folder)
149181

150182

151183
if __name__ == "__main__":

0 commit comments

Comments
 (0)