Skip to content

Commit 9aa89ee

Browse files
Merge branch 'master' into ome-challenge
2 parents 0cc2d1e + ecd278c commit 9aa89ee

File tree

8 files changed

+346
-22
lines changed

8 files changed

+346
-22
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,5 @@ synthetic_data/
22
__pycache__/
33
converted/
44
*.egg-info/
5+
checkpoints/
6+
logs/

flamingo_tools/data_conversion.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -171,10 +171,16 @@ def flamingo_filename_parser(file_path, name_mapping):
171171
illumination_mapping = name_mapping.get("illumination", {})
172172
attributes["illumination"] = {"id": illumination, "name": illumination_mapping.get(illumination, str(illumination))}
173173

174+
# Extract D. TODO what is this?
175+
match = re.search(r'_D(\d+)_', filename)
176+
D = int(match.group(1)) if match else 0
177+
D_mapping = name_mapping.get("D", {})
178+
attributes["D"] = {"id": D, "name": D_mapping.get(D, str(D))}
179+
174180
# BDV also supports an angle attribute, but it does not seem to be stored in the filename
175181
# "angle": {"id": 0, "name": "0"}
176182

177-
attribute_id = f"c{channel}-t{tile}-i{illumination}"
183+
attribute_id = f"c{channel}-t{tile}-i{illumination}-d{D}"
178184
return timepoint, attributes, attribute_id
179185

180186

@@ -282,13 +288,13 @@ def convert_lightsheet_to_bdv(
282288
else: # We have metadata and read it.
283289
resolution, unit, tile_transformation = read_metadata_flamingo(metadata_file, offset)
284290

291+
print(f"Converting tp={timepoint}, channel={attributes['channel']}, tile={attributes['tile']}")
285292
try:
286293
data = tifffile.memmap(file_path, mode="r")
287294
except ValueError:
288295
print(f"Could not memmap the data from {file_path}. Fall back to load it into memory.")
289296
data = tifffile.imread(file_path)
290297

291-
print(f"Converting tp={timepoint}, channel={attributes['channel']}, tile={attributes['tile']}")
292298
if scale_factors is None:
293299
scale_factors = derive_scale_factors(data.shape)
294300

flamingo_tools/mobie.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
import os
2+
import tempfile
3+
from typing import Tuple
4+
5+
from mobie import add_bdv_image, add_segmentation
6+
from mobie.metadata.dataset_metadata import read_dataset_metadata
7+
8+
9+
# TODO refactor to mobie utils
10+
def _source_exists(mobie_project, mobie_dataset, source_name):
11+
dataset_folder = os.path.join(mobie_project, mobie_dataset)
12+
metadata = read_dataset_metadata(dataset_folder)
13+
sources = metadata.get("sources", {})
14+
return source_name in sources
15+
16+
17+
def add_raw_to_mobie(
18+
mobie_project: str,
19+
mobie_dataset: str,
20+
source_name: str,
21+
xml_path: str,
22+
skip_existing: bool = True,
23+
setup_id: int = 0,
24+
):
25+
"""
26+
"""
27+
# Check if we have converted this data already.
28+
have_source = _source_exists(mobie_project, mobie_dataset, source_name)
29+
if have_source and skip_existing:
30+
print(f"Source {source_name} already exists in {mobie_project}:{mobie_dataset}.")
31+
print("Conversion to mobie will be skipped.")
32+
return
33+
elif have_source:
34+
raise NotImplementedError
35+
36+
with tempfile.TemporaryDirectory() as tmpdir:
37+
add_bdv_image(
38+
xml_path=xml_path,
39+
root=mobie_project,
40+
dataset_name=mobie_dataset,
41+
image_name=source_name,
42+
tmp_folder=tmpdir,
43+
file_format="bdv.n5",
44+
setup_ids=[setup_id],
45+
)
46+
47+
48+
def add_segmentation_to_mobie(
49+
mobie_project: str,
50+
mobie_dataset: str,
51+
source_name: str,
52+
segmentation_path: str,
53+
segmentation_key: str,
54+
resolution: Tuple[int, int, int],
55+
unit: str,
56+
scale_factors: Tuple[Tuple[int, int, int]],
57+
chunks: Tuple[int, int, int],
58+
skip_existing: bool = True,
59+
):
60+
# Check if we have converted this data already.
61+
have_source = _source_exists(mobie_project, mobie_dataset, source_name)
62+
if have_source and skip_existing:
63+
print(f"Source {source_name} already exists in {mobie_project}:{mobie_dataset}.")
64+
print("Conversion to mobie will be skipped.")
65+
return
66+
elif have_source:
67+
raise NotImplementedError
68+
69+
with tempfile.TemporaryDirectory() as tmpdir:
70+
add_segmentation(
71+
input_path=segmentation_path, input_key=segmentation_key,
72+
root=mobie_project, dataset_name=mobie_dataset,
73+
segmentation_name=source_name,
74+
resolution=resolution, scale_factors=scale_factors,
75+
chunks=chunks, file_format="bdv.n5",
76+
tmp_folder=tmpdir
77+
)

scripts/README.md

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,41 @@
11
# Segmentation for large lightsheet volumes
22

3+
4+
## Installation
5+
6+
Needs [torch-em](https://github.com/constantinpape/torch-em) in the python environment. See [here](https://github.com/constantinpape/torch-em?tab=readme-ov-file#installation) for installation instructions. (If possible use `mamba` instead of `conda`.)
7+
After setting up the environment you also have to add support for the MoBIE python library via
8+
```
9+
conda install -c conda-forge mobie_utils
10+
```
11+
12+
313
## Training
414

515
Contains the scripts for training a U-Net that predicts foreground probabilities and normalized object distances.
616

17+
718
## Prediction
819

9-
Contains the scripts for running segmentation for a large volume with a distance prediction U-Net. (Other scripts are work in progress.)
20+
Contains the scripts for running segmentation for a large volume with a distance prediction U-Net, postprocessing the segmentation
21+
and exporting the segmentation result to MoBIE.
1022

11-
You can run it like this for input that is stored in n5:
23+
To run the full segmentation workflow, including the export to MoBIE you can use the `segmentation_workflow.py` script as follows:
24+
```
25+
python segmentation_workflow.py -i /path/to/volume.xml -o /path/to/output_folder --scale 0 -m data_name --model /path/to/model.pt
26+
```
27+
28+
Here, `-i` must point to the xml file of the fused data exported from BigStitcher, `-o` indicates the output folder where the MoBIE project with the segmentation result will be saved, `--scale` indicates the scale to use for the segmentation, `-m` the name of the data in MoBIE and `--model` the path to the segmentation model.
29+
30+
### Individual Workflow Steps
31+
32+
You can also run individual steps of the workflow, like prediction and segmentation:
33+
34+
You can run it like this for an input volume that is stored in n5, e.g. the fused export from bigstitcher:
1235
```
1336
python run_prediction_distance_unet.py -i /path/to/volume.n5 -k setup0/timepoint0/s0 -m /path/to/model -o /path/to/output_folder
1437
```
15-
Here, `-i` specifies the input filepath, `-o` the folder where the results are saved and `-k` the internal path for a zarr or n5 file.
38+
Here, `-i` specifies the input filepath, `-o` the folder where the results are saved and `-k` the internal path in the n5 file.
1639
The `-m` argument specifies the model to use for prediction. You need to give the path to the folder that contains the checkpoint (the `best.pt` file).
1740

1841
You can also run the script for a tif file. In this case you don't need the `-k` parameter:
@@ -31,8 +54,4 @@ to downsample the input by a factor of 2. Note that the segmentation result will
3154

3255
In addition, the script `postprocess_seg.py` can be used to filter out false positive nucleus segmentations from regions in the segmentation with a low density of segmented nuclei.
3356

34-
You can use the script `to_tif.py` to convert the zarr object to a tif volume for easier viewing (won't work for very large volumes!).
35-
36-
## Installation
37-
38-
Needs [torch-em](https://github.com/constantinpape/torch-em) in the python environment. See [here](https://github.com/constantinpape/torch-em?tab=readme-ov-file#installation) for installation instructions. (If possible use `mamba` instead of `conda`.)
57+
You can use the script `to_tif.py` to convert the zarr object to a tif volume for easier viewing (won't work for large volumes!).

scripts/data_transfer/README.md

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,37 +5,30 @@
55
Current approach to the data transfer:
66
- Log in to SCC login node:
77
$
8-
- Go to `/scratch1/projects/cca/data/moser`
8+
- Go to "/scratch1/projects/cca/data/moser"
99
- Create subfolder <NAME> for cochlea to be copied
10-
- Log in via
11-
```
12-
$ smbclient \\\\wfs-medizin.top.gwdg.de\\ukon-all\$\\ukon100 -U GWDG\\pape41"
13-
```
10+
- Log in via $ smbclient \\\\wfs-medizin.top.gwdg.de\\ukon-all\$\\ukon100 -U GWDG\\pape41"
1411
- Go to the folder with the cochlea to copy (cd works)
1512
- Copy the folder via:
1613
- recurse ON
1714
- prompt OFF
1815
- mget *
1916
- Copy this to HLRN by logging into it and running
20-
```
17+
$ rsync pape41:/scratch1/projects/cca/data/moser/<NAME>
2118
$ rsync -e "ssh -i ~/.ssh/id_rsa_hlrn" -avz [email protected]:/scratch1/projects/cca/data/mose
22-
r/<NAME> /mnt/lustre-emmy-hdd/projects/nim00007/data/moser/lightsheet/volumes/<NAME>
23-
```
19+
r/<NAME> /mnt/lustre-grete/usr/u12086/moser/lightsheet/<NAME>
2420
- Remove on SCC
2521

2622
## Next files
2723

2824
- UKON100\archiv\imaging\Lightsheet\Huiskengroup_CTLSM\2024\M171_2R_converted_n5
29-
- unclear what the converted data is
30-
- UKON100\archiv\imaging\Lightsheet\Huiskengroup_CTLSM\2024\155_1L_converted_n5\BDVexport.n5
31-
- Copied to SCC, need to rsync.
25+
- UKON100\archiv\imaging\Lightsheet\Huiskengroup_CTLSM\2024\155_1L_converted_n5
3226
- UKON100\archiv\imaging\Lightsheet\Huiskengroup_CTLSM\2024\MLR151_2R_converted_n5
3327
- UKON100\archiv\imaging\Lightsheet\Huiskengroup_CTLSM\2024\G11_1L_converted_n5
3428

3529
## Improvements
3630

3731
Try to automate via https://github.com/jborean93/smbprotocol; see `sync_smb.py` for ChatGPT's initial version.
38-
Connection not possible from HLRN.
3932

4033
## Transfer Back
4134

scripts/prediction/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
credentials*
Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
import argparse
2+
import os
3+
from shutil import rmtree
4+
5+
import pybdv.metadata as bdv_metadata
6+
import torch
7+
import z5py
8+
9+
from flamingo_tools.segmentation import run_unet_prediction, filter_isolated_objects
10+
from flamingo_tools.mobie import add_raw_to_mobie, add_segmentation_to_mobie
11+
12+
MOBIE_ROOT = "/mnt/lustre-emmy-hdd/projects/nim00007/data/moser/lightsheet/mobie"
13+
14+
15+
def postprocess_seg(output_folder):
16+
print("Run segmentation postprocessing ...")
17+
seg_path = os.path.join(output_folder, "segmentation.zarr")
18+
seg_key = "segmentation"
19+
20+
with z5py.File(seg_path, "r") as f:
21+
segmentation = f[seg_key][:]
22+
23+
seg_filtered, n_pre, n_post = filter_isolated_objects(segmentation)
24+
25+
with z5py.File(seg_path, "a") as f:
26+
chunks = f[seg_key].chunks
27+
f.create_dataset(
28+
"segmentation_postprocessed", data=seg_filtered, compression="gzip",
29+
chunks=chunks, dtype=seg_filtered.dtype
30+
)
31+
32+
33+
def export_to_mobie(xml_path, segmentation_folder, scale, mobie_dataset, chunks):
34+
# Add to mobie:
35+
36+
# - raw data (if not yet present)
37+
add_raw_to_mobie(
38+
mobie_project=MOBIE_ROOT,
39+
mobie_dataset=mobie_dataset,
40+
source_name="pv-channel",
41+
xml_path=xml_path,
42+
setup_id=0,
43+
)
44+
45+
# TODO enable passing extra channel names
46+
# - additional channels
47+
setup_ids = bdv_metadata.get_setup_ids(xml_path)
48+
if len(setup_ids) > 1:
49+
extra_channel_names = ["gfp_channel", "myo_channel"]
50+
for i, setup_id in enumerate(setup_ids[1:]):
51+
add_raw_to_mobie(
52+
mobie_project=MOBIE_ROOT,
53+
mobie_dataset=mobie_dataset,
54+
source_name=extra_channel_names[i],
55+
xml_path=xml_path,
56+
setup_id=setup_id
57+
)
58+
59+
# - segmentation and post-processed segmentation
60+
seg_path = os.path.join(segmentation_folder, "segmentation.zarr")
61+
seg_resolution = bdv_metadata.get_resolution(xml_path, setup_id=0)
62+
if scale == 1:
63+
seg_resolution = [2 * res for res in seg_resolution]
64+
unit = bdv_metadata.get_unit(xml_path, setup_id=0)
65+
66+
seg_key = "segmentation"
67+
seg_name = "nuclei_fullscale" if scale == 0 else "nuclei_downscaled"
68+
add_segmentation_to_mobie(
69+
mobie_project=MOBIE_ROOT,
70+
mobie_dataset=mobie_dataset,
71+
source_name=seg_name,
72+
segmentation_path=seg_path,
73+
segmentation_key=seg_key,
74+
resolution=seg_resolution,
75+
unit=unit,
76+
scale_factors=4*[[2, 2, 2]],
77+
chunks=chunks,
78+
)
79+
80+
seg_key = "segmentation_postprocessed"
81+
seg_name += "_postprocessed"
82+
add_segmentation_to_mobie(
83+
mobie_project=MOBIE_ROOT,
84+
mobie_dataset=mobie_dataset,
85+
source_name=seg_name,
86+
segmentation_path=seg_path,
87+
segmentation_key=seg_key,
88+
resolution=seg_resolution,
89+
unit=unit,
90+
scale_factors=4*[[2, 2, 2]],
91+
chunks=chunks,
92+
)
93+
94+
95+
def main():
96+
parser = argparse.ArgumentParser()
97+
parser.add_argument("-i", "--input", required=True)
98+
parser.add_argument("-o", "--output_folder", required=True)
99+
parser.add_argument("-s", "--scale", required=True, type=int)
100+
parser.add_argument("-m", "--mobie_dataset", required=True)
101+
parser.add_argument("--model")
102+
103+
args = parser.parse_args()
104+
105+
scale = args.scale
106+
if scale == 0:
107+
min_size = 1000
108+
elif scale == 1:
109+
min_size = 250
110+
else:
111+
raise ValueError
112+
113+
xml_path = args.input
114+
assert os.path.splitext(xml_path)[1] == ".xml"
115+
input_path = bdv_metadata.get_data_path(xml_path, return_absolute_path=True)
116+
117+
# TODO need to make sure that PV is always setup 0
118+
input_key = f"setup0/timepoint0/s{scale}"
119+
120+
have_cuda = torch.cuda.is_available()
121+
chunks = z5py.File(input_path, "r")[input_key].chunks
122+
block_shape = tuple([2 * ch for ch in chunks]) if have_cuda else tuple(chunks)
123+
halo = (16, 64, 64) if have_cuda else (8, 32, 32)
124+
125+
if args.model is not None:
126+
model = args.model
127+
else:
128+
if scale == 0:
129+
model = "../training/checkpoints/cochlea_distance_unet"
130+
else:
131+
model = "../training/checkpoints/cochlea_distance_unet-train-downsampled"
132+
133+
run_unet_prediction(
134+
input_path, input_key, args.output_folder, model,
135+
scale=None, min_size=min_size,
136+
block_shape=block_shape, halo=halo,
137+
)
138+
139+
postprocess_seg(args.output_folder)
140+
141+
export_to_mobie(xml_path, args.output_folder, scale, args.mobie_dataset, chunks)
142+
143+
# clean up: remove segmentation folders
144+
print("Cleaning up intermediate segmentation results")
145+
print("This may take a while, but everything else is done.")
146+
print("You can check the results in the MoBIE project already at:")
147+
print(f"{MOBIE_ROOT}:{args.mobie_dataset}")
148+
rmtree(args.output_folder)
149+
150+
151+
if __name__ == "__main__":
152+
main()

0 commit comments

Comments
 (0)