
Commit 8af8141

youri-k and philippotto authored
Globalize Floodfill Script (#461)
* initial version of floodfill script
* use skip argument
* sort imports
* specify encoding and close file handle
* improve floodfill performance
* improve bounding box contains implementation
* sort imports
* integrate some PR feedback
* chmod +x, make parameters required and make logging less verbose
* merge volume data by FS copy + apply strategy, floodfill over 1024-chunks and optimize that
* clean up merge code
* implement FastVec3Int via subclassing
* auto-compute bottomright in bounding box for performance reasons
* get rid of FastVec3Int by using a fast staticmethod instead
* convert old wkw api usages in globalize_flood_fill to new dataset api and also use Vec3Int everywhere
* run full script
* auto-select correct chunk size
* rename some variables
* don't use print
* remove 'slow' comments
* remove mag_headers logging
* implement redownsample; fix instantiation of Vec3Int
* format
* typing
* linting
* undo changes in annotation.py
* update changelog
* clean up argparser
* verbose logging by default
* format

Co-authored-by: Philipp Otto <[email protected]>
Co-authored-by: Philipp Otto <[email protected]>
1 parent 377ea67 commit 8af8141

9 files changed: +493 additions, -44 deletions

webknossos/Changelog.md

Lines changed: 5 additions & 0 deletions

@@ -13,7 +13,12 @@ For upgrade instructions, please check the respective *Breaking Changes* section
 - `wk.Graph` now inherits from `networkx.Graph` directly. Therefore, the `nx_graph` attribute is removed. [#481](https://github.com/scalableminds/webknossos-libs/pull/481)
 
 ### Added
+- Added `redownsample()` method to `Layer` to recompute existing downsampled magnifications. [#461](https://github.com/scalableminds/webknossos-libs/pull/461)
+- Added `globalize_floodfill.py` script to globalize partially computed flood fill operations. [#461](https://github.com/scalableminds/webknossos-libs/pull/461)
+
 ### Changed
+- Improved performance for calculations with `Vec3Int` and `BoundingBox`. [#461](https://github.com/scalableminds/webknossos-libs/pull/461)
+
 ### Fixed
 
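For context on the new `Layer.redownsample()` method mentioned above, here is a minimal, hypothetical sketch of the intended usage (mirroring the `data_mag.layer.redownsample()` call at the end of the new script below); the dataset path and layer name are placeholders, not part of this commit:

import webknossos as wk

# Hypothetical path and layer name for illustration only.
dataset = wk.Dataset("some/path/new_dataset")
segmentation_layer = dataset.get_layer("segmentation")
finest_mag = segmentation_layer.get_best_mag()

# ... mutate data in the finest magnification via finest_mag.write(...) ...

# Recompute all existing downsampled magnifications from the (modified) finest one.
segmentation_layer.redownsample()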

globalize_floodfill.py (new file)

Lines changed: 356 additions & 0 deletions
@@ -0,0 +1,356 @@

import argparse
import logging
import os
import re
import textwrap
from collections import namedtuple
from contextlib import contextmanager
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Iterator, List, Set, Tuple

import numpy as np

import webknossos as wk
from webknossos.dataset import Layer, MagView
from webknossos.geometry import BoundingBox, Mag, Vec3Int
from webknossos.utils import add_verbose_flag, setup_logging, time_start, time_stop

logger = logging.getLogger(__name__)


NEIGHBORS = [
    Vec3Int(1, 0, 0),
    Vec3Int(-1, 0, 0),
    Vec3Int(0, 1, 0),
    Vec3Int(0, -1, 0),
    Vec3Int(0, 0, 1),
    Vec3Int(0, 0, -1),
]

FloodFillBbox = namedtuple(
    "FloodFillBbox",
    ["bounding_box", "seed_position", "source_id", "target_id", "timestamp"],
)


def create_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(
        description=textwrap.dedent(
            """\
            Example usage:
            The following invocation will create a new dataset at "some/path/new_dataset"
            which will be a shallow copy of "existing/dataset" with the exception that
            the "segmentation" layer will have the volume data from "annotation/data"
            merged in. Additionally, the partial flood-fills which are denoted in
            "explorational.nml" will be continued/globalized.

            python -m script-collection.globalize_floodfill \\
                --output_path some/path/new_dataset \\
                --segmentation_layer_path existing/dataset/segmentation \\
                --volume_path annotation/data \\
                --nml_path explorational.nml
            """
        ),
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    parser.add_argument(
        "--volume_path",
        "-v",
        help="Directory containing the volume tracing.",
        type=Path,
        required=True,
    )

    parser.add_argument(
        "--segmentation_layer_path",
        "-s",
        help="Directory containing the segmentation layer.",
        type=Path,
        required=True,
    )

    parser.add_argument(
        "--nml_path",
        "-n",
        help="NML that contains the bounding boxes",
        type=Path,
        required=True,
    )

    parser.add_argument(
        "--output_path", "-o", help="Output directory", type=Path, required=True
    )

    add_verbose_flag(parser)

    return parser


def get_chunk_pos_and_offset(
    global_position: Vec3Int, chunk_shape: Vec3Int
) -> Tuple[Vec3Int, Vec3Int]:
    offset = global_position % chunk_shape
    return (
        global_position - offset,
        offset,
    )


def execute_floodfill(
    data_mag: MagView,
    seed_position: Vec3Int,
    already_processed_bbox: BoundingBox,
    source_id: int,
    target_id: int,
) -> None:
    cube_size = Vec3Int.full(data_mag.header.file_len * data_mag.header.block_len)
    cube_bbox = BoundingBox(Vec3Int(0, 0, 0), cube_size)
    chunk_with_relative_seed: List[Tuple[Vec3Int, Vec3Int]] = [
        get_chunk_pos_and_offset(seed_position, cube_size)
    ]

    # The `is_visited` variable is used to know which parts of the already processed bbox
    # were already traversed. Outside of that bounding box, the actual data already
    # is an indicator of whether the flood-fill has reached a voxel.
    is_visited = np.zeros(already_processed_bbox.size.to_tuple(), dtype=np.uint8)
    chunk_count = 0

    while len(chunk_with_relative_seed) > 0:
        chunk_count += 1
        if chunk_count % 10000 == 0:
            logger.info(f"Handled seed positions {chunk_count}")

        dirty_bucket = False
        current_cube, relative_seed = chunk_with_relative_seed.pop()
        global_seed = current_cube + relative_seed

        # Only reading one voxel for the seed can be up to 30,000 times faster
        # which is very relevant, since the chunk doesn't need to be traversed
        # if the seed voxel was already covered.
        value_at_seed_position = data_mag.read(current_cube + relative_seed, (1, 1, 1))

        if value_at_seed_position == source_id or (
            already_processed_bbox.contains(global_seed)
            and value_at_seed_position == target_id
            and not is_visited[global_seed - already_processed_bbox.topleft]
        ):
            logger.info(
                f"Handling chunk {chunk_count} with current cube {current_cube}"
            )
            time_start("read data")
            cube_data = data_mag.read(current_cube, cube_size)
            cube_data = cube_data[0, :, :, :]
            time_stop("read data")

            seeds_in_current_chunk: Set[Vec3Int] = set()
            seeds_in_current_chunk.add(relative_seed)

            time_start("traverse cube")
            while len(seeds_in_current_chunk) > 0:
                current_relative_seed = seeds_in_current_chunk.pop()
                current_global_seed = current_cube + current_relative_seed
                if already_processed_bbox.contains(current_global_seed):
                    is_visited[current_global_seed - already_processed_bbox.topleft] = 1

                if cube_data[current_relative_seed] != target_id:
                    cube_data[current_relative_seed] = target_id
                    dirty_bucket = True

                # check neighbors
                for neighbor in NEIGHBORS:
                    neighbor_pos = current_relative_seed + neighbor

                    global_neighbor_pos = current_cube + neighbor_pos
                    if already_processed_bbox.contains(global_neighbor_pos):
                        if is_visited[
                            global_neighbor_pos - already_processed_bbox.topleft
                        ]:
                            continue
                    if cube_bbox.contains(neighbor_pos):
                        if cube_data[neighbor_pos] == source_id or (
                            already_processed_bbox.contains(global_neighbor_pos)
                            and cube_data[neighbor_pos] == target_id
                        ):
                            seeds_in_current_chunk.add(neighbor_pos)
                    else:
                        chunk_with_relative_seed.append(
                            get_chunk_pos_and_offset(global_neighbor_pos, cube_size)
                        )
            time_stop("traverse cube")

            if dirty_bucket:
                time_start("write chunk")
                data_mag.write(cube_data, current_cube)
                time_stop("write chunk")


@contextmanager
def temporary_annotation_view(volume_annotation_path: Path) -> Iterator[Layer]:

    """
    Given a volume annotation path, create a temporary dataset which
    contains the volume annotation via a symlink. Yield the layer
    so that one can work with the annotation as a wk.Dataset.
    """

    with TemporaryDirectory() as tmp_annotation_dir:
        tmp_annotation_dataset_path = (
            Path(tmp_annotation_dir) / "tmp_annotation_dataset"
        )

        input_annotation_dataset = wk.Dataset.get_or_create(
            str(tmp_annotation_dataset_path), scale=(1, 1, 1)
        )

        # Ideally, the following code would be used, but there are two problems:
        # - save_volume_annotation cannot deal with the
        #   new named volume annotation layers, yet
        # - save_volume_annotation tries to read the entire data (beginning from 0, 0, 0)
        #   to infer the largest_segment_id which can easily exceed the available RAM.
        #
        # volume_annotation = open_annotation(volume_annotation_path)
        # input_annotation_layer = volume_annotation.save_volume_annotation(
        #     input_annotation_dataset, "volume_annotation"
        # )

        os.symlink(volume_annotation_path, tmp_annotation_dataset_path / "segmentation")
        input_annotation_layer = input_annotation_dataset.add_layer_for_existing_files(
            layer_name="segmentation",
            category="segmentation",
            largest_segment_id=0,  # This is incorrect, but for globalize_floodfill not relevant.
        )

        yield input_annotation_layer


def merge_with_fallback_layer(
    output_path: Path,
    volume_annotation_path: Path,
    segmentation_layer_path: Path,
) -> MagView:

    assert not output_path.exists(), f"Dataset at {output_path} already exists"

    # Prepare output dataset by creating a shallow copy of the dataset
    # determined by segmentation_layer_path, but do a deep copy of
    # segmentation_layer_path itself (so that we can mutate it).
    input_segmentation_dataset = wk.Dataset(segmentation_layer_path.parent)
    time_start("Prepare output dataset")
    output_dataset = input_segmentation_dataset.shallow_copy_dataset(
        output_path,
        name=output_path.name,
        make_relative=True,
        layers_to_ignore=[segmentation_layer_path.name],
    )
    output_layer = output_dataset.add_copy_layer(
        segmentation_layer_path, segmentation_layer_path.name
    )
    time_stop("Prepare output dataset")

    input_segmentation_mag = input_segmentation_dataset.get_layer(
        segmentation_layer_path.name
    ).get_best_mag()
    with temporary_annotation_view(volume_annotation_path) as input_annotation_layer:
        input_annotation_mag = input_annotation_layer.get_best_mag()
        bboxes = list(
            BoundingBox.from_tuple2(tuple)
            for tuple in input_annotation_mag.get_bounding_boxes_on_disk()
        )
        output_mag = output_layer.get_mag(input_segmentation_mag.mag)

        cube_size = output_mag.header.file_len * output_mag.header.block_len
        chunks_with_bboxes = BoundingBox.group_boxes_with_aligned_mag(
            bboxes, Mag(cube_size)
        )

        assert (
            input_annotation_mag.header.file_len == 1
        ), "volume annotation must have file_len=1"
        assert (
            input_annotation_mag.header.voxel_type
            == input_segmentation_mag.header.voxel_type
        ), "Volume annotation must have same dtype as fallback layer"

        chunk_count = 0
        for chunk, bboxes in chunks_with_bboxes.items():
            chunk_count += 1
            logger.info(f"Processing chunk {chunk_count}...")

            time_start("Read chunk")
            data_buffer = output_mag.read(chunk.topleft, chunk.size)[0, :, :, :]
            time_stop("Read chunk")

            time_start("Read/merge bboxes")
            for bbox in bboxes:
                read_data = input_annotation_mag.read(bbox.topleft, bbox.size)
                data_buffer[bbox.offset(-chunk.topleft).to_slices()] = read_data
            time_stop("Read/merge bboxes")

            time_start("Write chunk")
            output_mag.write(data_buffer, chunk.topleft)
            time_stop("Write chunk")
    return output_mag


def main(args: argparse.Namespace) -> None:

    # Use the skeleton API to read the bounding boxes once
    # https://github.com/scalableminds/webknossos-libs/issues/482 is done.
    nml_regex = re.compile(
        r'<userBoundingBox .*name="Limits of flood-fill \(source_id=(\d+), target_id=(\d+), seed=([\d,]+), timestamp=(\d+)\)".*topLeftX="(\d+)" topLeftY="(\d+)" topLeftZ="(\d+)" width="(\d+)" height="(\d+)" depth="(\d+)" />'
    )

    bboxes: List[FloodFillBbox] = []
    nml_file = open(args.nml_path, "r", encoding="utf-8")
    lines = nml_file.readlines()
    nml_file.close()
    for line in lines:
        matches = nml_regex.findall(line)
        for match in matches:
            # each match is a tuple of (source_id, target_id, seed, timestamp,
            # top_left_x, top_left_y, top_left_z, width, height, depth)
            bboxes.append(
                FloodFillBbox(
                    bounding_box=BoundingBox(
                        (match[4], match[5], match[6]), (match[7], match[8], match[9])
                    ),
                    seed_position=Vec3Int(match[2].split(",")),
                    source_id=int(match[0]),
                    target_id=int(match[1]),
                    timestamp=int(match[3]),
                )
            )
    bboxes = sorted(bboxes, key=lambda x: x.timestamp)

    time_start("Merge with fallback layer")
    data_mag = merge_with_fallback_layer(
        args.output_path,
        args.volume_path,
        args.segmentation_layer_path,
    )
    time_stop("Merge with fallback layer")

    time_start("All floodfills")
    for floodfill in bboxes:
        time_start("Floodfill")
        execute_floodfill(
            data_mag,
            floodfill.seed_position,
            floodfill.bounding_box,
            floodfill.source_id,
            floodfill.target_id,
        )
        time_stop("Floodfill")
    time_stop("All floodfills")

    time_start("Recompute downsampled mags")
    data_mag.layer.redownsample()
    time_stop("Recompute downsampled mags")


if __name__ == "__main__":
    parsed_args = create_parser().parse_args()
    setup_logging(parsed_args)

    main(parsed_args)
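As an aside on the chunking used by `execute_floodfill` above, here is a small, self-contained sketch of what `get_chunk_pos_and_offset` computes, assuming `Vec3Int` behaves as in the script (elementwise `%`, `-`, `+`, and equality); the concrete numbers are illustrative only:

from webknossos.geometry import Vec3Int

# Cube edge length used by the flood fill: file_len * block_len, e.g. 32 * 32 = 1024.
cube_size = Vec3Int.full(1024)
position = Vec3Int(1500, 20, 3000)

offset = position % cube_size       # Vec3Int(476, 20, 952): position within its cube
cube_topleft = position - offset    # Vec3Int(1024, 0, 2048): cube-aligned top-left corner

# The pair (cube_topleft, offset) is what execute_floodfill pushes onto its work stack.
assert cube_topleft + offset == position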

webknossos/webknossos/annotation/annotation.py

Lines changed: 5 additions & 0 deletions

@@ -59,6 +59,8 @@ def dataset_name(self) -> str:
     def save_volume_annotation(
         self, dataset: Dataset, layer_name: str = "volume_annotation"
     ) -> Layer:
+        # todo pylint: disable=fixme
+        # the name is about to change with multiple volume annotations
         assert "data.zip" in self._filelist
         with self._zipfile.open("data.zip") as f:
             data_zip = ZipFile(f)
@@ -78,6 +80,9 @@ def save_volume_annotation(
             ),
         )
         min_mag_view = layer.mags[min(layer.mags)]
+        # todo pylint: disable=fixme
+        # this tries to read the entire DS into memory (beginning from 0, 0, 0).
+        # if the annotation begins at some other point, this will blow up the RAM unnecessarily.
         layer.largest_segment_id = int(min_mag_view.read().max())
         return layer
 
