Commit a4bdf48
Improve Performance of Cubing (#480)

* WIP improve performance of cubing and use dataset api for tile_cubing
* format code and fix bbox offset in tile_cubing
* update changelog
* fix tile_cubing
* fix tile_cubing with dataset api
* simplify code
* fix merge with master

Co-authored-by: Philipp Otto <[email protected]>

1 parent 95d1a12

4 files changed: +180 / -119 lines

wkcuber/Changelog.md

Lines changed: 1 addition & 0 deletions

```diff
@@ -14,6 +14,7 @@ For upgrade instructions, please check the respective *Breaking Changes* section
 ### Added
 
 ### Changed
+- Improved the performance of cubing and tile-cubing and integrated the dataset API into tile-cubing. [#480](https://github.com/scalableminds/webknossos-libs/pull/480)
 
 ### Fixed
 
```
wkcuber/tests/scripts/tile_cubing.sh

Lines changed: 1 addition & 1 deletion

```diff
@@ -4,7 +4,7 @@ python -m wkcuber.tile_cubing \
   --jobs 2 \
   --batch_size 8 \
   --layer_name color \
-  --scale 1 \
+  --scale 1,1,1 \
   testdata/temca2 testoutput/temca2
 [ -d testoutput/temca2/color ]
 [ -d testoutput/temca2/color/1 ]
```
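With the dataset API integrated into tile-cubing, `--scale` is now passed as three comma-separated values (a per-axis voxel size) rather than a single number. As a rough illustration of what parsing such a flag involves, here is a minimal, hypothetical sketch; `parse_scale` is an invented helper for this example, not wkcuber's actual parser:

```python
import argparse
from typing import Tuple


def parse_scale(value: str) -> Tuple[float, float, float]:
    # Hypothetical helper: accepts "x,y,z" and returns a 3-tuple,
    # mirroring how a comma-separated --scale flag could be parsed.
    parts = [float(p) for p in value.split(",")]
    if len(parts) != 3:
        raise argparse.ArgumentTypeError(
            "--scale expects three comma-separated values, e.g. 1,1,1"
        )
    return parts[0], parts[1], parts[2]


parser = argparse.ArgumentParser()
parser.add_argument("--scale", type=parse_scale)
print(parser.parse_args(["--scale", "1,1,1"]).scale)  # (1.0, 1.0, 1.0)
```

Requiring exactly three components keeps a bare `--scale 1` from silently becoming an ambiguous voxel size, which matches the test change above.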

wkcuber/wkcuber/cubing.py

Lines changed: 30 additions & 37 deletions

```diff
@@ -165,22 +165,6 @@ def read_image_file(
         raise exc
 
 
-def prepare_slices_for_wkw(
-    slices: List[np.ndarray], num_channels: Optional[int] = None
-) -> np.ndarray:
-    # Write batch buffer which will have shape (x, y, channel_count, z)
-    # since we concat along the last axis (z)
-    buffer = np.concatenate(slices, axis=-1)
-
-    # We transpose the data so that the first dimension is the channel,
-    # since the wkw library expects this.
-    # New shape will be (channel_count, x, y, z)
-    buffer = np.transpose(buffer, (2, 0, 1, 3))
-    if num_channels is not None:
-        assert buffer.shape[0] == num_channels
-    return buffer
-
-
 def cubing_job(
     args: Tuple[
         View,
@@ -191,6 +175,8 @@ def cubing_job(
         bool,
         Optional[int],
         Optional[int],
+        str,
+        int,
     ]
 ) -> Any:
     (
@@ -202,11 +188,15 @@ def cubing_job(
         pad,
         channel_index,
         sample_index,
+        dtype,
+        num_channels,
     ) = args
 
     downsampling_needed = target_mag != Mag(1)
     largest_value_in_chunk = 0  # This is used to compute the largest_segmentation_id if it is a segmentation layer
 
+    max_image_size = (target_view.size[0], target_view.size[1])
+
     # Iterate over batches of continuous z sections
     # The batches have a maximum size of `batch_size`
     # Batched iterations allows to utilize IO more efficiently
@@ -219,7 +209,15 @@ def cubing_job(
                 first_z_idx, first_z_idx + len(source_file_batch)
             )
         )
-        slices = []
+
+        # Allocate a large buffer for all images in this batch
+        # Shape will be (channel_count, x, y, z)
+        # Using fortran order for the buffer, prevents that the data has to be copied in rust
+        buffer_shape = (
+            [num_channels] + list(max_image_size) + [len(source_file_batch)]
+        )
+        buffer = np.empty(buffer_shape, dtype=dtype, order="F")
+
         # Iterate over each z section in the batch
         for i, file_name in enumerate(source_file_batch):
             z = first_z_idx + i
@@ -232,33 +230,26 @@ def cubing_job(
                 sample_index,
             )
 
-            if not pad:
-                assert (
-                    image.shape[0:2] == target_view.size[0:2]
-                ), "Section z={} has the wrong dimensions: {} (expected {}). Consider using --pad.".format(
-                    z, image.shape, target_view.size[0:2]
-                )
-            slices.append(image)
-
-        if pad:
-            x_max = target_view.size[0]
-            y_max = target_view.size[1]
-
-            slices = [
-                np.pad(
-                    _slice,
+            if pad:
+                image = np.pad(
+                    image,
                     mode="constant",
                     pad_width=[
-                        (0, x_max - _slice.shape[0]),
-                        (0, y_max - _slice.shape[1]),
+                        (0, max_image_size[0] - image.shape[0]),
+                        (0, max_image_size[1] - image.shape[1]),
                         (0, 0),
                         (0, 0),
                     ],
                 )
-                for _slice in slices
-            ]
+            else:
+                assert (
+                    image.shape[0:2] == max_image_size
+                ), "Section z={} has the wrong dimensions: {} (expected {}). Consider using --pad.".format(
+                    z, image.shape, max_image_size
+                )
+            buffer[:, :, :, i] = image.transpose((2, 0, 1, 3))[:, :, :, 0]
+            del image
 
-        buffer = prepare_slices_for_wkw(slices, target_view.header.num_channels)
         if downsampling_needed:
             buffer = downsample_unpadded_data(
                 buffer, target_mag, interpolation_mode
@@ -434,6 +425,8 @@ def cubing(
                     pad,
                     channel_index,
                     sample_index,
+                    dtype,
+                    target_layer.num_channels,
                 )
             )
 
```
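The core of the performance change is visible in the last three hunks: instead of collecting 2D sections in a Python list and handing them to `prepare_slices_for_wkw` (one large `np.concatenate` copy followed by an `np.transpose` that yields a non-contiguous view), each batch now pre-allocates a single Fortran-ordered buffer with the final `(channel_count, x, y, z)` shape and writes every section into its z slot in place; per the in-diff comment, the Fortran layout also spares the wkw/Rust side a copy. A minimal numpy-only sketch of the before/after data flow (shapes and sizes are illustrative, not taken from the library):

```python
import numpy as np

# Illustrative sizes; in cubing they come from the images and the batch length.
x, y, channels, batch_z = 512, 512, 1, 8
dtype = np.uint8

# Each section, as read from disk, has shape (x, y, channel_count, 1).
sections = [
    np.random.randint(0, 255, size=(x, y, channels, 1), dtype=dtype)
    for _ in range(batch_z)
]

# Old path (sketch): concatenate along z, then transpose to (channels, x, y, z).
# The concatenate copies every section, and the transpose returns a
# non-contiguous view, so a consumer needing contiguous memory copies again.
old_buffer = np.transpose(np.concatenate(sections, axis=-1), (2, 0, 1, 3))

# New path (sketch): allocate the final buffer once, in Fortran (column-major)
# order, and write each section directly into its z slot.
new_buffer = np.empty((channels, x, y, batch_z), dtype=dtype, order="F")
for i, image in enumerate(sections):
    # (x, y, channels, 1) -> (channels, x, y, 1) -> (channels, x, y)
    new_buffer[:, :, :, i] = image.transpose((2, 0, 1, 3))[:, :, :, 0]

assert np.array_equal(old_buffer, new_buffer)
assert new_buffer.flags["F_CONTIGUOUS"]
```

Both paths produce the same array; the second avoids the intermediate concatenated copy and ends with an F-contiguous buffer of the shape the wkw library expects.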