Skip to content

Commit df6f292

Browse files
authored
Fix compressed from images oom (#920)
* Implement chunked version of _handle_compressed_write. * Add test to verify if data written in chunks is correct. * Run formatter and linter. * Add docstrings and rename methods.
1 parent 9491958 commit df6f292

File tree

5 files changed

+88
-27
lines changed

5 files changed

+88
-27
lines changed

webknossos/Changelog.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ For upgrade instructions, please check the respective _Breaking Changes_ section
1919
### Changed
2020

2121
### Fixed
22+
- Fixed a bug where compression in add_layer_from_images uses too much memory [#900](https://github.com/scalableminds/webknossos-libs/issues/900)
2223

2324

2425
## [0.13.0](https://github.com/scalableminds/webknossos-libs/releases/tag/v0.13.0) - 2023-06-21

webknossos/tests/dataset/test_dataset.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -541,6 +541,29 @@ def test_update_new_bounding_box_offset(
541541
assure_exported_properties(ds)
542542

543543

544+
def test_chunked_compressed_write() -> None:
545+
ds_path = prepare_dataset_path(DataFormat.WKW, TESTOUTPUT_DIR)
546+
mag = (
547+
Dataset(ds_path, voxel_size=(1, 1, 1))
548+
.get_or_add_layer("color", COLOR_CATEGORY, data_format=DataFormat.WKW)
549+
.get_or_add_mag("1", compress=True)
550+
)
551+
552+
np.random.seed(1234)
553+
data: np.ndarray = (np.random.rand(10, 10, 10) * 255).astype(np.uint8)
554+
555+
# write data in the bottom-right corner of a shard so that other shards have to be written too
556+
mag.write(data, absolute_offset=mag.info.shard_shape - Vec3Int(5, 5, 5))
557+
558+
assert (
559+
mag.get_view(
560+
absolute_offset=mag.info.shard_shape - Vec3Int(5, 5, 5),
561+
size=Vec3Int(10, 10, 10),
562+
).read()
563+
== data
564+
).all()
565+
566+
544567
@pytest.mark.parametrize("data_format,output_path", DATA_FORMATS_AND_OUTPUT_PATHS)
545568
def test_write_multi_channel_uint8(data_format: DataFormat, output_path: Path) -> None:
546569
ds_path = prepare_dataset_path(data_format, output_path, "multichannel")

webknossos/tests/test_cli.py

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -202,29 +202,28 @@ def test_convert() -> None:
202202
assert (wkw_path / PROPERTIES_FILE_NAME).exists()
203203

204204

205-
@pytest.mark.filterwarnings("ignore::UserWarning")
206205
def test_convert_with_all_params() -> None:
207206
"""Tests the functionality of convert subcommand."""
208207

209208
with tmp_cwd():
210209
origin_path = TESTDATA_DIR / "tiff"
211210
wkw_path = Path("wkw_from_tiff_extended")
212-
213-
result = runner.invoke(
214-
app,
215-
[
216-
"convert",
217-
"--voxel-size",
218-
"11.0,11.0,11.0",
219-
"--data-format",
220-
"wkw",
221-
"--name",
222-
"wkw_from_tiff",
223-
"--compress",
224-
str(origin_path),
225-
str(wkw_path),
226-
],
227-
)
211+
with pytest.warns(UserWarning):
212+
result = runner.invoke(
213+
app,
214+
[
215+
"convert",
216+
"--voxel-size",
217+
"11.0,11.0,11.0",
218+
"--data-format",
219+
"wkw",
220+
"--name",
221+
"wkw_from_tiff",
222+
"--compress",
223+
str(origin_path),
224+
str(wkw_path),
225+
],
226+
)
228227

229228
assert result.exit_code == 0
230229
assert (wkw_path / PROPERTIES_FILE_NAME).exists()

webknossos/webknossos/cli/convert.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -92,15 +92,12 @@ def main(
9292
)
9393

9494
with get_executor_for_args(args=executor_args) as executor:
95-
dataset = Dataset.from_images(
95+
Dataset.from_images(
9696
source,
9797
target,
9898
voxel_size,
9999
name=name,
100100
data_format=data_format,
101101
executor=executor,
102+
compress=compress,
102103
)
103-
# TODO pylint: disable=fixme
104-
# Include this in the from_images() call as soon as issue #900 is resolved
105-
if compress:
106-
dataset.compress()

webknossos/webknossos/dataset/view.py

Lines changed: 46 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,17 @@
22
from argparse import Namespace
33
from pathlib import Path
44
from types import TracebackType
5-
from typing import TYPE_CHECKING, Any, Callable, Dict, Iterable, Optional, Tuple, Type
5+
from typing import (
6+
TYPE_CHECKING,
7+
Any,
8+
Callable,
9+
Dict,
10+
Iterable,
11+
Iterator,
12+
Optional,
13+
Tuple,
14+
Type,
15+
)
616

717
import numpy as np
818
import wkw
@@ -243,15 +253,46 @@ def write(
243253
current_mag_bbox = mag1_bbox.in_mag(self._mag)
244254

245255
if self._is_compressed():
246-
current_mag_bbox, data = self._handle_compressed_write(
256+
for current_mag_bbox, chunked_data in self._prepare_compressed_write(
247257
current_mag_bbox, data
248-
)
258+
):
259+
self._array.write(current_mag_bbox.topleft, chunked_data)
260+
else:
261+
self._array.write(current_mag_bbox.topleft, data)
249262

250-
self._array.write(current_mag_bbox.topleft, data)
263+
def _prepare_compressed_write(
264+
self, current_mag_bbox: BoundingBox, data: np.ndarray
265+
) -> Iterator[Tuple[BoundingBox, np.ndarray]]:
266+
"""This method takes an arbitrary sized chunk of data with an accompanying bbox,
267+
divides these into chunks of shard_shape size and delegates
268+
the preparation to _prepare_compressed_write_chunk."""
269+
270+
chunked_bboxes = current_mag_bbox.chunk(
271+
self.info.shard_shape,
272+
chunk_border_alignments=self.info.shard_shape,
273+
)
274+
for chunked_bbox in chunked_bboxes:
275+
source_slice: Any
276+
if len(data.shape) == 3:
277+
source_slice = chunked_bbox.offset(
278+
-current_mag_bbox.topleft
279+
).to_slices()
280+
else:
281+
source_slice = (slice(None, None),) + chunked_bbox.offset(
282+
-current_mag_bbox.topleft
283+
).to_slices()
251284

252-
def _handle_compressed_write(
285+
yield self._prepare_compressed_write_chunk(chunked_bbox, data[source_slice])
286+
287+
def _prepare_compressed_write_chunk(
253288
self, current_mag_bbox: BoundingBox, data: np.ndarray
254289
) -> Tuple[BoundingBox, np.ndarray]:
290+
"""This method takes an arbitrary sized chunk of data with an accompanying bbox
291+
(ideally not larger than a shard) and enlarges that chunk to fit the shard it
292+
resides in (by reading the entire shard data and writing the passed data ndarray
293+
into the specified volume). That way, the returned data can be written as a whole
294+
shard which is a requirement for compressed writes."""
295+
255296
aligned_bbox = current_mag_bbox.align_with_mag(self.info.shard_shape, ceil=True)
256297

257298
if current_mag_bbox != aligned_bbox:

0 commit comments

Comments
 (0)