Skip to content

Commit b5f8f44

Browse files
Remove the parameter allow_compressed_write by always allowing it (#356)
* Remove the parameter allow_compressed_write by always allowing it
* update changelog
* lint and format code
* move comment to right place
* update doc strings

Co-authored-by: Jonathan Striebel <[email protected]>
1 parent 1bda6a0 commit b5f8f44

File tree

9 files changed

+85
-69
lines changed

9 files changed

+85
-69
lines changed

Changelog.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ For upgrade instructions, please check the respective *Breaking Changes* section
1010
[Commits](https://github.com/scalableminds/webknossos-cuber/compare/v0.8.4...HEAD)
1111

1212
### Breaking Changes in Config & CLI
13+
- The parameter `allow_compressed_write` has been removed from `View.write()`. Writing to compressed magnifications is now always allowed. If the user decides to write unaligned data, a warning about a possible performance impact is displayed once. [#356](https://github.com/scalableminds/webknossos-cuber/pull/356)
1314

1415
### Added
1516
- Added functions to `wkcuber.api.dataset.Dataset` and `wkcuber.api.layer.Layer` to set and get the view configuration. [#344](https://github.com/scalableminds/webknossos-cuber/pull/344)

tests/test_dataset.py

Lines changed: 60 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import itertools
33
import json
44
import os
5+
import warnings
56
from os.path import dirname, join
67
from pathlib import Path
78
from typing import Any, Tuple, cast, Generator
@@ -962,11 +963,16 @@ def test_writing_subset_of_compressed_data_multi_channel() -> None:
962963
Dataset(TESTOUTPUT_DIR / "compressed_data").get_layer("color").get_mag("1")
963964
)
964965

965-
write_data2 = (np.random.rand(3, 10, 10, 10) * 255).astype(np.uint8)
966-
# Writing compressed data directly to "compressed_mag" also works, but using a View here covers an additional edge case
967-
compressed_mag.get_view(offset=(50, 60, 70)).write(
968-
offset=(10, 20, 30), data=write_data2, allow_compressed_write=True
969-
)
966+
with warnings.catch_warnings():
967+
warnings.filterwarnings(
968+
"ignore", category=RuntimeWarning, module="wkcuber"
969+
) # This line is not necessary. It simply keeps the output of the tests clean.
970+
write_data2 = (np.random.rand(3, 10, 10, 10) * 255).astype(np.uint8)
971+
# Writing unaligned data to a compressed dataset works because the data gets padded, but it prints a warning
972+
# Writing compressed data directly to "compressed_mag" also works, but using a View here covers an additional edge case
973+
compressed_mag.get_view(offset=(50, 60, 70)).write(
974+
offset=(10, 20, 30), data=write_data2
975+
)
970976

971977
assert np.array_equal(
972978
write_data2, compressed_mag.read(offset=(60, 80, 100), size=(10, 10, 10))
@@ -998,11 +1004,16 @@ def test_writing_subset_of_compressed_data_single_channel() -> None:
9981004
Dataset(TESTOUTPUT_DIR / "compressed_data").get_layer("color").get_mag("1")
9991005
)
10001006

1001-
write_data2 = (np.random.rand(10, 10, 10) * 255).astype(np.uint8)
1002-
# Writing compressed data directly to "compressed_mag" also works, but using a View here covers an additional edge case
1003-
compressed_mag.get_view(offset=(50, 60, 70)).write(
1004-
offset=(10, 20, 30), data=write_data2, allow_compressed_write=True
1005-
)
1007+
with warnings.catch_warnings():
1008+
warnings.filterwarnings(
1009+
"ignore", category=RuntimeWarning, module="wkcuber"
1010+
) # This line is not necessary. It simply keeps the output of the tests clean.
1011+
write_data2 = (np.random.rand(10, 10, 10) * 255).astype(np.uint8)
1012+
# Writing unaligned data to a compressed dataset works because the data gets padded, but it prints a warning
1013+
# Writing compressed data directly to "compressed_mag" also works, but using a View here covers an additional edge case
1014+
compressed_mag.get_view(offset=(50, 60, 70)).write(
1015+
offset=(10, 20, 30), data=write_data2
1016+
)
10061017

10071018
assert np.array_equal(
10081019
write_data2, compressed_mag.read(offset=(60, 80, 100), size=(10, 10, 10))[0]
@@ -1035,13 +1046,27 @@ def test_writing_subset_of_compressed_data() -> None:
10351046
Dataset(TESTOUTPUT_DIR / "compressed_data").get_layer("color").get_mag("1")
10361047
)
10371048

1038-
with pytest.raises(WKWException):
1039-
# calling 'write' with unaligned data on compressed data without setting 'allow_compressed_write=True'
1049+
with warnings.catch_warnings():
1050+
warnings.filterwarnings(
1051+
"ignore", category=RuntimeWarning, module="wkcuber"
1052+
) # This line is not necessary. It simply keeps the output of the tests clean.
10401053
compressed_mag.write(
10411054
offset=(10, 20, 30),
10421055
data=(np.random.rand(10, 10, 10) * 255).astype(np.uint8),
10431056
)
10441057

1058+
with warnings.catch_warnings():
1059+
# Calling 'write' with unaligned data on compressed data only fails if the warnings are treated as errors.
1060+
warnings.filterwarnings("error") # This escalates the warning to an error
1061+
with pytest.raises(RuntimeWarning):
1062+
compressed_mag.write(
1063+
offset=(10, 20, 30),
1064+
data=(np.random.rand(10, 10, 10) * 255).astype(np.uint8),
1065+
)
1066+
1067+
# Writing aligned data does not raise a warning. Therefore, this does not fail with these strict settings.
1068+
compressed_mag.write(data=(np.random.rand(64, 64, 64) * 255).astype(np.uint8))
1069+
10451070

10461071
def test_writing_subset_of_chunked_compressed_data() -> None:
10471072
delete_dir(TESTOUTPUT_DIR / "compressed_data")
@@ -1067,20 +1092,22 @@ def test_writing_subset_of_chunked_compressed_data() -> None:
10671092
.get_view(size=(100, 200, 300))
10681093
)
10691094

1070-
# Easy case:
1071-
# The aligned data (offset=(0,0,0), size=(64, 64, 64)) IS fully within the bounding box of the view
1072-
write_data2 = (np.random.rand(50, 40, 30) * 255).astype(np.uint8)
1073-
compressed_view.write(
1074-
offset=(10, 20, 30), data=write_data2, allow_compressed_write=True
1075-
)
1095+
with warnings.catch_warnings():
1096+
warnings.filterwarnings(
1097+
"ignore", category=RuntimeWarning, module="wkcuber"
1098+
) # This line is not necessary. It simply keeps the output of the tests clean.
10761099

1077-
# Advanced case:
1078-
# The aligned data (offset=(0,0,0), size=(128, 128, 128)) is NOT fully within the bounding box of the view
1079-
compressed_view.write(
1080-
offset=(10, 20, 30),
1081-
data=(np.random.rand(90, 80, 70) * 255).astype(np.uint8),
1082-
allow_compressed_write=True,
1083-
)
1100+
# Easy case:
1101+
# The aligned data (offset=(0,0,0), size=(64, 64, 64)) IS fully within the bounding box of the view
1102+
write_data2 = (np.random.rand(50, 40, 30) * 255).astype(np.uint8)
1103+
compressed_view.write(offset=(10, 20, 30), data=write_data2)
1104+
1105+
# Advanced case:
1106+
# The aligned data (offset=(0,0,0), size=(128, 128, 128)) is NOT fully within the bounding box of the view
1107+
compressed_view.write(
1108+
offset=(10, 20, 30),
1109+
data=(np.random.rand(90, 80, 70) * 255).astype(np.uint8),
1110+
)
10841111

10851112
np.array_equal(
10861113
write_data2, compressed_view.read(offset=(10, 20, 30), size=(50, 40, 30))
@@ -1393,14 +1420,14 @@ def test_compression(tmp_path: Path) -> None:
13931420
write_data, mag1.read(offset=(60, 80, 100), size=(10, 20, 30))
13941421
)
13951422

1396-
with pytest.raises(wkw.WKWException):
1397-
# writing unaligned data to a compressed dataset
1398-
mag1.write((np.random.rand(3, 10, 20, 30) * 255).astype(np.uint8))
1399-
1400-
mag1.write(
1401-
(np.random.rand(3, 10, 20, 30) * 255).astype(np.uint8),
1402-
allow_compressed_write=True,
1403-
)
1423+
with warnings.catch_warnings():
1424+
warnings.filterwarnings(
1425+
"ignore", category=RuntimeWarning, module="wkcuber"
1426+
) # This line is not necessary. It simply keeps the output of the tests clean.
1427+
# writing unaligned data to a compressed dataset works because the data gets padded, but it prints a warning
1428+
mag1.write(
1429+
(np.random.rand(3, 10, 20, 30) * 255).astype(np.uint8),
1430+
)
14041431

14051432

14061433
def test_dataset_view_configuration(tmp_path: Path) -> None:

tests/test_downsampling.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,6 @@ def downsample_test_helper(use_compress: bool) -> None:
128128
[2, 2, 2],
129129
InterpolationModes.MAX,
130130
CUBE_EDGE_LEN,
131-
use_compress,
132131
100,
133132
)
134133

@@ -185,7 +184,6 @@ def test_downsample_multi_channel() -> None:
185184
[2, 2, 2],
186185
InterpolationModes.MAX,
187186
CUBE_EDGE_LEN,
188-
False,
189187
100,
190188
)
191189

tests/test_upsampling.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,6 @@ def upsample_test_helper(use_compress: bool) -> None:
8484
),
8585
[0.5, 0.5, 1.0],
8686
CUBE_EDGE_LEN,
87-
use_compress,
8887
100,
8988
)
9089

@@ -135,7 +134,6 @@ def test_upsample_multi_channel() -> None:
135134
(mag2.get_view(), l.get_mag("1").get_view(), 0),
136135
[0.5, 0.5, 0.5],
137136
CUBE_EDGE_LEN,
138-
False,
139137
100,
140138
)
141139

wkcuber/api/layer.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,9 @@ def add_mag(
117117
Creates a new mag called and adds it to the layer.
118118
The parameter `block_len`, `file_len` and `compress` can be
119119
specified to adjust how the data is stored on disk.
120+
Note that writing compressed data which is not aligned with the blocks on disk may result in
121+
diminished performance, as full blocks will automatically be read to pad the write actions. Alternatively,
122+
you can call mag.compress() after all the data was written
120123
121124
The return type is `wkcuber.api.mag_view.MagView`.
122125
@@ -417,7 +420,6 @@ def downsample_mag(
417420
mag_factors=mag_factors,
418421
interpolation_mode=parsed_interpolation_mode,
419422
buffer_edge_len=buffer_edge_len,
420-
compress=compress,
421423
job_count_per_log=job_count_per_log,
422424
)
423425

@@ -547,7 +549,6 @@ def upsample(
547549
upsample_cube_job,
548550
mag_factors=mag_factors,
549551
buffer_edge_len=buffer_edge_len,
550-
compress=compress,
551552
job_count_per_log=job_count_per_log,
552553
)
553554
prev_mag_view.get_view().for_zipped_chunks(

wkcuber/api/mag_view.py

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -99,24 +99,18 @@ def __init__(
9999
join(layer.dataset.path, layer.name, self.name), self.header
100100
)
101101

102-
def write(
103-
self,
104-
data: np.ndarray,
105-
offset: Tuple[int, int, int] = (0, 0, 0),
106-
allow_compressed_write: bool = False,
107-
) -> None:
102+
def write(self, data: np.ndarray, offset: Tuple[int, int, int] = (0, 0, 0)) -> None:
108103
"""
109104
Writes the `data` at the specified `offset` to disk (like `wkcuber.api.view.View.write()`).
110105
111106
The `offset` refers to the absolute position, regardless of the offset in the properties (because the global_offset is set to (0, 0, 0)).
112107
If the data exceeds the original bounding box, the properties are updated.
113108
114-
If the data on disk is compressed, the passed `data` either has to be aligned with the files on disk
115-
or `allow_compressed_write` has to be `True`. If `allow_compressed_write` is `True`, `data` is padded by
116-
first reading the necessary padding from disk.
109+
Note that writing compressed data which is not aligned with the blocks on disk may result in
110+
diminished performance, as full blocks will automatically be read to pad the write actions.
117111
"""
118112
self._assert_valid_num_channels(data.shape)
119-
super().write(data, offset, allow_compressed_write)
113+
super().write(data, offset)
120114
layer_properties = self.layer.dataset.properties.data_layers[self.layer.name]
121115
current_offset_in_mag1 = layer_properties.get_bounding_box_offset()
122116
current_size_in_mag1 = layer_properties.get_bounding_box_size()

wkcuber/api/view.py

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import math
2+
import warnings
23
from pathlib import Path
34
from types import TracebackType
45
from typing import Tuple, Optional, Type, Callable, Union, cast
@@ -76,16 +77,13 @@ def write(
7677
self,
7778
data: np.ndarray,
7879
offset: Tuple[int, int, int] = (0, 0, 0),
79-
allow_compressed_write: bool = False,
8080
) -> None:
8181
"""
8282
Writes the `data` at the specified `offset` to disk.
8383
The `offset` is relative to `global_offset`.
8484
85-
If the data on disk is compressed, the passed `data` either has to be aligned with the files on disk
86-
or `allow_compressed_write` has to be `True`. If `allow_compressed_write` is `True`, `data` is padded by
87-
first reading the necessary padding from disk.
88-
In this particular case, reading data from outside the bounding box is allowed.
85+
Note that writing compressed data which is not aligned with the blocks on disk may result in
86+
diminished performance, as full blocks will automatically be read to pad the write actions.
8987
"""
9088
assert not self.read_only, "Cannot write data to an read_only View"
9189

@@ -103,7 +101,7 @@ def write(
103101
tuple(sum(x) for x in zip(self.global_offset, offset)),
104102
)
105103

106-
if self._is_compressed() and allow_compressed_write:
104+
if self._is_compressed():
107105
absolute_offset, data = self._handle_compressed_write(absolute_offset, data)
108106

109107
if not was_opened:
@@ -437,16 +435,17 @@ def _handle_compressed_write(
437435
):
438436
# the data is not aligned
439437
# read the aligned bounding box
440-
try:
441-
# We want to read the data at the absolute offset.
442-
# The absolute offset might be outside of the current view.
443-
# That is the case if the data is compressed but the view does not include the whole file on disk.
444-
# In this case we avoid checking the bounds because the aligned_offset and aligned_shape are calculated internally.
445-
aligned_data = self._read_without_checks(aligned_offset, aligned_shape)
446-
except AssertionError as e:
447-
raise AssertionError(
448-
f"Writing compressed data failed. The compressed file is not fully inside the bounding box of the view (offset={self.global_offset}, size={self.size})."
449-
) from e
438+
439+
# We want to read the data at the absolute offset.
440+
# The absolute offset might be outside of the current view.
441+
# That is the case if the data is compressed but the view does not include the whole file on disk.
442+
# In this case we avoid checking the bounds because the aligned_offset and aligned_shape are calculated internally.
443+
warnings.warn(
444+
"Warning: write() was called on a compressed mag without block alignment. Performance will be degraded as the data has to be padded first.",
445+
RuntimeWarning,
446+
)
447+
aligned_data = self._read_without_checks(aligned_offset, aligned_shape)
448+
450449
index_slice = (
451450
slice(None, None),
452451
*(

wkcuber/downsampling_utils.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,6 @@ def downsample_cube_job(
296296
mag_factors: List[int],
297297
interpolation_mode: InterpolationModes,
298298
buffer_edge_len: int,
299-
compress: bool,
300299
job_count_per_log: int,
301300
) -> None:
302301
(source_view, target_view, i) = args
@@ -367,7 +366,7 @@ def downsample_cube_job(
367366
# Write the downsampled buffer to target
368367
if source_view.header.num_channels == 1:
369368
file_buffer = file_buffer[0] # remove channel dimension
370-
target_view.write(file_buffer, allow_compressed_write=compress)
369+
target_view.write(file_buffer)
371370
if use_logging:
372371
time_stop(f"Downsampling of {target_view.global_offset}")
373372

wkcuber/upsampling_utils.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ def upsample_cube_job(
2727
args: Tuple[View, View, int],
2828
mag_factors: List[float],
2929
buffer_edge_len: int,
30-
compress: bool,
3130
job_count_per_log: int,
3231
) -> None:
3332
(source_view, target_view, i) = args
@@ -99,7 +98,7 @@ def upsample_cube_job(
9998
# Write the upsampled buffer to target
10099
if source_view.header.num_channels == 1:
101100
file_buffer = file_buffer[0] # remove channel dimension
102-
target_view.write(file_buffer, allow_compressed_write=compress)
101+
target_view.write(file_buffer)
103102
if use_logging:
104103
time_stop(f"Upsampling of {target_view.global_offset}")
105104

0 commit comments

Comments
 (0)