Skip to content

Commit 20215f5

Browse files
authored
[ds-api] Don't warn about unaligned/compressed writes if these are at dataset boundary (#378)
* Don't raise a warning if unaligned data is at the border of the bbox
* Format code
* Adjust the test to use the correct file dimensions
* Implement PR feedback
* Update changelog
1 parent 30fe1a7 commit 20215f5

File tree

6 files changed

+102
-46
lines changed

6 files changed

+102
-46
lines changed

Changelog.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ For upgrade instructions, please check the respective *Breaking Changes* section
1515

1616
### Changed
1717
- Bump scipy to `1.6.0` and `scikit-image` to `0.18.0` while keeping `numpy` to under `1.20.0` [#372](https://github.com/scalableminds/webknossos-cuber/pull/372/files)
18+
- Avoid warnings when writing compressed/unaligned data if the data lies directly at the border of the bounding box. [#378](https://github.com/scalableminds/webknossos-cuber/pull/378)
1819

1920
### Fixed
2021
- Fixes a bug where modifications to an existing dataset with floats as dtype failed. [#375](https://github.com/scalableminds/webknossos-cuber/pull/375)

tests/test_dataset.py

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1048,20 +1048,20 @@ def test_writing_subset_of_compressed_data() -> None:
10481048
# create uncompressed dataset
10491049
Dataset.create(TESTOUTPUT_DIR / "compressed_data", scale=(1, 1, 1)).add_layer(
10501050
"color", LayerCategories.COLOR_TYPE
1051-
).add_mag("1", block_len=8, file_len=8).write(
1052-
(np.random.rand(20, 40, 60) * 255).astype(np.uint8)
1051+
).add_mag("2", block_len=8, file_len=8).write(
1052+
(np.random.rand(120, 140, 160) * 255).astype(np.uint8)
10531053
)
10541054

10551055
# compress data
10561056
compress_mag_inplace(
10571057
(TESTOUTPUT_DIR / "compressed_data").resolve(),
10581058
layer_name="color",
1059-
mag=Mag("1"),
1059+
mag=Mag("2"),
10601060
)
10611061

10621062
# open compressed dataset
10631063
compressed_mag = (
1064-
Dataset(TESTOUTPUT_DIR / "compressed_data").get_layer("color").get_mag("1")
1064+
Dataset(TESTOUTPUT_DIR / "compressed_data").get_layer("color").get_mag("2")
10651065
)
10661066

10671067
with warnings.catch_warnings():
@@ -1082,6 +1082,26 @@ def test_writing_subset_of_compressed_data() -> None:
10821082
data=(np.random.rand(10, 10, 10) * 255).astype(np.uint8),
10831083
)
10841084

1085+
assert compressed_mag._mag_view_bounding_box_at_creation == BoundingBox(
1086+
topleft=(
1087+
0,
1088+
0,
1089+
0,
1090+
),
1091+
size=(120, 140, 160),
1092+
)
1093+
# Writing unaligned data to the edge of the bounding box of the MagView does not raise an error.
1094+
# This write operation writes unaligned data into the bottom-right corner of the MagView.
1095+
compressed_mag.write(
1096+
offset=(64, 64, 64),
1097+
data=(np.random.rand(56, 76, 96) * 255).astype(np.uint8),
1098+
)
1099+
# This also works for normal Views but they only use the bounding box at the time of creation as reference.
1100+
compressed_mag.get_view().write(
1101+
offset=(64, 64, 64),
1102+
data=(np.random.rand(56, 76, 96) * 255).astype(np.uint8),
1103+
)
1104+
10851105
# Writing aligned data does not raise a warning. Therefore, this does not fail with these strict settings.
10861106
compressed_mag.write(data=(np.random.rand(64, 64, 64) * 255).astype(np.uint8))
10871107

tests/test_downsampling.py

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import logging
2+
import warnings
23
from pathlib import Path
34
from typing import Tuple, cast
45

@@ -21,7 +22,6 @@
2122
from wkcuber.downsampling_utils import _mode, non_linear_filter_3d
2223
import shutil
2324

24-
WKW_CUBE_SIZE = 1024
2525
CUBE_EDGE_LEN = 256
2626

2727
TESTOUTPUT_DIR = Path("testoutput")
@@ -93,23 +93,20 @@ def downsample_test_helper(use_compress: bool) -> None:
9393
pass
9494

9595
source_ds = Dataset(source_path)
96-
source_layer = source_ds.get_layer("color")
97-
mag1 = source_layer.get_mag("1")
96+
target_ds = source_ds.copy_dataset(target_path, block_len=16, file_len=16)
97+
98+
target_layer = target_ds.get_layer("color")
99+
mag1 = target_layer.get_mag("1")
100+
target_layer.delete_mag("2-2-1") # This is not needed for this test
98101

99-
target_ds = Dataset.create(target_path, scale=(1, 1, 1))
100-
target_layer = target_ds.add_layer(
101-
"color", LayerCategories.COLOR_TYPE, dtype_per_channel="uint8"
102-
)
103-
# The bounding box has to be set here explicitly because the downsampled data is written to a different dataset.
104-
target_layer.set_bounding_box(
105-
source_layer.get_bounding_box().topleft, source_layer.get_bounding_box().size
106-
)
107102
mag2 = target_layer._initialize_mag_from_other_mag("2", mag1, use_compress)
108103

109-
offset = (WKW_CUBE_SIZE, 2 * WKW_CUBE_SIZE, 0)
110-
target_offset = cast(Tuple[int, int, int], tuple([o // 2 for o in offset]))
111-
source_size = cast(Tuple[int, int, int], (CUBE_EDGE_LEN * 2,) * 3)
112-
target_size = cast(Tuple[int, int, int], (CUBE_EDGE_LEN,) * 3)
104+
# The actual size of mag1 is (4600, 4600, 512).
105+
# To keep this test case fast, we are only downsampling a small part
106+
offset = (4096, 4096, 0)
107+
target_offset = (2048, 2048, 0)
108+
source_size = (504, 504, 512)
109+
target_size = (252, 252, 256)
113110
source_buffer = mag1.read(
114111
offset=offset,
115112
size=source_size,
@@ -127,7 +124,7 @@ def downsample_test_helper(use_compress: bool) -> None:
127124
),
128125
[2, 2, 2],
129126
InterpolationModes.MAX,
130-
CUBE_EDGE_LEN,
127+
128,
131128
100,
132129
)
133130

@@ -147,7 +144,9 @@ def test_downsample_cube_job() -> None:
147144

148145

149146
def test_compressed_downsample_cube_job() -> None:
150-
downsample_test_helper(True)
147+
with warnings.catch_warnings():
148+
warnings.filterwarnings("error") # This escalates the warning to an error
149+
downsample_test_helper(True)
151150

152151

153152
def test_downsample_multi_channel() -> None:

wkcuber/api/mag_view.py

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
from wkcuber.api.bounding_box import BoundingBox
2020
from wkcuber.compress_utils import compress_file_job
2121
from wkcuber.utils import (
22-
convert_mag1_size,
2322
convert_mag1_offset,
2423
get_executor_for_args,
2524
wait_and_ensure_success,
@@ -77,23 +76,23 @@ def __init__(
7776
file_len=file_len,
7877
block_type=block_type,
7978
)
79+
80+
self.layer = layer
81+
self.name = name
82+
8083
super().__init__(
8184
_find_mag_path_on_disk(layer.dataset.path, layer.name, name),
8285
header,
8386
cast(
8487
Tuple[int, int, int],
85-
tuple(
86-
convert_mag1_size(layer.get_bounding_box().bottomright, Mag(name))
87-
),
88+
tuple(self._mag_view_bounding_box_at_creation.bottomright),
8889
),
8990
(0, 0, 0),
9091
False,
9192
False,
93+
None,
9294
)
9395

94-
self.layer = layer
95-
self.name = name
96-
9796
if create:
9897
wkw.Dataset.create(
9998
join(layer.dataset.path, layer.name, self.name), self.header
@@ -328,6 +327,14 @@ def _get_file_dimensions(self) -> Tuple[int, int, int]:
328327
Tuple[int, int, int], (self.header.file_len * self.header.block_len,) * 3
329328
)
330329

330+
@property
331+
def _mag_view_bounding_box_at_creation(self) -> BoundingBox:
332+
return (
333+
self.layer.get_bounding_box()
334+
.align_with_mag(Mag(self.name), ceil=True)
335+
.in_mag(Mag(self.name))
336+
)
337+
331338
def __repr__(self) -> str:
332339
return repr(
333340
"MagView(name=%s, global_offset=%s, size=%s)"

wkcuber/api/view.py

Lines changed: 47 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ def __init__(
2929
global_offset: Tuple[int, int, int] = (0, 0, 0),
3030
is_bounded: bool = True,
3131
read_only: bool = False,
32+
mag_view_bbox_at_creation: Optional[BoundingBox] = None,
3233
):
3334
"""
3435
Do not use this constructor manually. Instead use `wkcuber.api.mag_view.MagView.get_view()` to get a `View`.
@@ -41,6 +42,16 @@ def __init__(
4142
self._is_bounded = is_bounded
4243
self.read_only = read_only
4344
self._is_opened = False
45+
# The bounding box of the view is used to prevent warnings when writing compressed but unaligned data
46+
# directly at the borders of the bounding box.
47+
# A View is unable to get this information from the Dataset because it is detached from it.
48+
# Adding the bounding box as parameter is a workaround for this.
49+
# However, keep in mind that this bounding box is just a snapshot.
50+
# This bounding box is not updated if the bounding box of the dataset is updated.
51+
# Even though an outdated bounding box can lead to missed (or unwanted) warnings,
52+
# this is sufficient for our use case because such scenarios are unlikely and not critical.
53+
# This should not be misused to infer the size of the dataset because this might lead to problems.
54+
self._mag_view_bbox_at_creation = mag_view_bbox_at_creation
4455

4556
def open(self) -> "View":
4657
"""
@@ -244,6 +255,7 @@ def get_view(
244255
global_offset=view_offset,
245256
is_bounded=True,
246257
read_only=read_only,
258+
mag_view_bbox_at_creation=self._mag_view_bounding_box_at_creation,
247259
)
248260

249261
def _assert_bounds(
@@ -426,24 +438,42 @@ def _handle_compressed_write(
426438
aligned_offset = absolute_offset_np - margin_to_top_left
427439
bottom_right = absolute_offset_np + np.array(data.shape[-3:])
428440
margin_to_bottom_right = (file_bb - (bottom_right % file_bb)) % file_bb
429-
aligned_bottom_right = bottom_right + margin_to_bottom_right
430-
aligned_shape = aligned_bottom_right - aligned_offset
441+
is_bottom_right_aligned = bottom_right + margin_to_bottom_right
442+
aligned_shape = is_bottom_right_aligned - aligned_offset
431443

432444
if (
433445
tuple(aligned_offset) != absolute_offset
434446
or tuple(aligned_shape) != data.shape[-3:]
435447
):
436-
# the data is not aligned
437-
# read the aligned bounding box
438-
439-
# We want to read the data at the absolute offset.
440-
# The absolute offset might be outside of the current view.
441-
# That is the case if the data is compressed but the view does not include the whole file on disk.
442-
# In this case we avoid checking the bounds because the aligned_offset and aligned_shape are calculated internally.
443-
warnings.warn(
444-
"Warning: write() was called on a compressed mag without block alignment. Performance will be degraded as the data has to be padded first.",
445-
RuntimeWarning,
448+
mag_view_bbox_at_creation = self._mag_view_bounding_box_at_creation
449+
450+
# Calculate in which dimensions the data is aligned and in which dimensions it matches the bbox of the mag.
451+
is_top_left_aligned = aligned_offset == np.array(absolute_offset)
452+
is_bottom_right_aligned = is_bottom_right_aligned == bottom_right
453+
is_at_border_top_left = np.array(
454+
mag_view_bbox_at_creation.topleft
455+
) == np.array(absolute_offset)
456+
is_at_border_bottom_right = (
457+
np.array(mag_view_bbox_at_creation.bottomright) == bottom_right
446458
)
459+
460+
if not (
461+
np.all(np.logical_or(is_top_left_aligned, is_at_border_top_left))
462+
and np.all(
463+
np.logical_or(is_bottom_right_aligned, is_at_border_bottom_right)
464+
)
465+
):
466+
# the data is not aligned
467+
# read the aligned bounding box
468+
469+
# We want to read the data at the absolute offset.
470+
# The absolute offset might be outside of the current view.
471+
# That is the case if the data is compressed but the view does not include the whole file on disk.
472+
# In this case we avoid checking the bounds because the aligned_offset and aligned_shape are calculated internally.
473+
warnings.warn(
474+
"Warning: write() was called on a compressed mag without block alignment. Performance will be degraded as the data has to be padded first.",
475+
RuntimeWarning,
476+
)
447477
aligned_data = self._read_without_checks(aligned_offset, aligned_shape)
448478

449479
index_slice = (
@@ -484,6 +514,11 @@ def __repr__(self) -> str:
484514
% (self.path, self.global_offset, self.size)
485515
)
486516

517+
@property
518+
def _mag_view_bounding_box_at_creation(self) -> BoundingBox:
519+
assert self._mag_view_bbox_at_creation is not None
520+
return self._mag_view_bbox_at_creation
521+
487522

488523
def _assert_positive_dimensions(
489524
offset: Tuple[int, int, int], size: Tuple[int, int, int]

wkcuber/utils.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -496,12 +496,6 @@ def named_partial(func: F, *args: Any, **kwargs: Any) -> F:
496496
return partial_func
497497

498498

499-
def convert_mag1_size(
500-
mag1_size: Union[List, np.ndarray], target_mag: Mag
501-
) -> np.ndarray:
502-
return ceil_div_np(np.array(mag1_size), target_mag.as_np())
503-
504-
505499
def convert_mag1_offset(
506500
mag1_offset: Union[List, np.ndarray], target_mag: Mag
507501
) -> np.ndarray:

0 commit comments

Comments
 (0)