Skip to content

Commit b5f8f44

Browse files
Remove the parameter allow_compressed_write by always allowing it (#356)
* Remove the parameter allow_compressed_write by always allowing it
* update changelog
* lint and format code
* move comment to right place
* update doc strings

Co-authored-by: Jonathan Striebel <[email protected]>
1 parent 1bda6a0 commit b5f8f44

File tree

9 files changed

+85
-69
lines changed

9 files changed

+85
-69
lines changed

Changelog.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ For upgrade instructions, please check the respective *Breaking Changes* section
1010
[Commits](https://github.com/scalableminds/webknossos-cuber/compare/v0.8.4...HEAD)
1111

1212
### Breaking Changes in Config & CLI
13+
- The parameter `allow_compressed_write` has been removed from `View.write()`. Writing to compressed magnifications is now always allowed. If the user decides to write unaligned data, a warning about a possible performance impact is displayed once. [#356](https://github.com/scalableminds/webknossos-cuber/pull/356)
1314

1415
### Added
1516
- Added functions to `wkcuber.api.dataset.Dataset` and `wkcuber.api.layer.Layer` to set and get the view configuration. [#344](https://github.com/scalableminds/webknossos-cuber/pull/344)

tests/test_dataset.py

Lines changed: 60 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import itertools
33
import json
44
import os
5+
import warnings
56
from os.path import dirname, join
67
from pathlib import Path
78
from typing import Any, Tuple, cast, Generator
@@ -962,11 +963,16 @@ def test_writing_subset_of_compressed_data_multi_channel() -> None:
962963
Dataset(TESTOUTPUT_DIR / "compressed_data").get_layer("color").get_mag("1")
963964
)
964965

965-
write_data2 = (np.random.rand(3, 10, 10, 10) * 255).astype(np.uint8)
966-
# Writing compressed data directly to "compressed_mag" also works, but using a View here covers an additional edge case
967-
compressed_mag.get_view(offset=(50, 60, 70)).write(
968-
offset=(10, 20, 30), data=write_data2, allow_compressed_write=True
969-
)
966+
with warnings.catch_warnings():
967+
warnings.filterwarnings(
968+
"ignore", category=RuntimeWarning, module="wkcuber"
969+
) # This line is not necessary. It simply keeps the output of the tests clean.
970+
write_data2 = (np.random.rand(3, 10, 10, 10) * 255).astype(np.uint8)
971+
# Writing unaligned data to a compressed dataset works because the data gets padded, but it prints a warning
972+
# Writing compressed data directly to "compressed_mag" also works, but using a View here covers an additional edge case
973+
compressed_mag.get_view(offset=(50, 60, 70)).write(
974+
offset=(10, 20, 30), data=write_data2
975+
)
970976

971977
assert np.array_equal(
972978
write_data2, compressed_mag.read(offset=(60, 80, 100), size=(10, 10, 10))
@@ -998,11 +1004,16 @@ def test_writing_subset_of_compressed_data_single_channel() -> None:
9981004
Dataset(TESTOUTPUT_DIR / "compressed_data").get_layer("color").get_mag("1")
9991005
)
10001006

1001-
write_data2 = (np.random.rand(10, 10, 10) * 255).astype(np.uint8)
1002-
# Writing compressed data directly to "compressed_mag" also works, but using a View here covers an additional edge case
1003-
compressed_mag.get_view(offset=(50, 60, 70)).write(
1004-
offset=(10, 20, 30), data=write_data2, allow_compressed_write=True
1005-
)
1007+
with warnings.catch_warnings():
1008+
warnings.filterwarnings(
1009+
"ignore", category=RuntimeWarning, module="wkcuber"
1010+
) # This line is not necessary. It simply keeps the output of the tests clean.
1011+
write_data2 = (np.random.rand(10, 10, 10) * 255).astype(np.uint8)
1012+
# Writing unaligned data to a compressed dataset works because the data gets padded, but it prints a warning
1013+
# Writing compressed data directly to "compressed_mag" also works, but using a View here covers an additional edge case
1014+
compressed_mag.get_view(offset=(50, 60, 70)).write(
1015+
offset=(10, 20, 30), data=write_data2
1016+
)
10061017

10071018
assert np.array_equal(
10081019
write_data2, compressed_mag.read(offset=(60, 80, 100), size=(10, 10, 10))[0]
@@ -1035,13 +1046,27 @@ def test_writing_subset_of_compressed_data() -> None:
10351046
Dataset(TESTOUTPUT_DIR / "compressed_data").get_layer("color").get_mag("1")
10361047
)
10371048

1038-
with pytest.raises(WKWException):
1039-
# calling 'write' with unaligned data on compressed data without setting 'allow_compressed_write=True'
1049+
with warnings.catch_warnings():
1050+
warnings.filterwarnings(
1051+
"ignore", category=RuntimeWarning, module="wkcuber"
1052+
) # This line is not necessary. It simply keeps the output of the tests clean.
10401053
compressed_mag.write(
10411054
offset=(10, 20, 30),
10421055
data=(np.random.rand(10, 10, 10) * 255).astype(np.uint8),
10431056
)
10441057

1058+
with warnings.catch_warnings():
1059+
# Calling 'write' with unaligned data on compressed data only fails if the warnings are treated as errors.
1060+
warnings.filterwarnings("error") # This escalates the warning to an error
1061+
with pytest.raises(RuntimeWarning):
1062+
compressed_mag.write(
1063+
offset=(10, 20, 30),
1064+
data=(np.random.rand(10, 10, 10) * 255).astype(np.uint8),
1065+
)
1066+
1067+
# Writing aligned data does not raise a warning. Therefore, this does not fail with these strict settings.
1068+
compressed_mag.write(data=(np.random.rand(64, 64, 64) * 255).astype(np.uint8))
1069+
10451070

10461071
def test_writing_subset_of_chunked_compressed_data() -> None:
10471072
delete_dir(TESTOUTPUT_DIR / "compressed_data")
@@ -1067,20 +1092,22 @@ def test_writing_subset_of_chunked_compressed_data() -> None:
10671092
.get_view(size=(100, 200, 300))
10681093
)
10691094

1070-
# Easy case:
1071-
# The aligned data (offset=(0,0,0), size=(64, 64, 64)) IS fully within the bounding box of the view
1072-
write_data2 = (np.random.rand(50, 40, 30) * 255).astype(np.uint8)
1073-
compressed_view.write(
1074-
offset=(10, 20, 30), data=write_data2, allow_compressed_write=True
1075-
)
1095+
with warnings.catch_warnings():
1096+
warnings.filterwarnings(
1097+
"ignore", category=RuntimeWarning, module="wkcuber"
1098+
) # This line is not necessary. It simply keeps the output of the tests clean.
10761099

1077-
# Advanced case:
1078-
# The aligned data (offset=(0,0,0), size=(128, 128, 128)) is NOT fully within the bounding box of the view
1079-
compressed_view.write(
1080-
offset=(10, 20, 30),
1081-
data=(np.random.rand(90, 80, 70) * 255).astype(np.uint8),
1082-
allow_compressed_write=True,
1083-
)
1100+
# Easy case:
1101+
# The aligned data (offset=(0,0,0), size=(64, 64, 64)) IS fully within the bounding box of the view
1102+
write_data2 = (np.random.rand(50, 40, 30) * 255).astype(np.uint8)
1103+
compressed_view.write(offset=(10, 20, 30), data=write_data2)
1104+
1105+
# Advanced case:
1106+
# The aligned data (offset=(0,0,0), size=(128, 128, 128)) is NOT fully within the bounding box of the view
1107+
compressed_view.write(
1108+
offset=(10, 20, 30),
1109+
data=(np.random.rand(90, 80, 70) * 255).astype(np.uint8),
1110+
)
10841111

10851112
np.array_equal(
10861113
write_data2, compressed_view.read(offset=(10, 20, 30), size=(50, 40, 30))
@@ -1393,14 +1420,14 @@ def test_compression(tmp_path: Path) -> None:
13931420
write_data, mag1.read(offset=(60, 80, 100), size=(10, 20, 30))
13941421
)
13951422

1396-
with pytest.raises(wkw.WKWException):
1397-
# writing unaligned data to a compressed dataset
1398-
mag1.write((np.random.rand(3, 10, 20, 30) * 255).astype(np.uint8))
1399-
1400-
mag1.write(
1401-
(np.random.rand(3, 10, 20, 30) * 255).astype(np.uint8),
1402-
allow_compressed_write=True,
1403-
)
1423+
with warnings.catch_warnings():
1424+
warnings.filterwarnings(
1425+
"ignore", category=RuntimeWarning, module="wkcuber"
1426+
) # This line is not necessary. It simply keeps the output of the tests clean.
1427+
# writing unaligned data to a compressed dataset works because the data gets padded, but it prints a warning
1428+
mag1.write(
1429+
(np.random.rand(3, 10, 20, 30) * 255).astype(np.uint8),
1430+
)
14041431

14051432

14061433
def test_dataset_view_configuration(tmp_path: Path) -> None:

tests/test_downsampling.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,6 @@ def downsample_test_helper(use_compress: bool) -> None:
128128
[2, 2, 2],
129129
InterpolationModes.MAX,
130130
CUBE_EDGE_LEN,
131-
use_compress,
132131
100,
133132
)
134133

@@ -185,7 +184,6 @@ def test_downsample_multi_channel() -> None:
185184
[2, 2, 2],
186185
InterpolationModes.MAX,
187186
CUBE_EDGE_LEN,
188-
False,
189187
100,
190188
)
191189

tests/test_upsampling.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,6 @@ def upsample_test_helper(use_compress: bool) -> None:
8484
),
8585
[0.5, 0.5, 1.0],
8686
CUBE_EDGE_LEN,
87-
use_compress,
8887
100,
8988
)
9089

@@ -135,7 +134,6 @@ def test_upsample_multi_channel() -> None:
135134
(mag2.get_view(), l.get_mag("1").get_view(), 0),
136135
[0.5, 0.5, 0.5],
137136
CUBE_EDGE_LEN,
138-
False,
139137
100,
140138
)
141139

wkcuber/api/layer.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,9 @@ def add_mag(
117117
Creates a new mag called and adds it to the layer.
118118
The parameter `block_len`, `file_len` and `compress` can be
119119
specified to adjust how the data is stored on disk.
120+
Note that writing compressed data which is not aligned with the blocks on disk may result in
121+
diminished performance, as full blocks will automatically be read to pad the write actions. Alternatively,
122+
you can call mag.compress() after all the data was written
120123
121124
The return type is `wkcuber.api.mag_view.MagView`.
122125
@@ -417,7 +420,6 @@ def downsample_mag(
417420
mag_factors=mag_factors,
418421
interpolation_mode=parsed_interpolation_mode,
419422
buffer_edge_len=buffer_edge_len,
420-
compress=compress,
421423
job_count_per_log=job_count_per_log,
422424
)
423425

@@ -547,7 +549,6 @@ def upsample(
547549
upsample_cube_job,
548550
mag_factors=mag_factors,
549551
buffer_edge_len=buffer_edge_len,
550-
compress=compress,
551552
job_count_per_log=job_count_per_log,
552553
)
553554
prev_mag_view.get_view().for_zipped_chunks(

wkcuber/api/mag_view.py

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -99,24 +99,18 @@ def __init__(
9999
join(layer.dataset.path, layer.name, self.name), self.header
100100
)
101101

102-
def write(
103-
self,
104-
data: np.ndarray,
105-
offset: Tuple[int, int, int] = (0, 0, 0),
106-
allow_compressed_write: bool = False,
107-
) -> None:
102+
def write(self, data: np.ndarray, offset: Tuple[int, int, int] = (0, 0, 0)) -> None:
108103
"""
109104
Writes the `data` at the specified `offset` to disk (like `wkcuber.api.view.View.write()`).
110105
111106
The `offset` refers to the absolute position, regardless of the offset in the properties (because the global_offset is set to (0, 0, 0)).
112107
If the data exceeds the original bounding box, the properties are updated.
113108
114-
If the data on disk is compressed, the passed `data` either has to be aligned with the files on disk
115-
or `allow_compressed_write` has to be `True`. If `allow_compressed_write` is `True`, `data` is padded by
116-
first reading the necessary padding from disk.
109+
Note that writing compressed data which is not aligned with the blocks on disk may result in
110+
diminished performance, as full blocks will automatically be read to pad the write actions.
117111
"""
118112
self._assert_valid_num_channels(data.shape)
119-
super().write(data, offset, allow_compressed_write)
113+
super().write(data, offset)
120114
layer_properties = self.layer.dataset.properties.data_layers[self.layer.name]
121115
current_offset_in_mag1 = layer_properties.get_bounding_box_offset()
122116
current_size_in_mag1 = layer_properties.get_bounding_box_size()

wkcuber/api/view.py

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import math
2+
import warnings
23
from pathlib import Path
34
from types import TracebackType
45
from typing import Tuple, Optional, Type, Callable, Union, cast
@@ -76,16 +77,13 @@ def write(
7677
self,
7778
data: np.ndarray,
7879
offset: Tuple[int, int, int] = (0, 0, 0),
79-
allow_compressed_write: bool = False,
8080
) -> None:
8181
"""
8282
Writes the `data` at the specified `offset` to disk.
8383
The `offset` is relative to `global_offset`.
8484
85-
If the data on disk is compressed, the passed `data` either has to be aligned with the files on disk
86-
or `allow_compressed_write` has to be `True`. If `allow_compressed_write` is `True`, `data` is padded by
87-
first reading the necessary padding from disk.
88-
In this particular case, reading data from outside the bounding box is allowed.
85+
Note that writing compressed data which is not aligned with the blocks on disk may result in
86+
diminished performance, as full blocks will automatically be read to pad the write actions.
8987
"""
9088
assert not self.read_only, "Cannot write data to an read_only View"
9189

@@ -103,7 +101,7 @@ def write(
103101
tuple(sum(x) for x in zip(self.global_offset, offset)),
104102
)
105103

106-
if self._is_compressed() and allow_compressed_write:
104+
if self._is_compressed():
107105
absolute_offset, data = self._handle_compressed_write(absolute_offset, data)
108106

109107
if not was_opened:
@@ -437,16 +435,17 @@ def _handle_compressed_write(
437435
):
438436
# the data is not aligned
439437
# read the aligned bounding box
440-
try:
441-
# We want to read the data at the absolute offset.
442-
# The absolute offset might be outside of the current view.
443-
# That is the case if the data is compressed but the view does not include the whole file on disk.
444-
# In this case we avoid checking the bounds because the aligned_offset and aligned_shape are calculated internally.
445-
aligned_data = self._read_without_checks(aligned_offset, aligned_shape)
446-
except AssertionError as e:
447-
raise AssertionError(
448-
f"Writing compressed data failed. The compressed file is not fully inside the bounding box of the view (offset={self.global_offset}, size={self.size})."
449-
) from e
438+
439+
# We want to read the data at the absolute offset.
440+
# The absolute offset might be outside of the current view.
441+
# That is the case if the data is compressed but the view does not include the whole file on disk.
442+
# In this case we avoid checking the bounds because the aligned_offset and aligned_shape are calculated internally.
443+
warnings.warn(
444+
"Warning: write() was called on a compressed mag without block alignment. Performance will be degraded as the data has to be padded first.",
445+
RuntimeWarning,
446+
)
447+
aligned_data = self._read_without_checks(aligned_offset, aligned_shape)
448+
450449
index_slice = (
451450
slice(None, None),
452451
*(

wkcuber/downsampling_utils.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,6 @@ def downsample_cube_job(
296296
mag_factors: List[int],
297297
interpolation_mode: InterpolationModes,
298298
buffer_edge_len: int,
299-
compress: bool,
300299
job_count_per_log: int,
301300
) -> None:
302301
(source_view, target_view, i) = args
@@ -367,7 +366,7 @@ def downsample_cube_job(
367366
# Write the downsampled buffer to target
368367
if source_view.header.num_channels == 1:
369368
file_buffer = file_buffer[0] # remove channel dimension
370-
target_view.write(file_buffer, allow_compressed_write=compress)
369+
target_view.write(file_buffer)
371370
if use_logging:
372371
time_stop(f"Downsampling of {target_view.global_offset}")
373372

wkcuber/upsampling_utils.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ def upsample_cube_job(
2727
args: Tuple[View, View, int],
2828
mag_factors: List[float],
2929
buffer_edge_len: int,
30-
compress: bool,
3130
job_count_per_log: int,
3231
) -> None:
3332
(source_view, target_view, i) = args
@@ -99,7 +98,7 @@ def upsample_cube_job(
9998
# Write the upsampled buffer to target
10099
if source_view.header.num_channels == 1:
101100
file_buffer = file_buffer[0] # remove channel dimension
102-
target_view.write(file_buffer, allow_compressed_write=compress)
101+
target_view.write(file_buffer)
103102
if use_logging:
104103
time_stop(f"Upsampling of {target_view.global_offset}")
105104

0 commit comments

Comments
 (0)