Skip to content

Commit e5d496d

Browse files
authored
Fix add layer from images for unaligned topleft (#1036)
* Fix issue with `from_images` when called with an unaligned topleft.
* Adapt batch size for Zarr files to avoid corrupted data.
* Update Changelog.md.
* Adapt `copy_to_view` to use a relative bbox to access original image data.
* Replace conversion from nd to 3d bounding box with raising a runtime error.
* Change error message.
1 parent 91fde94 commit e5d496d

File tree

4 files changed

+32
-46
lines changed

4 files changed

+32
-46
lines changed

webknossos/Changelog.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ For upgrade instructions, please check the respective _Breaking Changes_ section
1919
### Changed
2020

2121
### Fixed
22+
- Fixed a bug, where using an unaligned topleft value for `add_layer_from_images` leads to corrupted data. [#1036](https://github.com/scalableminds/webknossos-libs/pull/1036)
2223

2324

2425
## [0.14.17](https://github.com/scalableminds/webknossos-libs/releases/tag/v0.14.17) - 2024-04-10

webknossos/tests/dataset/test_add_layer_from_images.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,14 +49,14 @@ def test_compare_nd_tifffile(tmp_path: Path) -> None:
4949
"testdata/4D/4D_series/4D-series.ome.tif",
5050
layer_name="color",
5151
category="color",
52-
topleft=(100, 100, 55),
52+
topleft=(2, 55, 100, 100),
5353
use_bioformats=True,
5454
data_format="zarr3",
5555
chunk_shape=(8, 8, 8),
5656
chunks_per_shard=(8, 8, 8),
5757
)
5858
assert layer.bounding_box.topleft == wk.VecInt(
59-
0, 55, 100, 100, axes=("t", "z", "y", "x")
59+
2, 55, 100, 100, axes=("t", "z", "y", "x")
6060
)
6161
assert layer.bounding_box.size == wk.VecInt(
6262
7, 5, 167, 439, axes=("t", "z", "y", "x")

webknossos/webknossos/dataset/_utils/pims_images.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -504,14 +504,17 @@ def copy_to_view(
504504
copy_to_view returns an iterable of image shapes and largest segment ids. When using this
505505
method a manual update of the bounding box and the largest segment id might be necessary.
506506
"""
507-
relative_bbox = args
507+
absolute_bbox = args
508+
relative_bbox = absolute_bbox.offset(-mag_view.bounding_box.topleft)
508509

509510
assert all(
510511
size == 1
511-
for size, axis in zip(relative_bbox.size, relative_bbox.axes)
512+
for size, axis in zip(absolute_bbox.size, absolute_bbox.axes)
512513
if axis not in ("x", "y", "z")
513514
), "The delivered BoundingBox has to be flat except for x,y and z dimension."
514515

516+
# z_start and z_end are relative to the bounding box of the mag_view
517+
# to access the correct data from the images
515518
z_start, z_end = relative_bbox.get_bounds("z")
516519
shapes = []
517520
max_id: Optional[int]
@@ -522,22 +525,22 @@ def copy_to_view(
522525

523526
with self._open_images() as images:
524527
if self._iter_axes is not None and self._iter_loop_size is not None:
525-
# select the range of images that represents one xyz combination
528+
# select the range of images that represents one xyz combination in the mag_view
526529
lower_bounds = sum(
527530
self._iter_loop_size[axis_name]
528531
* relative_bbox.get_bounds(axis_name)[0]
529532
for axis_name in self._iter_axes[:-1]
530533
)
531-
upper_bounds = lower_bounds + relative_bbox.get_shape("z")
534+
upper_bounds = lower_bounds + mag_view.bounding_box.get_shape("z")
532535
images = images[lower_bounds:upper_bounds]
533536
if self._flip_z:
534537
images = images[::-1] # pylint: disable=unsubscriptable-object
535538

536539
with mag_view.get_buffered_slice_writer(
537540
# Previously only z_start and its end were important, now the slice writer needs to know
538541
# which axis is currently written.
539-
relative_bounding_box=relative_bbox,
540-
buffer_size=mag_view.info.chunk_shape.z,
542+
absolute_bounding_box=absolute_bbox,
543+
buffer_size=absolute_bbox.get_shape("z"),
541544
# copy_to_view is typically used in a multiprocessing-context. Therefore the
542545
# buffered slice writer should not update the json file to avoid race conditions.
543546
json_update_allowed=False,

webknossos/webknossos/dataset/dataset.py

Lines changed: 20 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
from numpy.typing import DTypeLike
3636
from upath import UPath
3737

38-
from webknossos.geometry.vec_int import VecInt, VecIntLike
38+
from webknossos.geometry.vec_int import VecIntLike
3939

4040
from ..client.api_client.models import ApiDataset
4141
from ..geometry.vec3_int import Vec3Int, Vec3IntLike
@@ -1243,11 +1243,17 @@ def add_layer_from_images(
12431243
)
12441244

12451245
if batch_size is None:
1246-
if compress:
1246+
if compress or (
1247+
layer.data_format in (DataFormat.Zarr3, DataFormat.Zarr)
1248+
):
1249+
# if data is compressed or dataformat is zarr, parallel write access
1250+
# to a shard leads to corrupted data, the batch size must be aligned
1251+
# with the shard size
12471252
batch_size = mag_view.info.shard_shape.z
12481253
else:
1254+
# in uncompressed wkw only writing to the same chunk is problematic
12491255
batch_size = mag_view.info.chunk_shape.z
1250-
elif compress:
1256+
elif compress or (layer.data_format in (DataFormat.Zarr3, DataFormat.Zarr)):
12511257
assert (
12521258
batch_size % mag_view.info.shard_shape.z == 0
12531259
), f"batch_size {batch_size} must be divisible by z shard-size {mag_view.info.shard_shape.z} when creating compressed layers"
@@ -1263,44 +1269,20 @@ def add_layer_from_images(
12631269
dtype=current_dtype,
12641270
)
12651271

1266-
args = []
1267-
bbox = layer.bounding_box
1268-
additional_axes = [
1269-
axis_name for axis_name in bbox.axes if axis_name not in ("x", "y", "z")
1270-
]
1271-
additional_axes_shapes = tuple(
1272-
product(
1273-
*[range(bbox.get_shape(axis_name)) for axis_name in additional_axes]
1272+
if (
1273+
set(layer.bounding_box.axes).difference("x", "y", "z")
1274+
) and layer.data_format != DataFormat.Zarr3:
1275+
raise RuntimeError(
1276+
"The data stores additional axes other than x, y and z."
12741277
)
1275-
)
1276-
if additional_axes and layer.data_format != DataFormat.Zarr3:
1277-
assert (
1278-
len(additional_axes_shapes) == 1
1279-
), "The data stores additional axes with shape bigger than 1. These are only supported by data format Zarr3."
12801278

1281-
# Convert NDBoundingBox to 3D BoundingBox
1282-
bbox = BoundingBox(
1283-
bbox.topleft_xyz,
1284-
bbox.size_xyz,
1279+
buffered_slice_writer_shape = layer.bounding_box.size_xyz.with_z(batch_size)
1280+
args = list(
1281+
layer.bounding_box.chunk(
1282+
buffered_slice_writer_shape,
1283+
Vec3Int(1, 1, batch_size),
12851284
)
1286-
expected_bbox = bbox
1287-
additional_axes = []
1288-
1289-
z_shape = bbox.get_shape("z")
1290-
bbox = bbox.with_topleft(VecInt.zeros(bbox.axes))
1291-
for z_start in range(0, z_shape, batch_size):
1292-
z_size = min(batch_size, z_shape - z_start)
1293-
z_bbox = bbox.with_bounds("z", z_start, z_size)
1294-
if not additional_axes:
1295-
args.append(z_bbox)
1296-
else:
1297-
for shape in additional_axes_shapes:
1298-
reduced_bbox = z_bbox
1299-
for index, axis in enumerate(additional_axes):
1300-
reduced_bbox = reduced_bbox.with_bounds(
1301-
axis, shape[index], 1
1302-
)
1303-
args.append(reduced_bbox)
1285+
)
13041286

13051287
with warnings.catch_warnings():
13061288
# Block alignmnent within the dataset should not be a problem, since shard-wise chunking is enforced.

0 commit comments

Comments (0)