Commit 6f5df53

Add BufferedSliceWriter from segmentation tools (#72)
* added BufferedSliceWriter from segmentation tools to utils
* auto-reformatted utils
* removed pid printing
* started working on the test
* finished initial version of the buffered slice writer test; gave the buffered slice writer the argument mag
* added z arg to the buffered slice writer test
* fixed test bugs
* improved test output
* fixed test by removing zeros and making the input img nonzero
* removed unnecessary assert statement
* fixed shape bug
* added better assertion feedback
* added equivalency test
1 parent 9d4a9f2 commit 6f5df53
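
For orientation, a minimal usage sketch of the new writer, based on the test and class added below; the dataset directory, layer name, and 24×24 slice size are only illustrative:

import numpy as np
from wkcuber.mag import Mag
from wkcuber.utils import BufferedSliceWriter

# one [y, x] slice; nonzero values make written data distinguishable from
# the zero-filled, not-yet-written parts of the dataset
slice_img = np.arange(24 * 24).reshape(24, 24).astype(np.uint16) + 1
bbox = {'topleft': (0, 0, 0), 'size': (24, 24, 35)}

# slices are buffered (32 by default) and flushed to WKW as one 3D block
with BufferedSliceWriter(
    'testoutput/buffered_slice_writer',  # illustrative dataset directory
    'color',                             # layer name
    slice_img.dtype,
    bbox,
    [0, 0, 0],                           # origin
    mag=Mag(1),
) as writer:
    for z in range(35):
        writer.write_slice(z, slice_img)
# any remaining buffered slices are flushed when the context exits (close())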

File tree

2 files changed: +149 -2 lines changed


tests/test_utils.py

Lines changed: 59 additions & 1 deletion
@@ -1,5 +1,8 @@
 import numpy as np
-from wkcuber.utils import get_chunks, get_regular_chunks
+from wkcuber.utils import get_chunks, get_regular_chunks, BufferedSliceWriter
+import wkw
+from wkcuber.mag import Mag
+import os
 
 BLOCK_LEN = 32
 
@@ -27,3 +30,58 @@ def test_get_regular_chunks_max_inclusive():
     assert list(target[0]) == list(range(4, 5))
     # The last chunk should include 44
     assert list(target[-1]) == list(range(44, 45))
+
+
+def test_buffered_slice_writer():
+    test_img = np.arange(24 * 24).reshape(24, 24).astype(np.uint16) + 1
+    dtype = test_img.dtype
+    bbox = {'topleft': (0, 0, 0), 'size': (24, 24, 35)}
+    origin = [0, 0, 0]
+    dataset_dir = 'testoutput/buffered_slice_writer'
+    layer_name = 'color'
+    mag = Mag(1)
+    dataset_path = os.path.join(dataset_dir, layer_name, mag.to_layer_name())
+
+    with BufferedSliceWriter(dataset_dir, layer_name, dtype, bbox, origin, mag=mag) as writer:
+        for i in range(13):
+            writer.write_slice(i, test_img)
+        with wkw.Dataset.open(dataset_path, wkw.Header(dtype)) as data:
+            try:
+                read_data = data.read(origin, (24, 24, 13))
+                if read_data[read_data.nonzero()].size != 0:
+                    raise AssertionError('Nothing should be written on the disk. But found data with shape: {}'
+                                         .format(read_data.shape))
+            except wkw.wkw.WKWException:
+                pass
+
+        for i in range(13, 32):
+            writer.write_slice(i, test_img)
+        with wkw.Dataset.open(dataset_path, wkw.Header(dtype)) as data:
+            read_data = data.read(origin, (24, 24, 32))
+            assert np.squeeze(read_data).shape == (24, 24, 32), "The read data should have the shape: (24, 24, 32) " \
+                                                                "but has a shape of: {}" \
+                                                                .format(np.squeeze(read_data).shape)
+            assert read_data.size == read_data[read_data.nonzero()].size, "The read data contains zeros while the " \
+                                                                          "written image has no zeros"
+
+        for i in range(32, 35):
+            writer.write_slice(i, test_img)
+
+    with wkw.Dataset.open(dataset_path, wkw.Header(dtype)) as data:
+        read_data = data.read(origin, (24, 24, 35))
+        read_data = np.squeeze(read_data)
+        assert read_data.shape == (24, 24, 35), "The read data should have the shape: (24, 24, 35) " \
+                                                "but has a shape of: {}" \
+                                                .format(np.squeeze(read_data).shape)
+        assert read_data.size == read_data[read_data.nonzero()].size, "The read data contains zeros while the " \
+                                                                      "written image has no zeros"
+        test_img_3d = np.zeros((test_img.shape[0], test_img.shape[1], 35))
+        for i in np.arange(35):
+            test_img_3d[:, :, i] = test_img
+        # transpose because the slice writer takes [y, x] data and transposes it to [x, y] before writing
+        test_img_3d = np.transpose(test_img_3d, (1, 0, 2))
+        # check if the data are correct
+        assert np.array_equal(test_img_3d, read_data), "The data from the disk is not the same " \
+                                                       "as the data that should be written."
+
+
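The three read-back checks line up with the writer's flush boundaries: with the default buffer_size of 32, nothing reaches disk after the first 13 slices (so the first read expects only zeros, or tolerates a WKWException), slices 0-31 are flushed as soon as slice 31 is written, and the last three slices only land on disk when the writer is closed at the end of the with block. A small sketch of that arithmetic, assuming sequential writes starting at z = 0:

buffer_size = 32  # default of BufferedSliceWriter

# write_slice flushes once the running slice count reaches a multiple of buffer_size
for z in range(35):
    if (z + 1) % buffer_size == 0:
        print('flush of slices {}..{} after writing slice {}'.format(z + 1 - buffer_size, z, z))
# prints: flush of slices 0..31 after writing slice 31
# slices 32..34 stay buffered until close() flushes them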

wkcuber/utils.py

Lines changed: 90 additions & 1 deletion
@@ -10,9 +10,10 @@
 from multiprocessing import cpu_count, Lock
 import concurrent
 from concurrent.futures import ProcessPoolExecutor
-from os import path
+from os import path, getpid
 from platform import python_version
 from math import floor, ceil
+from wkcuber.mag import Mag
 
 from .knossos import KnossosDataset, CUBE_EDGE_LEN
 
@@ -179,3 +180,91 @@ def time_stop(identifier):
 def wait_and_ensure_success(futures):
     for fut in concurrent.futures.as_completed(futures):
         fut.result()
+
+
+class BufferedSliceWriter(object):
+    def __init__(
+        self,
+        dataset_path,
+        layer_name,
+        dtype,
+        bounding_box,
+        origin,
+        buffer_size=32,
+        mag=Mag(1),
+    ):
+
+        self.dataset_path = dataset_path
+        self.layer_name = layer_name
+        self.buffer_size = buffer_size
+
+        layer_path = path.join(self.dataset_path, self.layer_name, mag.to_layer_name())
+
+        self.dataset = wkw.Dataset.open(layer_path, wkw.Header(dtype))
+        self.origin = origin
+        self.bounding_box = bounding_box
+
+        self.buffer = []
+        self.current_z = None
+        self.buffer_start_z = None
+
+    def write_slice(self, z: int, data: np.ndarray):
+        """Takes in a slice in [y, x] shape, writes to WKW file."""
+
+        if len(self.buffer) == 0:
+            self.current_z = z
+            self.buffer_start_z = z
+
+        assert (
+            z == self.current_z
+        ), "({}) Slices have to be written sequentially!".format(getpid())
+
+        self.buffer.append(data.transpose())
+        self.current_z += 1
+
+        if self.current_z % self.buffer_size == 0:
+            self._write_buffer()
+
+    def _write_buffer(self):
+
+        if len(self.buffer) == 0:
+            return
+
+        assert len(self.buffer) <= self.buffer_size
+
+        logging.debug(
+            "({}) Writing {} slices at position {}.".format(
+                getpid(), len(self.buffer), self.buffer_start_z
+            )
+        )
+
+        origin_with_offset = self.origin.copy()
+        origin_with_offset[2] = self.buffer_start_z
+        x_max = max(slice.shape[0] for slice in self.buffer)
+        y_max = max(slice.shape[1] for slice in self.buffer)
+        self.buffer = [
+            np.pad(
+                slice,
+                mode="constant",
+                pad_width=[(0, x_max - slice.shape[0]), (0, y_max - slice.shape[1])],
+            )
+            for slice in self.buffer
+        ]
+        data = np.concatenate(
+            [np.expand_dims(slice, 2) for slice in self.buffer], axis=2
+        )
+
+        self.dataset.write(origin_with_offset, data)
+
+        self.buffer = []
+
+    def close(self):
+
+        self._write_buffer()
+        self.dataset.close()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, type, value, tb):
+        self.close()
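
To illustrate what _write_buffer does with the buffered [x, y] slices, here is a standalone sketch of the padding-and-stacking step with made-up slice shapes (the real writer additionally derives the z offset from buffer_start_z):

import numpy as np

# stand-ins for two buffered [x, y] slices of slightly different sizes
buffer = [np.ones((24, 20), dtype=np.uint16), np.ones((20, 24), dtype=np.uint16)]

# pad every slice to the largest x/y extent in the buffer
x_max = max(s.shape[0] for s in buffer)
y_max = max(s.shape[1] for s in buffer)
padded = [
    np.pad(s, pad_width=[(0, x_max - s.shape[0]), (0, y_max - s.shape[1])], mode='constant')
    for s in buffer
]

# stack along a new z axis; an (x, y, z) block like this is what dataset.write receives
block = np.concatenate([np.expand_dims(s, 2) for s in padded], axis=2)
print(block.shape)  # (24, 24, 2)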
