Fix tiff conversion for tiffs without pages (#1269)

normanrz · web-flow · commit 8f96dd7ed257 · 2025-03-13T16:53:56.000Z
diff --git a/webknossos/Changelog.md b/webknossos/Changelog.md
@@ -18,8 +18,10 @@ For upgrade instructions, please check the respective _Breaking Changes_ section
 - Added tests for NDBoundingBox and VecInt classes. [#1268](https://github.com/scalableminds/webknossos-libs/pull/1268)
 
 ### Changed
+- Removed `PimsImagejTiffReader` in favor of the unified `PimsTiffReader`. [#1269](https://github.com/scalableminds/webknossos-libs/pull/1269)
 
 ### Fixed
+- Fixed issues with tiff conversion for tiff files that don't use pages for z and other axes. [#1269](https://github.com/scalableminds/webknossos-libs/pull/1269)
 
 
 ## [2.0.1](https://github.com/scalableminds/webknossos-libs/releases/tag/v2.0.1) - 2025-03-11
diff --git a/webknossos/webknossos/dataset/_utils/pims_imagej_tiff_reader.py b/webknossos/webknossos/dataset/_utils/pims_imagej_tiff_reader.py
diff --git a/webknossos/webknossos/dataset/_utils/pims_images.py b/webknossos/webknossos/dataset/_utils/pims_images.py
@@ -334,13 +334,6 @@ def _try_open_pims_images(
         except ImportError as import_error:
             import_exceptions.append(f"PimsDmReaders: {import_error.msg}")
 
-        try:
-            from .pims_imagej_tiff_reader import (  # noqa: F401 unused-import
-                PimsImagejTiffReader,
-            )
-        except ImportError as import_error:
-            import_exceptions.append(f"PimsImagejTiffReader: {import_error.msg}")
-
         try:
             from .pims_tiff_reader import PimsTiffReader  # noqa: F401 unused-import
         except ImportError as import_error:
diff --git a/webknossos/webknossos/dataset/_utils/pims_tiff_reader.py b/webknossos/webknossos/dataset/_utils/pims_tiff_reader.py
@@ -1,7 +1,7 @@
-from itertools import product
+import json
 from os import PathLike
 from pathlib import Path
-from typing import Set, Tuple, Union
+from typing import Iterator, NamedTuple, Set, Tuple, Union
 
 import numpy as np
 from pims import FramesSequenceND
@@ -14,6 +14,101 @@
     ) from e
 
 
+# This indexing function is adapted from zarr-python to work with tifffile's aszarr function
+# See https://github.com/zarr-developers/zarr-python/blob/main/src/zarr/core/indexing.py
+class _ChunkProjection(NamedTuple):
+    chunk_coords: tuple[int, ...]
+    chunk_selection: tuple[Union[slice, int], ...]
+    out_selection: tuple[Union[slice, None], ...]
+
+
+def _chunk_indexing(
+    selection: tuple[Union[slice, int], ...],
+    shape: tuple[int, ...],
+    chunk_shape: tuple[int, ...],
+) -> Iterator[_ChunkProjection]:
+    from itertools import product
+
+    class ChunkDimProjection(NamedTuple):
+        dim_chunk_ix: int
+        dim_chunk_sel: Union[slice, int]
+        dim_out_sel: Union[slice, None]
+
+    def ceildiv(a: int, b: int) -> int:
+        return -(a // -b)
+
+    def slice_dim_indexer(
+        dim_sel: slice, dim_len: int, dim_chunk_len: int
+    ) -> Iterator[ChunkDimProjection]:
+        start, stop, step = dim_sel.indices(dim_len)
+        assert step == 1
+
+        # figure out the range of chunks we need to visit
+        dim_chunk_ix_from = start // dim_chunk_len
+        dim_chunk_ix_to = ceildiv(stop, dim_chunk_len)
+
+        # iterate over chunks in range
+        for dim_chunk_ix in range(dim_chunk_ix_from, dim_chunk_ix_to):
+            # compute offsets for chunk within overall array
+            dim_offset = dim_chunk_ix * dim_chunk_len
+            dim_limit = min(dim_len, (dim_chunk_ix + 1) * dim_chunk_len)
+
+            # determine chunk length, accounting for trailing chunk
+            dim_chunk_len = dim_limit - dim_offset
+
+            if start < dim_offset:
+                # selection starts before current chunk
+                dim_chunk_sel_start = 0
+                remainder = (dim_offset - start) % 1
+                if remainder:
+                    dim_chunk_sel_start += 1 - remainder
+                # compute number of previous items, provides offset into output array
+                dim_out_offset = dim_offset - start
+
+            else:
+                # selection starts within current chunk
+                dim_chunk_sel_start = start - dim_offset
+                dim_out_offset = 0
+
+            if stop > dim_limit:
+                # selection ends after current chunk
+                dim_chunk_sel_stop = dim_chunk_len
+
+            else:
+                # selection ends within current chunk
+                dim_chunk_sel_stop = stop - dim_offset
+
+            dim_chunk_sel = slice(dim_chunk_sel_start, dim_chunk_sel_stop, 1)
+            dim_chunk_nitems = dim_chunk_sel_stop - dim_chunk_sel_start
+            dim_out_sel = slice(dim_out_offset, dim_out_offset + dim_chunk_nitems)
+
+            yield ChunkDimProjection(dim_chunk_ix, dim_chunk_sel, dim_out_sel)
+
+    def int_dim_indexer(
+        dim_sel: int, dim_chunk_len: int
+    ) -> Iterator[ChunkDimProjection]:
+        dim_chunk_ix = dim_sel // dim_chunk_len
+        dim_offset = dim_chunk_ix * dim_chunk_len
+        yield ChunkDimProjection(dim_chunk_ix, dim_sel - dim_offset, None)
+
+    # setup per-dimension indexers
+    dim_indexers = [
+        slice_dim_indexer(dim_sel, dim_len, dim_chunk_len)
+        if isinstance(dim_sel, slice)
+        else int_dim_indexer(dim_sel, dim_chunk_len)
+        for dim_sel, dim_len, dim_chunk_len in zip(selection, shape, chunk_shape)
+    ]
+
+    for dim_projections in product(*dim_indexers):
+        chunk_coords = tuple(p.dim_chunk_ix for p in dim_projections)
+        chunk_selection = tuple(p.dim_chunk_sel for p in dim_projections)
+        out_selection = tuple(
+            p.dim_out_sel for p in dim_projections if p.dim_out_sel is not None
+        )
+
+        yield _ChunkProjection(chunk_coords, chunk_selection, out_selection)
+
+
 class PimsTiffReader(FramesSequenceND):
     @classmethod
     def class_exts(cls) -> Set[str]:
@@ -22,7 +117,6 @@ def class_exts(cls) -> Set[str]:
     # class_priority is used in pims to pick the reader with the highest priority.
     # We decided to use a custom reader for tiff files to support images with more than 3 dimensions out of the box.
     # Default is 10, and bioformats priority is 2.
-    # Our custom reader for imagej_tiff has priority 20.
     # See http://soft-matter.github.io/pims/v0.6.1/custom_readers.html#plugging-into-pims-s-open-function
     class_priority = 19
 
@@ -55,66 +149,59 @@ def __init__(self, path: PathLike) -> None:
         else:
             self._register_get_frame(self.get_frame_2D, "yx")
 
+        expected_page_count = int(
+            np.prod([self.sizes[axis] for axis in self._other_axes])
+        )
+        self._page_mode = len(_tiff.pages) == expected_page_count
+
     def get_frame_2D(self, **ind: int) -> np.ndarray:
         _tiff = tifffile.TiffFile(self.path).series[0]
 
+        # We are using aszarr because it provides a chunked interface
+        # to the tiff file's content. However, we don't want to add
+        # zarr-python as a dependency. So we just implement the indexing
+        # ourselves and rely on the fact that tifffile isn't using more
+        # complex zarr features such as compressors, filters, F-order, fillvalue etc.
+        zarr_store = _tiff.aszarr()
+        zarray = json.loads(zarr_store[".zarray"])
+
+        assert zarray["zarr_format"] == 2
+        assert zarray["order"] == "C"
+        assert np.dtype(zarray["dtype"]) == self._dtype
+        assert zarray.get("compressor") is None
+        assert zarray.get("filters") in (None, [])
+        assert zarray["fill_value"] == 0
+        array_shape = tuple(zarray["shape"])
+        chunk_shape = tuple(zarray["chunks"])
+
+        # Prepare output array for this frame
         out_shape = tuple(self.sizes[axis] for axis in self.bundle_axes)
         out = np.zeros(out_shape, dtype=self._dtype)
 
-        # Axes that are present in the tiff page
-        page_axes = tuple(
-            axis for axis in self._tiff_axes if axis not in self._other_axes
-        )
         # Axes that need to be broadcasted from page to output
         broadcast_axes = tuple(
             axis
             for axis in self._tiff_axes
             if axis in self.bundle_axes and axis not in self._other_axes
         )
 
-        page_indices = product(
-            *[
-                range(self.sizes[axis])
-                if axis in self.bundle_axes
-                else range(ind[axis], ind[axis] + 1)
-                for axis in self._other_axes
-            ]
+        # Prepare selection of the data to read for this frame
+        selection: tuple[Union[slice, int], ...] = tuple(
+            slice(None) if axis in broadcast_axes else ind[axis]
+            for axis in self._tiff_axes
         )
 
-        # We iterate over all tiff pages to find the pages that are relevant for this frame
-        for page_ind in page_indices:
-            this_ind = {axis: index for axis, index in zip(self._other_axes, page_ind)}
-
-            i = 0
-            for j, axis in enumerate(self._other_axes):
-                i += this_ind[axis] * np.prod(
-                    [self.sizes[axis] for axis in self._other_axes[j + 1 :]],
-                    dtype=int,
-                )
-
-            # Prepare selectors
-            page_selector_list: list[Union[slice, int]] = []
-            for axis in page_axes:
-                if axis in self.bundle_axes:
-                    page_selector_list.append(slice(None))
-                else:
-                    page_selector_list.append(ind[axis])
-            page_selector = tuple(page_selector_list)
-
-            out_selector_list: list[Union[slice, int]] = []
-            for axis in self.bundle_axes:
-                if axis in broadcast_axes:
-                    out_selector_list.append(slice(None))  # broadcast
-                else:
-                    out_selector_list.append(
-                        this_ind[axis]
-                    )  # set page in a slice of the output
-            out_selector = tuple(out_selector_list)
-
-            page = _tiff.asarray(key=i)
-            assert len(out_selector) == out.ndim
-            assert len(page_selector) == page.ndim
-            out[out_selector] = page[page_selector]
+        for chunk_projection in _chunk_indexing(selection, array_shape, chunk_shape):
+            # read data from zarr store
+            chunk_data = (
+                zarr_store[".".join(map(str, chunk_projection.chunk_coords))]
+                .ravel()
+                .reshape(chunk_shape)
+            )
+            # write in output array
+            out[chunk_projection.out_selection] = chunk_data[
+                chunk_projection.chunk_selection
+            ]
 
         return out