wip fix chunks

LucaMarconato · LucaMarconato · commit a890951e1cb5 · 2026-01-16T12:50:39.000+01:00
diff --git a/benchmarks/benchmark_image.py b/benchmarks/benchmark_image.py
@@ -11,7 +11,7 @@
 from spatialdata import SpatialData
 from xarray import DataArray
 
-from spatialdata_io import image
+from spatialdata_io import image  # type: ignore[attr-defined]
 
 # =============================================================================
 # CONFIGURATION - Edit these paths to match your setup
@@ -73,13 +73,29 @@ def _convert_image(
         # sanity check
         if scale_factors is None:
             assert isinstance(sdata["image"], DataArray)
+            if chunks is not None:
+                assert (
+                    sdata["image"].chunksizes["x"][0] == chunks[0]
+                    or sdata["image"].chunksizes["x"][0] == sdata["image"].shape[2]
+                )
+                assert (
+                    sdata["image"].chunksizes["y"][0] == chunks[1]
+                    or sdata["image"].chunksizes["y"][0] == sdata["image"].shape[1]
+                )
         else:
-            assert len(sdata["image"].keys()) == len(scale_factors)
+            assert len(sdata["image"].keys()) == len(scale_factors) + 1
+            if chunks is not None:
+                assert (
+                    sdata["image"]["scale0"]["image"].chunksizes["x"][0] == chunks[0]
+                    or sdata["image"]["scale0"]["image"].chunksizes["x"][0]
+                    == sdata["image"]["scale0"]["image"].shape[2]
+                )
+                assert (
+                    sdata["image"]["scale0"]["image"].chunksizes["y"][0] == chunks[1]
+                    or sdata["image"]["scale0"]["image"].chunksizes["y"][0]
+                    == sdata["image"]["scale0"]["image"].shape[1]
+                )
 
-        if chunks is not None:
-            # TODO: bug here!
-            assert sdata["image"].chunksizes["x"] == chunks[0]
-            assert sdata["image"].chunksizes["y"] == chunks[1]
         return sdata
 
     def time_io(self, scale_factors: list[int] | None, use_tiff_memmap: bool, chunks: tuple[int, int]) -> None:
@@ -96,5 +112,27 @@ def peakmem_io(self, scale_factors: list[int] | None, use_tiff_memmap: bool, chu
 if __name__ == "__main__":
     # Run a single test case for quick verification
     bench = IOBenchmarkImage()
-    bench.setup(None, True, (1000, 1000))
-    bench.time_io(None, True, (1000, 1000))
+
+    # bench.setup()
+    # bench.time_io(None, True, (5000, 5000))
+
+    # bench.setup()
+    # bench.time_io(None, True, (1000, 1000))
+
+    # bench.setup()
+    # bench.time_io(None, False, (5000, 5000))
+
+    # bench.setup()
+    # bench.time_io(None, False, (1000, 1000))
+
+    # bench.setup()
+    # bench.time_io([2, 2, 2], True, (5000, 5000))
+
+    # bench.setup()
+    # bench.time_io([2, 2, 2], True, (1000, 1000))
+
+    bench.setup()
+    bench.time_io([2, 2, 2], False, (5000, 5000))
+
+    # bench.setup()
+    # bench.time_io([2, 2, 2], False, (1000, 1000))
diff --git a/benchmarks/benchmark_xenium.py b/benchmarks/benchmark_xenium.py
@@ -103,4 +103,6 @@ def peakmem_io(self) -> None:
 
 
 if __name__ == "__main__":
-    IOBenchmarkXenium().time_io()
+    benchmark = IOBenchmarkXenium()
+    benchmark.setup()
+    benchmark.time_io()
diff --git a/src/spatialdata_io/readers/generic.py b/src/spatialdata_io/readers/generic.py
@@ -131,11 +131,15 @@ def _reader_func(slide: np.memmap, y0: int, x0: int, height: int, width: int) ->
     return _read_chunks(_reader_func, slide, coords=chunk_coords, n_channel=n_channel, dtype=slide.dtype)
 
 
-def _dask_image_imread(input: Path, data_axes: Sequence[str]) -> da.Array:
+def _dask_image_imread(input: Path, data_axes: Sequence[str], chunks: tuple[int, int] | None = None) -> da.Array:
+    if set(data_axes) != {"c", "y", "x"}:
+        raise NotImplementedError(f"Only 'c', 'y', 'x' axes are supported, got {data_axes}")
     image = imread(input)
-    if len(image.shape) == len(data_axes) + 1 and image.shape[0] == 1:
-        image = np.squeeze(image, axis=0)
-    return image
+    if image.ndim != len(data_axes):
+        raise ValueError(f"Expected image with {len(data_axes)} dimensions, got {image.ndim}")
+    image = image.transpose(*[data_axes.index(ax) for ax in ["c", "y", "x"]])
+    # chunks=None (the default) returns the array un-rechunked; otherwise each chunk spans one channel.
+    return image if chunks is None else image.rechunk((1, *chunks))
 
 
 def image(
@@ -187,11 +191,11 @@ def image(
             use_tiff_memmap = False
 
     if input.suffix in [".tiff", ".tif"] and not use_tiff_memmap or input.suffix in [".png", ".jpg", ".jpeg"]:
-        im = _dask_image_imread(input=input, data_axes=data_axes)
+        im = _dask_image_imread(input=input, data_axes=data_axes, chunks=chunks)
 
     if im is None:
         raise NotImplementedError(f"File format {input.suffix} not implemented")
 
     return Image2DModel.parse(
-        im, dims=data_axes, transformations={coordinate_system: Identity()}, scale_factors=scale_factors
+        im, dims=data_axes, transformations={coordinate_system: Identity()}, scale_factors=scale_factors, chunks=chunks
     )