Skip to content
This repository was archived by the owner on Dec 1, 2025. It is now read-only.

Commit 7645b0e

Browse files
Feature/OmeZarr Chunk Dims (#519)
* chunk dim param
* chunk dims param
* lint
* Update aicsimageio/writers/ome_zarr_writer.py (Co-authored-by: Sean LeRoy <41307451+SeanLeRoy@users.noreply.github.com>)
* Update aicsimageio/writers/ome_zarr_writer.py (Co-authored-by: Sean LeRoy <41307451+SeanLeRoy@users.noreply.github.com>)
* Update aicsimageio/writers/ome_zarr_writer.py (Co-authored-by: Sean LeRoy <41307451+SeanLeRoy@users.noreply.github.com>)
* DimensionNames
* pass dimension order to build_chunk_dims
* OmeZarrWriter chunk tests
* chunk comparison
* pytest tmpdir fixture
* DimensionNames
* chunk_dim scaling for levels
* nplanes inside chunk dims = None
* ome-types 0.4.0 breaks build-main

---------

Co-authored-by: Sean LeRoy <41307451+SeanLeRoy@users.noreply.github.com>
1 parent 7e537c8 commit 7645b0e

File tree

3 files changed

+190
-69
lines changed

3 files changed

+190
-69
lines changed

aicsimageio/tests/writers/test_ome_zarr_writer.py

Lines changed: 66 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/usr/bin/env python
22
# -*- coding: utf-8 -*-
33

4-
import shutil
4+
import pathlib
55
from typing import Callable, List, Optional, Tuple
66

77
import numpy as np
@@ -12,7 +12,7 @@
1212
from aicsimageio import exceptions
1313
from aicsimageio.writers import OmeZarrWriter
1414

15-
from ..conftest import LOCAL, array_constructor, get_resource_write_full_path
15+
from ..conftest import array_constructor
1616

1717

1818
@array_constructor
@@ -69,14 +69,12 @@ def test_ome_zarr_writer_dims(
6969
expected_read_shape: Tuple[int, ...],
7070
expected_read_dim_order: str,
7171
filename: str,
72+
tmpdir: pathlib.Path,
7273
) -> None:
7374
# Create array
7475
arr = array_constructor(write_shape, dtype=np.uint8)
7576

76-
# Construct save end point
77-
save_uri = get_resource_write_full_path(filename, LOCAL)
78-
# clear out anything left over
79-
shutil.rmtree(save_uri, ignore_errors=True)
77+
save_uri = str(tmpdir / filename)
8078

8179
# Normal save
8280
writer = OmeZarrWriter(save_uri)
@@ -132,14 +130,11 @@ def test_ome_zarr_writer_scaling(
132130
expected_read_shapes: List[Tuple[int, ...]],
133131
expected_read_scales: List[List[int]],
134132
filename: str,
133+
tmpdir: pathlib.Path,
135134
) -> None:
136135
# Create array
137136
arr = array_constructor(write_shape, dtype=np.uint8)
138-
139-
# Construct save end point
140-
save_uri = get_resource_write_full_path(filename, LOCAL)
141-
# clear out anything left over
142-
shutil.rmtree(save_uri, ignore_errors=True)
137+
save_uri = str(tmpdir / filename)
143138

144139
# Normal save
145140
writer = OmeZarrWriter(save_uri)
@@ -160,3 +155,63 @@ def test_ome_zarr_writer_scaling(
160155
assert len(xforms) == 1
161156
assert xforms[0]["type"] == "scale"
162157
assert xforms[0]["scale"] == expected_read_scales[i]
158+
159+
160+
@array_constructor
161+
@pytest.mark.parametrize(
162+
"write_shape, chunk_dims, num_levels, expected_read_shapes",
163+
[
164+
(
165+
(2, 4, 8, 16, 32),
166+
(1, 1, 2, 16, 16),
167+
2,
168+
[(2, 4, 8, 16, 32), (2, 4, 8, 8, 16), (2, 4, 8, 4, 8)],
169+
),
170+
(
171+
(16, 32),
172+
(2, 4),
173+
2,
174+
[(16, 32), (8, 16), (4, 8)],
175+
),
176+
],
177+
)
178+
@pytest.mark.parametrize("filename", ["e.zarr"])
179+
def test_ome_zarr_writer_chunks(
180+
array_constructor: Callable,
181+
write_shape: Tuple[int, ...],
182+
chunk_dims: Tuple[int, ...],
183+
num_levels: int,
184+
filename: str,
185+
expected_read_shapes: List[Tuple[int, ...]],
186+
tmpdir: pathlib.Path,
187+
) -> None:
188+
arr = array_constructor(write_shape, dtype=np.uint8)
189+
190+
# Construct save end point
191+
192+
baseline_save_uri = str(tmpdir / f"baseline_{filename}")
193+
save_uri = str(tmpdir / filename)
194+
195+
# Normal save
196+
writer = OmeZarrWriter(save_uri)
197+
writer.write_image(
198+
arr, "", None, None, None, chunk_dims=chunk_dims, scale_num_levels=num_levels
199+
)
200+
reader = Reader(parse_url(save_uri))
201+
node = list(reader())[0]
202+
203+
# Check expected shapes
204+
for level in range(num_levels):
205+
shape = node.data[level].shape
206+
assert shape == expected_read_shapes[level]
207+
208+
# Create baseline chunking to compare against manual.
209+
writer = OmeZarrWriter(baseline_save_uri)
210+
writer.write_image(arr, "", None, None, None, scale_num_levels=num_levels)
211+
reader_baseline = Reader(parse_url(baseline_save_uri))
212+
node_baseline = list(reader_baseline())[0]
213+
214+
data = node.data[0]
215+
baseline_data = node_baseline.data[0]
216+
217+
assert np.all(np.equal(data, baseline_data))

aicsimageio/writers/ome_zarr_writer.py

Lines changed: 123 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from zarr.storage import default_compressor
99

1010
from .. import exceptions, types
11+
from ..dimensions import DEFAULT_DIMENSION_ORDER, DimensionNames
1112
from ..metadata import utils
1213
from ..utils import io_utils
1314

@@ -105,6 +106,13 @@ def build_ome(
105106
}
106107
return omero
107108

109+
@staticmethod
110+
def _build_chunk_dims(
111+
chunk_dim_map: Dict[str, int],
112+
dimension_order: str = DEFAULT_DIMENSION_ORDER,
113+
) -> Tuple[int, ...]:
114+
return tuple(chunk_dim_map[d] for d in dimension_order)
115+
108116
def write_image(
109117
self,
110118
# TODO how to pass in precomputed multiscales?
@@ -113,6 +121,7 @@ def write_image(
113121
physical_pixel_sizes: Optional[types.PhysicalPixelSizes],
114122
channel_names: Optional[List[str]],
115123
channel_colors: Optional[List[int]],
124+
chunk_dims: Optional[Tuple] = None,
116125
scale_num_levels: int = 1,
117126
scale_factor: float = 2.0,
118127
dimension_order: Optional[str] = None,
@@ -177,27 +186,34 @@ def write_image(
177186
f"Received image data with shape: {image_data.shape}"
178187
)
179188
if dimension_order is None:
180-
dimension_order = "TCZYX"[-ndims:]
189+
dimension_order = DEFAULT_DIMENSION_ORDER[-ndims:]
181190
if len(dimension_order) != ndims:
182191
raise exceptions.InvalidDimensionOrderingError(
183192
f"Dimension order {dimension_order} does not match data "
184193
f"shape: {image_data.shape}"
185194
)
186-
if (len(set(dimension_order) - set("TCZYX")) > 0) or len(
195+
if (len(set(dimension_order) - set(DEFAULT_DIMENSION_ORDER)) > 0) or len(
187196
dimension_order
188197
) != len(set(dimension_order)):
189198
raise exceptions.InvalidDimensionOrderingError(
190-
f"Dimension order {dimension_order} is invalid or "
191-
"contains unexpected dimensions. Only TCZYX currently supported."
199+
f"Dimension order {dimension_order} is invalid or contains"
200+
f"unexpected dimensions. Only {DEFAULT_DIMENSION_ORDER}"
201+
f"currently supported."
192202
)
193-
xdimindex = dimension_order.find("X")
194-
ydimindex = dimension_order.find("Y")
195-
zdimindex = dimension_order.find("Z")
196-
cdimindex = dimension_order.find("C")
203+
xdimindex = dimension_order.find(DimensionNames.SpatialX)
204+
ydimindex = dimension_order.find(DimensionNames.SpatialY)
205+
zdimindex = dimension_order.find(DimensionNames.SpatialZ)
206+
cdimindex = dimension_order.find(DimensionNames.Channel)
197207
if cdimindex > min(i for i in [xdimindex, ydimindex, zdimindex] if i > -1):
198208
raise exceptions.InvalidDimensionOrderingError(
199209
f"Dimension order {dimension_order} is invalid. Channel dimension "
200-
"must be before X, Y, and Z."
210+
f"must be before X, Y, and Z."
211+
)
212+
213+
if chunk_dims is not None and len(chunk_dims) != ndims:
214+
raise exceptions.UnexpectedShapeError(
215+
f"Chunk dimensions:{chunk_dims} do not match data. "
216+
f"Expected chunk dimension length:{ndims}"
201217
)
202218

203219
if physical_pixel_sizes is None:
@@ -227,12 +243,13 @@ def write_image(
227243
if cdimindex > -1
228244
else [0]
229245
)
246+
# Chunk spatial dimensions
230247
scale_dim_map = {
231-
"T": 1.0,
232-
"C": 1.0,
233-
"Z": pixelsizes[0],
234-
"Y": pixelsizes[1],
235-
"X": pixelsizes[2],
248+
DimensionNames.Time: 1.0,
249+
DimensionNames.Channel: 1.0,
250+
DimensionNames.SpatialZ: pixelsizes[0],
251+
DimensionNames.SpatialY: pixelsizes[1],
252+
DimensionNames.SpatialX: pixelsizes[2],
236253
}
237254
transforms = [
238255
[
@@ -249,26 +266,40 @@ def write_image(
249266
* image_data.shape[ydimindex]
250267
* image_data.itemsize
251268
)
269+
252270
target_chunk_size = 16 * (1024 * 1024) # 16 MB
253271
# this is making an assumption of chunking whole XY planes.
254-
# TODO allow callers to configure chunk dims?
255-
nplanes_per_chunk = int(math.ceil(target_chunk_size / plane_size))
256-
nplanes_per_chunk = (
257-
min(nplanes_per_chunk, image_data.shape[zdimindex]) if zdimindex > -1 else 1
258-
)
259-
chunk_dim_map = {
260-
"T": 1,
261-
"C": 1,
262-
"Z": nplanes_per_chunk,
263-
"Y": image_data.shape[ydimindex],
264-
"X": image_data.shape[xdimindex],
265-
}
266-
chunk_dims = [
267-
dict(
268-
chunks=tuple(chunk_dim_map[d] for d in dimension_order),
269-
compressor=default_compressor,
272+
273+
if chunk_dims is None:
274+
nplanes_per_chunk = int(math.ceil(target_chunk_size / plane_size))
275+
nplanes_per_chunk = (
276+
min(nplanes_per_chunk, image_data.shape[zdimindex])
277+
if zdimindex > -1
278+
else 1
270279
)
271-
]
280+
chunk_dim_map = {
281+
DimensionNames.Time: 1,
282+
DimensionNames.Channel: 1,
283+
DimensionNames.SpatialZ: nplanes_per_chunk,
284+
DimensionNames.SpatialY: image_data.shape[ydimindex],
285+
DimensionNames.SpatialX: image_data.shape[xdimindex],
286+
}
287+
chunks = [
288+
dict(
289+
chunks=OmeZarrWriter._build_chunk_dims(
290+
chunk_dim_map=chunk_dim_map, dimension_order=dimension_order
291+
),
292+
compressor=default_compressor,
293+
)
294+
]
295+
else:
296+
chunks = [
297+
dict(
298+
chunks=chunk_dims,
299+
compressor=default_compressor,
300+
)
301+
]
302+
272303
lasty = image_data.shape[ydimindex]
273304
lastx = image_data.shape[xdimindex]
274305
# TODO scaler might want to use different method for segmentations than raw
@@ -281,9 +312,9 @@ def write_image(
281312
scaler.method = "nearest"
282313
scaler.max_layer = scale_num_levels - 1
283314
scaler.downscale = scale_factor if scale_factor is not None else 2
284-
for i in range(scale_num_levels - 1):
285-
scale_dim_map["Y"] *= scaler.downscale
286-
scale_dim_map["X"] *= scaler.downscale
315+
for _ in range(scale_num_levels - 1):
316+
scale_dim_map[DimensionNames.SpatialY] *= scaler.downscale
317+
scale_dim_map[DimensionNames.SpatialX] *= scaler.downscale
287318
transforms.append(
288319
[
289320
{
@@ -292,24 +323,47 @@ def write_image(
292323
}
293324
]
294325
)
295-
lasty = int(math.ceil(lasty / scaler.downscale))
296-
lastx = int(math.ceil(lastx / scaler.downscale))
297-
plane_size = lasty * lastx * image_data.itemsize
298-
nplanes_per_chunk = int(math.ceil(target_chunk_size / plane_size))
299-
nplanes_per_chunk = (
300-
min(nplanes_per_chunk, image_data.shape[zdimindex])
301-
if zdimindex > -1
302-
else 1
303-
)
304-
chunk_dim_map["Z"] = nplanes_per_chunk
305-
chunk_dim_map["Y"] = lasty
306-
chunk_dim_map["X"] = lastx
307-
chunk_dims.append(
308-
dict(
309-
chunks=tuple(chunk_dim_map[d] for d in dimension_order),
310-
compressor=default_compressor,
326+
327+
if chunk_dims is None:
328+
lasty = int(math.ceil(lasty / scaler.downscale))
329+
lastx = int(math.ceil(lastx / scaler.downscale))
330+
chunk_dim_map = {
331+
DimensionNames.Time: 1,
332+
DimensionNames.Channel: 1,
333+
}
334+
plane_size = lasty * lastx * image_data.itemsize
335+
nplanes_per_chunk = int(math.ceil(target_chunk_size / plane_size))
336+
nplanes_per_chunk = (
337+
min(nplanes_per_chunk, image_data.shape[zdimindex])
338+
if zdimindex > -1
339+
else 1
311340
)
312-
)
341+
342+
chunk_dim_map[DimensionNames.SpatialZ] = nplanes_per_chunk
343+
chunk_dim_map[DimensionNames.SpatialY] = lasty
344+
chunk_dim_map[DimensionNames.SpatialX] = lastx
345+
346+
chunks.append(
347+
dict(
348+
chunks=OmeZarrWriter._build_chunk_dims(
349+
chunk_dim_map=chunk_dim_map,
350+
dimension_order=dimension_order,
351+
),
352+
compressor=default_compressor,
353+
)
354+
)
355+
else:
356+
rescaley = int(math.ceil(chunk_dims[ydimindex] / scaler.downscale))
357+
rescalex = int(math.ceil(chunk_dims[xdimindex] / scaler.downscale))
358+
chunk_dims = tuple(list(chunk_dims[:-2]) + [rescaley, rescalex])
359+
360+
chunks.append(
361+
dict(
362+
chunks=chunk_dims,
363+
compressor=default_compressor,
364+
)
365+
)
366+
313367
else:
314368
scaler = None
315369

@@ -328,11 +382,23 @@ def write_image(
328382
)
329383
# TODO user supplies units?
330384
dim_to_axis = {
331-
"T": {"name": "t", "type": "time", "unit": "millisecond"},
332-
"C": {"name": "c", "type": "channel"},
333-
"Z": {"name": "z", "type": "space", "unit": "micrometer"},
334-
"Y": {"name": "y", "type": "space", "unit": "micrometer"},
335-
"X": {"name": "x", "type": "space", "unit": "micrometer"},
385+
DimensionNames.Time: {"name": "t", "type": "time", "unit": "millisecond"},
386+
DimensionNames.Channel: {"name": "c", "type": "channel"},
387+
DimensionNames.SpatialZ: {
388+
"name": "z",
389+
"type": "space",
390+
"unit": "micrometer",
391+
},
392+
DimensionNames.SpatialY: {
393+
"name": "y",
394+
"type": "space",
395+
"unit": "micrometer",
396+
},
397+
DimensionNames.SpatialX: {
398+
"name": "x",
399+
"type": "space",
400+
"unit": "micrometer",
401+
},
336402
}
337403

338404
axes = [dim_to_axis[d] for d in dimension_order]
@@ -353,5 +419,5 @@ def write_image(
353419
# match the number of datasets in a multiresolution pyramid. One can
354420
# provide different chunk size for each level of a pyramid using this
355421
# option.
356-
storage_options=chunk_dims,
422+
storage_options=chunks,
357423
)

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ def run(self):
105105
"imagecodecs>=2020.5.30",
106106
"lxml>=4.6,<5",
107107
"numpy>=1.16,<2",
108-
"ome-types>=0.3.3",
108+
"ome-types>=0.3.3,<0.4.0",
109109
"ome-zarr>=0.6.1",
110110
"PyYAML>=6.0",
111111
"wrapt>=1.12",

0 commit comments

Comments
 (0)