Skip to content

Commit 7c297bc

Browse files
authored
Merge pull request #898 from fractal-analytics-platform/897_chunking_params
Add parameter for chunk sizes in converter & set default Z chunking to 10
2 parents 4fb7463 + 11e3fac commit 7c297bc

File tree

5 files changed

+203
-1
lines changed

5 files changed

+203
-1
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@
55

66
* Tasks:
77
* Remove overlap checking for output ROIs in Cellpose task to address performance issues (\#889).
8+
* Expose chunking options in Cellvoyager OME-Zarr converter & set default Z chunking to 10 (\#898).
9+
* Library:
10+
* Expose kwargs for build_pyramid (\#895).
811
* Testing:
912
* Fix issues with coverage report listing `.venv` Python modules for other dependencies (\#892).
1013
* Add `persist-credentials: false` to all `actions/checkout@v4` GitHub Action steps (\#893).

fractal_tasks_core/__FRACTAL_MANIFEST__.json

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,35 @@
201201
},
202202
"args_schema_parallel": {
203203
"$defs": {
204+
"ChunkSizes": {
205+
"description": "Chunk size settings for OME-Zarrs.",
206+
"properties": {
207+
"t": {
208+
"title": "T",
209+
"type": "integer"
210+
},
211+
"c": {
212+
"default": 1,
213+
"title": "C",
214+
"type": "integer"
215+
},
216+
"z": {
217+
"default": 10,
218+
"title": "Z",
219+
"type": "integer"
220+
},
221+
"y": {
222+
"title": "Y",
223+
"type": "integer"
224+
},
225+
"x": {
226+
"title": "X",
227+
"type": "integer"
228+
}
229+
},
230+
"title": "ChunkSizes",
231+
"type": "object"
232+
},
204233
"InitArgsCellVoyager": {
205234
"description": "Arguments to be passed from cellvoyager converter init to compute",
206235
"properties": {
@@ -260,6 +289,11 @@
260289
"$ref": "#/$defs/InitArgsCellVoyager",
261290
"title": "Init Args",
262291
"description": "Initialization arguments provided by `create_cellvoyager_ome_zarr_init`."
292+
},
293+
"chunk_sizes": {
294+
"$ref": "#/$defs/ChunkSizes",
295+
"title": "Chunk Sizes",
296+
"description": "Used to override the default chunk sizes for the OME-Zarr. By default, the task uses chunks that match the microscope field-of-view size in y and x, with 10 z planes per chunk. For example, that can mean c: 1, z: 10, y: 2160, x: 2560."
263297
}
264298
},
265299
"required": [
@@ -489,6 +523,35 @@
489523
},
490524
"args_schema_parallel": {
491525
"$defs": {
526+
"ChunkSizes": {
527+
"description": "Chunk size settings for OME-Zarrs.",
528+
"properties": {
529+
"t": {
530+
"title": "T",
531+
"type": "integer"
532+
},
533+
"c": {
534+
"default": 1,
535+
"title": "C",
536+
"type": "integer"
537+
},
538+
"z": {
539+
"default": 10,
540+
"title": "Z",
541+
"type": "integer"
542+
},
543+
"y": {
544+
"title": "Y",
545+
"type": "integer"
546+
},
547+
"x": {
548+
"title": "X",
549+
"type": "integer"
550+
}
551+
},
552+
"title": "ChunkSizes",
553+
"type": "object"
554+
},
492555
"InitArgsCellVoyager": {
493556
"description": "Arguments to be passed from cellvoyager converter init to compute",
494557
"properties": {
@@ -548,6 +611,11 @@
548611
"$ref": "#/$defs/InitArgsCellVoyager",
549612
"title": "Init Args",
550613
"description": "Initialization arguments provided by `create_cellvoyager_ome_zarr_init`."
614+
},
615+
"chunk_sizes": {
616+
"$ref": "#/$defs/ChunkSizes",
617+
"title": "Chunk Sizes",
618+
"description": "Used to override the default chunk sizes for the OME-Zarr. By default, the task uses chunks that match the microscope field-of-view size in y and x, with 10 z planes per chunk. For example, that can mean c: 1, z: 10, y: 2160, x: 2560."
551619
}
552620
},
553621
"required": [

fractal_tasks_core/tasks/cellvoyager_to_ome_zarr_compute.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import zarr
1919
from anndata import read_zarr
2020
from dask.array.image import imread
21+
from pydantic import Field
2122
from pydantic import validate_call
2223

2324
from fractal_tasks_core.cellvoyager.filenames import (
@@ -32,6 +33,7 @@
3233
from fractal_tasks_core.roi import (
3334
convert_ROI_table_to_indices,
3435
)
36+
from fractal_tasks_core.tasks.io_models import ChunkSizes
3537
from fractal_tasks_core.tasks.io_models import InitArgsCellVoyager
3638

3739

@@ -59,6 +61,7 @@ def cellvoyager_to_ome_zarr_compute(
5961
# Fractal parameters
6062
zarr_url: str,
6163
init_args: InitArgsCellVoyager,
64+
chunk_sizes: ChunkSizes = Field(default_factory=ChunkSizes),
6265
):
6366
"""
6467
Convert Yokogawa output (png, tif) to zarr file.
@@ -76,6 +79,10 @@ def cellvoyager_to_ome_zarr_compute(
7679
(standard argument for Fractal tasks, managed by Fractal server).
7780
init_args: Initialization arguments provided by
7881
`create_cellvoyager_ome_zarr_init`.
82+
chunk_sizes: Used to overwrite the default chunk sizes for the
83+
OME-Zarr. By default, the task will chunk the same as the
84+
microscope field of view size, with 10 z planes per chunk.
85+
For example, that can mean c: 1, z: 10, y: 2160, x: 2560.
7986
"""
8087
zarr_url = zarr_url.rstrip("/")
8188
# Read attributes from NGFF metadata
@@ -134,7 +141,14 @@ def cellvoyager_to_ome_zarr_compute(
134141
sample = imread(tmp_images.pop())
135142

136143
# Initialize zarr
137-
chunksize = (1, 1, sample.shape[1], sample.shape[2])
144+
chunksize_default = {
145+
"c": 1,
146+
"z": 10,
147+
"y": sample.shape[1],
148+
"x": sample.shape[2],
149+
}
150+
chunksize = chunk_sizes.get_chunksize(chunksize_default=chunksize_default)
151+
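# NOTE(review): the zarr array below is 4D (c, z, y, x); if chunk_sizes.t is set, get_chunksize returns a 5-tuple - confirm this case is handled.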
138152
canvas_zarr = zarr.create(
139153
shape=(len(wavelength_ids), max_z, max_y, max_x),
140154
chunks=chunksize,

fractal_tasks_core/tasks/io_models.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1+
from typing import Dict
12
from typing import Literal
23
from typing import Optional
4+
from typing import Tuple
35

46
from pydantic import BaseModel
57
from pydantic import Field
@@ -183,3 +185,56 @@ def channel_is_present(self: Self) -> Self:
183185
f"Input item has type={_type} but channel={channel}."
184186
)
185187
return self
188+
189+
190+
class ChunkSizes(BaseModel):
    """
    Chunk size settings for OME-Zarrs.

    Attributes:
        t: Chunk size of time axis.
        c: Chunk size of channel axis.
        z: Chunk size of Z axis.
        y: Chunk size of y axis.
        x: Chunk size of x axis.
    """

    t: Optional[int] = None
    c: Optional[int] = 1
    z: Optional[int] = 10
    y: Optional[int] = None
    x: Optional[int] = None

    def get_chunksize(
        self, chunksize_default: Optional[Dict[str, int]] = None
    ) -> Tuple[int, ...]:
        """
        Merge these settings with fallback values into a chunk-size tuple.

        For each axis, the value set on this instance wins whenever it is
        not `None`; otherwise the value from `chunksize_default` is used, if
        that axis is present there. Axes without a value from either source
        are left out, so the result may have fewer than five entries.

        Args:
            chunksize_default: Fallback chunk sizes keyed by axis name
                (`t`, `c`, `z`, `y`, `x`). `None` or an empty dict means
                that there are no fallbacks.

        Returns:
            The chunk sizes in `t`, `c`, `z`, `y`, `x` order, restricted to
            the axes for which a value is available.

        Raises:
            ValueError: If `chunksize_default` contains keys other than the
                five axis names.
        """
        # Single source of truth for both the allowed keys and the order of
        # the output tuple.
        axes = ("t", "c", "z", "y", "x")
        fallbacks = chunksize_default or {}

        invalid_keys = set(fallbacks) - set(axes)
        if invalid_keys:
            # Render the keys in a fixed order: interpolating a raw set
            # would make the message vary between interpreter runs.
            invalid_repr = ", ".join(
                repr(key) for key in sorted(invalid_keys, key=str)
            )
            allowed_repr = ", ".join(repr(axis) for axis in axes)
            raise ValueError(
                f"Invalid keys in chunksize_default: {{{invalid_repr}}}. "
                f"Only {{{allowed_repr}}} are allowed."
            )

        merged = {}
        for axis in axes:
            own_value = getattr(self, axis)
            if own_value is not None:
                # Values set on the instance take precedence.
                merged[axis] = own_value
            elif axis in fallbacks:
                merged[axis] = fallbacks[axis]

        return tuple(merged[axis] for axis in axes if axis in merged)
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
import pytest
2+
3+
from fractal_tasks_core.tasks.io_models import ChunkSizes
4+
5+
6+
def test_valid_chunksize_default():
    """Explicit `t`/`c` win over the fallbacks; `z` keeps its model default."""
    fallbacks = {"t": 10, "c": 1, "y": 2160, "x": 2560}
    sizes = ChunkSizes(t=5, c=2)
    # `z` is absent from the fallbacks, so the ChunkSizes default of 10 is
    # what ends up in the tuple.
    expected = (5, 2, 10, 2160, 2560)
    assert sizes.get_chunksize(fallbacks) == expected
14+
15+
16+
def test_chunksize_default_with_overrides():
    """Values set on ChunkSizes replace fallbacks; `None` lets them through."""
    fallbacks = {"t": 10, "c": 1, "z": 15, "y": 2160, "x": 2560}
    sizes = ChunkSizes(t=5, c=None, z=20)
    # `c` is None on the instance, so the fallback value 1 is used for it.
    expected = (5, 1, 20, 2160, 2560)
    assert sizes.get_chunksize(fallbacks) == expected
22+
23+
24+
def test_chunksize_default_with_extra_keys():
    """An unknown axis name in the fallback dict must raise `ValueError`."""
    fallbacks = {"a": 100, "c": 1, "x": 2560}
    sizes = ChunkSizes(t=5, c=2)
    expected_message = "Invalid keys in chunksize_default: {'a'}"
    with pytest.raises(ValueError, match=expected_message):
        sizes.get_chunksize(fallbacks)
32+
33+
34+
def test_chunksize_empty_default():
    """Without fallbacks, only the axes set on ChunkSizes are returned."""
    sizes = ChunkSizes(t=5, c=2)
    # `z` contributes its model default of 10; `y` and `x` have no value.
    assert sizes.get_chunksize() == (5, 2, 10)
39+
40+
41+
def test_chunksize_empty_chunksizes():
    """With every ChunkSizes field unset, the fallbacks are used as given."""
    fallbacks = {"c": 1, "z": 64}
    sizes = ChunkSizes(c=None, z=None)
    assert sizes.get_chunksize(fallbacks) == (1, 64)
48+
49+
50+
def test_chunksize_default_with_empty_chunksize():
    """No fallbacks and no ChunkSizes values yield an empty tuple."""
    sizes = ChunkSizes(c=None, z=None)
    assert sizes.get_chunksize() == ()
55+
56+
57+
def test_partial_chunksize_default():
    """Axes missing from both sources are dropped from the result."""
    fallbacks = {"z": 10, "y": 2160}
    sizes = ChunkSizes(t=5, c=None)
    # `c` and `x` have no value anywhere, so only t, z and y remain.
    assert sizes.get_chunksize(fallbacks) == (5, 10, 2160)

0 commit comments

Comments
 (0)