Skip to content

Commit 9a3f1b4

Browse files
authored
Merge pull request #152 from HiPCTProject/memory-downsample
Add function to get memory for downsampling
2 parents 29d869b + 909c5ef commit 9a3f1b4

File tree

5 files changed

+41
-7
lines changed

5 files changed

+41
-7
lines changed

docs/index.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,3 +35,14 @@ This is easy to do with `virtual environments <https://docs.astral.sh/uv/pip/env
3535
Changelog
3636
---------
3737
See https://github.com/HiPCTProject/stack-to-chunk/releases for the list of tags and a changelog for each one.
38+
39+
Version 2
40+
~~~~~~~~~
41+
42+
Version 2 of ``stack-to-chunk`` is a major breaking release to add support for OME-Zarr version 0.5 (and therefore Zarr version 3).
43+
The following are major changes to the library:
44+
45+
- ``stack-to-chunk`` automatically adds sharding to the resulting data.
46+
See the guide page for more information on how this works.
47+
- ``memory_per_process`` has been renamed ``memory_per_slab_process``, as it calculates the memory required to copy a single slab of data.
48+
- A new function, ``memory_per_downsample_process``, has also been added to calculate the minimum memory required in a downsampling process.

docs/tutorial/tutorial.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@
9393
# without any downsampling. Before doing this let's do a quick check of how much memory
9494
# each process will take up when we run stack-to-chunk:
9595

96-
bytes_per_process = stack_to_chunk.memory_per_process(images, chunk_size=16)
96+
bytes_per_process = stack_to_chunk.memory_per_slab_process(images, chunk_size=16)
9797
print(f"Each process will use {bytes_per_process / 1e6:.1f} MB")
9898

9999

src/stack_to_chunk/__init__.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,20 @@
44
"SPATIAL_UNIT",
55
"MultiScaleGroup",
66
"__version__",
7-
"memory_per_process",
7+
"memory_per_downsample_process",
8+
"memory_per_slab_process",
89
"open_multiscale_group",
910
]
1011

1112
from loguru import logger
1213

1314
from ._version import __version__
14-
from .main import MultiScaleGroup, memory_per_process, open_multiscale_group
15+
from .main import (
16+
MultiScaleGroup,
17+
memory_per_downsample_process,
18+
memory_per_slab_process,
19+
open_multiscale_group,
20+
)
1521
from .ome_ngff import SPATIAL_UNIT
1622

1723
logger.disable("stack_to_chunk")

src/stack_to_chunk/main.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,9 @@
2323
DEFAULT_DIMENSION_NAMES = ("x", "y", "z")
2424

2525

26-
def memory_per_process(input_data: Array, *, chunk_size: int) -> int:
26+
def memory_per_slab_process(input_data: Array, *, chunk_size: int) -> int:
2727
"""
28-
The amount of memory each stack-to-chunk process will use (in bytes).
28+
The amount of memory each stack-to-chunk slab copying process will use (in bytes).
2929
3030
This is a lower bound on memory use, equal to the size of a slab of data with size
3131
(nx, ny, chunk_size), where (nx, ny) is the shape of a single input
@@ -35,6 +35,17 @@ def memory_per_process(input_data: Array, *, chunk_size: int) -> int:
3535
return int(input_data.shape[0] * input_data.shape[1] * itemsize * chunk_size)
3636

3737

38+
def memory_per_downsample_process(input_group: "MultiScaleGroup") -> int:
    """
    The amount of memory each stack-to-chunk downsampling process will use (in bytes).

    This is a lower bound on memory use.
    """
    # Full-resolution data lives at key "0" inside the multiscale group.
    full_res: zarr.Array = input_group._group["0"]  # noqa: SLF001
    # One downsampling process works on a slab whose z-extent is the shard size
    # along the third axis of the full-resolution array.
    slab_bytes = memory_per_slab_process(full_res, chunk_size=full_res.shards[2])
    # NOTE(review): 5/8 is the project's lower-bound scaling factor for a
    # downsample pass relative to a full slab copy — rationale not visible here.
    return math.ceil(5 * slab_bytes / 8)
47+
48+
3849
class MultiScaleGroup:
3950
"""
4051
A class for creating and interacting with a OME-Zarr multi-scale group.

src/stack_to_chunk/tests/test_main.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,12 @@
1313
import zarr.codecs
1414
from pydantic_zarr.v3 import ArraySpec, NamedConfig
1515

16-
from stack_to_chunk import MultiScaleGroup, memory_per_process, open_multiscale_group
16+
from stack_to_chunk import (
17+
MultiScaleGroup,
18+
memory_per_slab_process,
19+
open_multiscale_group,
20+
)
21+
from stack_to_chunk.main import memory_per_downsample_process
1722

1823

1924
def check_zattrs(zarr_path: Path, expected: dict[str, Any]) -> None:
@@ -153,7 +158,7 @@ def test_workflow(tmp_path: Path, arr: da.Array) -> None:
153158
},
154159
)
155160

156-
assert memory_per_process(arr, chunk_size=chunk_size) == 18282880
161+
assert memory_per_slab_process(arr, chunk_size=chunk_size) == 18282880
157162
group.add_full_res_data(
158163
arr,
159164
n_processes=1,
@@ -172,6 +177,7 @@ def test_workflow(tmp_path: Path, arr: da.Array) -> None:
172177
group = open_multiscale_group(zarr_path)
173178
assert group.levels == [0]
174179

180+
assert memory_per_downsample_process(group) == 11426800
175181
group.add_downsample_level(1, n_processes=2)
176182
assert group.levels == [0, 1]
177183

0 commit comments

Comments
 (0)