Skip to content

Commit 4fb7463

Browse files
authored
Merge pull request #895 from m-albert/build_pyramid_zarr_creation_options
Added optional parameter open_array_kwargs to build_pyramid
2 parents cf0bb32 + 4a05a5c commit 4fb7463

File tree

2 files changed

+62
-2
lines changed

2 files changed

+62
-2
lines changed

fractal_tasks_core/pyramids.py

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,14 @@
1515
import logging
1616
import pathlib
1717
from typing import Callable
18+
from typing import Mapping
1819
from typing import Optional
1920
from typing import Sequence
2021
from typing import Union
2122

2223
import dask.array as da
2324
import numpy as np
25+
import zarr.errors
2426

2527
logger = logging.getLogger(__name__)
2628

@@ -33,6 +35,7 @@ def build_pyramid(
3335
coarsening_xy: int = 2,
3436
chunksize: Optional[Sequence[int]] = None,
3537
aggregation_function: Optional[Callable] = None,
38+
open_array_kwargs: Optional[Mapping] = None,
3639
) -> None:
3740

3841
"""
@@ -48,6 +51,7 @@ def build_pyramid(
4851
coarsening_xy: Linear coarsening factor between subsequent levels.
4952
chunksize: Shape of a single chunk.
5053
aggregation_function: Function to be used when downsampling.
54+
open_array_kwargs: Additional arguments for zarr.open.
5155
"""
5256

5357
# Clean up zarrurl
@@ -100,10 +104,33 @@ def build_pyramid(
100104
f"{str(newlevel_rechunked)}"
101105
)
102106

107+
if open_array_kwargs is None:
108+
open_array_kwargs = {}
109+
110+
# If overwrite is false, check that the array doesn't exist yet
111+
if not overwrite:
112+
try:
113+
zarr.open(f"{zarrurl}/{ind_level}", mode="r")
114+
raise ValueError(
115+
f"While building the pyramids, pyramid level {ind_level} "
116+
"already existed, but `build_pyramid` was called with "
117+
f"{overwrite=}."
118+
)
119+
except zarr.errors.PathNotFoundError:
120+
pass
121+
122+
zarrarr = zarr.open(
123+
f"{zarrurl}/{ind_level}",
124+
shape=newlevel_rechunked.shape,
125+
chunks=newlevel_rechunked.chunksize,
126+
dtype=newlevel_rechunked.dtype,
127+
mode="w",
128+
**open_array_kwargs,
129+
)
130+
103131
# Write zarr and store output (useful to construct next level)
104132
previous_level = newlevel_rechunked.to_zarr(
105-
zarrurl,
106-
component=f"{ind_level}",
133+
zarrarr,
107134
overwrite=overwrite,
108135
compute=True,
109136
return_stored=True,

tests/test_unit_pyramid_creation.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,3 +64,36 @@ def test_build_pyramid(tmp_path):
6464
assert level_3.chunksize == (9, 9)
6565
assert level_4.shape == (3, 3)
6666
assert level_5.shape == (1, 1)
67+
68+
# Succeed
69+
zarrurl = str(tmp_path / "F.zarr")
70+
da.zeros(shape=(8, 8)).to_zarr(f"{zarrurl}/0")
71+
build_pyramid(
72+
zarrurl=zarrurl,
73+
coarsening_xy=2,
74+
num_levels=3,
75+
open_array_kwargs={"write_empty_chunks": False, "fill_value": 0},
76+
)
77+
level_1 = da.from_zarr(f"{zarrurl}/1")
78+
level_2 = da.from_zarr(f"{zarrurl}/2")
79+
assert level_1.shape == (4, 4)
80+
assert level_2.shape == (2, 2)
81+
# check that the empty chunks are not written to disk
82+
assert not (tmp_path / "F.zarr/1/0.0").exists()
83+
assert not (tmp_path / "F.zarr/2/0.0").exists()
84+
85+
86+
def test_build_pyramid_overwrite(tmp_path):
87+
# Succeed
88+
zarrurl = str(tmp_path / "D.zarr")
89+
da.ones(shape=(8, 8)).to_zarr(f"{zarrurl}/0")
90+
build_pyramid(zarrurl=zarrurl, coarsening_xy=2, num_levels=3)
91+
# Should fail because overwrite is not set
92+
with pytest.raises(ValueError):
93+
build_pyramid(
94+
zarrurl=zarrurl, coarsening_xy=2, num_levels=3, overwrite=False
95+
)
96+
# Should work
97+
build_pyramid(
98+
zarrurl=zarrurl, coarsening_xy=2, num_levels=3, overwrite=True
99+
)

0 commit comments

Comments
 (0)