Skip to content

Commit 68ad945

Browse files
committed
Add compressor, codec pipeline strategy
1 parent 870265a commit 68ad945

File tree

1 file changed

+49
-7
lines changed

1 file changed

+49
-7
lines changed

src/zarr/testing/strategies.py

Lines changed: 49 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
11
import sys
2-
from typing import Any
2+
from typing import Any, Literal
33

44
import hypothesis.extra.numpy as npst
55
import hypothesis.strategies as st
6+
import numcodecs
7+
import numcodecs.zarr3 as ncodecs
68
import numpy as np
79
from hypothesis import given, settings # noqa: F401
810
from hypothesis.strategies import SearchStrategy
911

1012
import zarr
13+
from zarr import codecs as zcodecs
1114
from zarr.abc.store import RangeByteRequest
1215
from zarr.core.array import Array
1316
from zarr.core.common import ZarrFormat
@@ -86,11 +89,51 @@ def safe_unicode_for_dtype(dtype: np.dtype[np.str_]) -> st.SearchStrategy[str]:
8689
# i.e. stores.examples() will always return the same object per Store class.
8790
# So we map a clear to reset the store.
8891
stores = st.builds(MemoryStore, st.just({})).map(lambda x: sync(x.clear()))
89-
compressors = st.sampled_from([None, "default"])
90-
zarr_formats: st.SearchStrategy[ZarrFormat] = st.sampled_from([2, 3])
92+
zarr_formats: st.SearchStrategy[ZarrFormat] = st.sampled_from([3, 2])
9193
array_shapes = npst.array_shapes(max_dims=4, min_side=0)
9294

9395

96+
@st.composite  # type: ignore[misc]
def codecs(
    draw: st.DrawFn,
    *,
    zarr_formats: st.SearchStrategy[Literal[2, 3]] = zarr_formats,
    dtypes: st.SearchStrategy[np.dtype] | None = None,
) -> Any:
    """Draw ``filters``/``compressors`` keyword arguments for array creation.

    Parameters
    ----------
    draw
        Draw function injected by ``st.composite``.
    zarr_formats
        Strategy producing the Zarr format (2 or 3) that decides which
        family of compressors may be drawn.
    dtypes
        Accepted for callers that want to pass the array dtype along; it is
        currently not consulted when choosing codecs.

    Returns
    -------
    dict
        A dict with two keys:

        - ``"filters"``: ``None`` or an empty tuple.
        - ``"compressors"``: a single codec instance, ``None`` or an empty
          tuple. Format 2 draws from ``numcodecs`` codecs; format 3 draws
          from ``zarr.codecs`` and ``numcodecs.zarr3`` codecs.
    """
    zarr_format = draw(zarr_formats)

    # The dict holds None, tuples and codec instances, so annotate it
    # explicitly instead of letting the first value narrow the inferred type.
    codec_kwargs: dict[str, Any] = {"filters": draw(st.none() | st.just(()))}

    # We intentionally don't parameterize over `level` or `clevel` to reduce
    # the search space.
    if zarr_format == 2:
        # Each codec is listed exactly once so that no codec is over-weighted.
        num_codecs_v2 = st.one_of(
            st.builds(numcodecs.Zlib),
            st.builds(numcodecs.LZMA),
            st.builds(numcodecs.Zstd),
        )
        codec_kwargs["compressors"] = draw(num_codecs_v2 | st.none() | st.just(()))
    else:
        zarr_codecs = st.one_of(
            st.builds(zcodecs.ZstdCodec),
            st.builds(
                zcodecs.BloscCodec,
                # Only the shuffle mode is varied; 0..2 are the integers
                # handed to BloscShuffle.from_int.
                shuffle=st.builds(
                    zcodecs.BloscShuffle.from_int,
                    num=st.integers(min_value=0, max_value=2),
                ),
            ),
            st.builds(zcodecs.GzipCodec),
            st.builds(zcodecs.Crc32cCodec),
        )
        num_codecs_v3 = st.one_of(
            st.builds(ncodecs.Blosc),
            st.builds(ncodecs.LZMA),
            # Not exercised yet:
            # st.builds(ncodecs.PCodec),
            # st.builds(ncodecs.ZFPY),
        )
        # Intentionally prioritize using a codec over no codec: the
        # codec-producing strategies are listed before the no-codec ones.
        codec_kwargs["compressors"] = draw(
            zarr_codecs | num_codecs_v3 | st.none() | st.just(())
        )
    return codec_kwargs
135+
136+
94137
@st.composite # type: ignore[misc]
95138
def numpy_arrays(
96139
draw: st.DrawFn,
@@ -139,12 +182,12 @@ def arrays(
139182
draw: st.DrawFn,
140183
*,
141184
shapes: st.SearchStrategy[tuple[int, ...]] = array_shapes,
142-
compressors: st.SearchStrategy = compressors,
143185
stores: st.SearchStrategy[StoreLike] = stores,
144186
paths: st.SearchStrategy[str | None] = paths,
145187
array_names: st.SearchStrategy = array_names,
146188
arrays: st.SearchStrategy | None = None,
147189
attrs: st.SearchStrategy = attrs,
190+
codecs: st.SearchStrategy = codecs,
148191
zarr_formats: st.SearchStrategy = zarr_formats,
149192
) -> Array:
150193
store = draw(stores)
@@ -157,21 +200,20 @@ def arrays(
157200
nparray, chunks = draw(np_array_and_chunks(arrays=arrays))
158201
# test that None works too.
159202
fill_value = draw(st.one_of([st.none(), npst.from_dtype(nparray.dtype)]))
160-
# compressor = draw(compressors)
161203

162204
expected_attrs = {} if attributes is None else attributes
163205

164206
array_path = _dereference_path(path, name)
165207
root = zarr.open_group(store, mode="w", zarr_format=zarr_format)
166-
208+
codec_kwargs = draw(codecs(zarr_formats=st.just(zarr_format), dtypes=st.just(nparray.dtype)))
167209
a = root.create_array(
168210
array_path,
169211
shape=nparray.shape,
170212
chunks=chunks,
171213
dtype=nparray.dtype,
172214
attributes=attributes,
173-
# compressor=compressor, # FIXME
174215
fill_value=fill_value,
216+
**codec_kwargs,
175217
)
176218

177219
assert isinstance(a, Array)

0 commit comments

Comments
 (0)