Skip to content

Commit 767eb5f

Browse files
committed
add codec unit tests
1 parent b8baa68 commit 767eb5f

File tree

1 file changed

+132
-0
lines changed

1 file changed

+132
-0
lines changed

tests/v3/test_all_codecs.py

Lines changed: 132 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,132 @@
1+
import numpy as np
2+
import pytest
3+
4+
from zarr.array_spec import ArraySpec
5+
from zarr.buffer import Buffer, BufferPrototype, NDBuffer
6+
from zarr.codecs import BloscCodec, BytesCodec, Crc32cCodec, GzipCodec, ZstdCodec
7+
from zarr.metadata import DataType
8+
9+
10+
@pytest.fixture
def buffer_prototype():
    """Provide a ``BufferPrototype`` built from the ``Buffer`` and ``NDBuffer`` classes."""
    prototype = BufferPrototype(buffer=Buffer, nd_buffer=NDBuffer)
    return prototype
13+
14+
15+
@pytest.fixture(params=list(DataType.__members__))
def dtype(request):
    """Provide one ``DataType`` member per parametrization, looked up by its member name."""
    member_name = request.param
    return DataType[member_name]
18+
19+
20+
@pytest.fixture(
    params=[
        (0,),
        (1,),
        (10,),
        (10, 5),
        (10, 1),
        (1, 10),
        (5, 6, 7),
        (1, 2, 3, 4, 5),
    ]
)
def shape(request):
    """Provide one chunk shape per parametrization (empty, 1-D, 2-D, 3-D and 5-D cases)."""
    chunk_shape = request.param
    return chunk_shape
23+
24+
25+
@pytest.fixture
def array_spec(buffer_prototype, dtype, shape):
    """Assemble an ``ArraySpec`` for the current dtype/shape combination.

    The spec always uses fill value 0 and C order; the dtype is passed as the
    value returned by ``dtype.to_numpy_shortname()``.
    """
    spec_kwargs = {
        "shape": shape,
        "dtype": dtype.to_numpy_shortname(),
        "fill_value": 0,
        "order": "C",
        "prototype": buffer_prototype,
    }
    return ArraySpec(**spec_kwargs)
34+
35+
36+
# TODO: parametrize all options
37+
@pytest.fixture
def array_bytes_codec():
    """Provide a ``BytesCodec`` constructed with no arguments."""
    codec = BytesCodec()
    return codec
40+
41+
42+
@pytest.fixture(params=[0, 1, 2])
def input_chunks_and_specs(request, array_spec):
    """Build a list of ``(nd_buffer, array_spec)`` pairs holding 0, 1 or 2 chunks.

    Chunk ``n`` is a constant array filled with the value ``n``, created with the
    shape, dtype and order of ``array_spec`` and wrapped by the spec's
    ``prototype.nd_buffer.from_ndarray_like``.
    """
    wrap = array_spec.prototype.nd_buffer.from_ndarray_like
    pairs = []
    for fill in range(request.param):
        chunk = np.full(
            shape=array_spec.shape,
            fill_value=fill,
            dtype=array_spec.dtype,
            order=array_spec.order,
        )
        pairs.append((wrap(chunk), array_spec))
    return pairs
54+
55+
56+
async def test_array_bytes_codecs(array_bytes_codec, input_chunks_and_specs):
    """Check that encoding then decoding with the array-to-bytes codec restores every chunk."""
    expected_count = len(input_chunks_and_specs)
    originals = [chunk for chunk, _ in input_chunks_and_specs]
    specs = [spec for _, spec in input_chunks_and_specs]

    encoded = await array_bytes_codec.encode(input_chunks_and_specs)
    assert len(encoded) == expected_count

    # decode() is given (payload, spec) pairs, so each encoded payload is re-paired with its spec.
    decoded = await array_bytes_codec.decode(list(zip(encoded, specs, strict=False)))
    assert len(decoded) == expected_count

    matches = [
        np.array_equal(before.as_numpy_array(), after.as_numpy_array())
        for before, after in zip(originals, decoded, strict=False)
    ]
    assert all(matches)
70+
71+
72+
@pytest.fixture
async def input_bytes_and_specs(input_chunks_and_specs):
    """Convert the ndbuffer inputs into ``(buffer, spec)`` pairs via ``BytesCodec``.

    Each input chunk is encoded with a default ``BytesCodec`` and the resulting
    buffer is paired with the spec of the chunk it came from.

    Raises:
        ValueError: if the codec does not return exactly one buffer per input chunk.
    """
    # transform ndbuffers to buffers via bytes codec
    bytes_codec = BytesCodec()
    encoded = await bytes_codec.encode(input_chunks_and_specs)
    specs = [spec for _, spec in input_chunks_and_specs]
    # strict=True: a length mismatch must fail loudly here. With strict=False the
    # fixture would silently shrink, and dependent tests compare their output
    # lengths against this (already truncated) list.
    return list(zip(encoded, specs, strict=True))
81+
82+
83+
@pytest.fixture(
    params=[
        pytest.param((GzipCodec, {}), id="GzipDefaults"),
        pytest.param((GzipCodec, {"level": 2}), id="GzipLev2"),
        pytest.param((ZstdCodec, {}), id="ZstdDefaults"),
        pytest.param((ZstdCodec, {"level": 2}), id="ZstdLev2"),
        pytest.param((ZstdCodec, {"level": 2, "checksum": True}), id="ZstdLev2Chksum"),
        pytest.param((Crc32cCodec, {}), id="Crc32c"),
    ]
)
def bytes_bytes_codec(request):
    """Instantiate the parametrized codec class with its accompanying keyword arguments."""
    codec_cls, codec_kwargs = request.param
    codec = codec_cls(**codec_kwargs)
    return codec
96+
97+
98+
async def test_bytes_bytes_codecs(bytes_bytes_codec, input_bytes_and_specs):
    """Check that encoding then decoding with a bytes-to-bytes codec restores every buffer."""
    n_inputs = len(input_bytes_and_specs)
    sources = [buf for buf, _ in input_bytes_and_specs]
    specs = [spec for _, spec in input_bytes_and_specs]

    encoded = await bytes_bytes_codec.encode(input_bytes_and_specs)
    assert len(encoded) == n_inputs

    # decode() is given (payload, spec) pairs, so each encoded payload is re-paired with its spec.
    decoded = await bytes_bytes_codec.decode(list(zip(encoded, specs, strict=False)))
    assert len(decoded) == n_inputs

    round_trip_ok = [
        np.array_equal(before.as_numpy_array(), after.as_numpy_array())
        for before, after in zip(sources, decoded, strict=False)
    ]
    assert all(round_trip_ok)
112+
113+
114+
# blosc gets its own test because it has so many options
115+
@pytest.mark.parametrize("shuffle", ["noshuffle", "shuffle", "bitshuffle"])
@pytest.mark.parametrize("cname", ["lz4", "lz4hc", "blosclz", "zstd", "zlib", "snappy"])
@pytest.mark.parametrize("clevel", [0, 3, 8])
async def test_blosc_codec(input_bytes_and_specs, shuffle, cname, clevel):
    """Check the ``BloscCodec`` round trip for each shuffle / cname / clevel combination."""
    codec = BloscCodec(cname=cname, clevel=clevel, shuffle=shuffle)
    n_inputs = len(input_bytes_and_specs)
    sources = [buf for buf, _ in input_bytes_and_specs]
    specs = [spec for _, spec in input_bytes_and_specs]

    compressed = await codec.encode(input_bytes_and_specs)
    assert len(compressed) == n_inputs

    # decode() is given (payload, spec) pairs, so each compressed payload is re-paired with its spec.
    restored = await codec.decode(list(zip(compressed, specs, strict=False)))
    assert len(restored) == n_inputs

    round_trip_ok = [
        np.array_equal(before.as_numpy_array(), after.as_numpy_array())
        for before, after in zip(sources, restored, strict=False)
    ]
    assert all(round_trip_ok)

0 commit comments

Comments
 (0)