|
| 1 | +import numpy as np |
| 2 | +import pytest |
| 3 | + |
| 4 | +from zarr.array_spec import ArraySpec |
| 5 | +from zarr.buffer import Buffer, BufferPrototype, NDBuffer |
| 6 | +from zarr.codecs import BloscCodec, BytesCodec, Crc32cCodec, GzipCodec, ZstdCodec |
| 7 | +from zarr.metadata import DataType |
| 8 | + |
| 9 | + |
@pytest.fixture
def buffer_prototype():
    """Provide a ``BufferPrototype`` pairing the ``Buffer`` and ``NDBuffer`` classes."""
    prototype = BufferPrototype(buffer=Buffer, nd_buffer=NDBuffer)
    return prototype
| 13 | + |
| 14 | + |
@pytest.fixture(params=list(DataType.__members__))
def dtype(request):
    """Return the ``DataType`` member whose name is the current parameter.

    The fixture is parametrized over every member name of ``DataType``.
    """
    member_name = request.param
    return DataType[member_name]
| 18 | + |
| 19 | + |
@pytest.fixture(
    params=[
        (0,),
        (1,),
        (10,),
        (10, 5),
        (10, 1),
        (1, 10),
        (5, 6, 7),
        (1, 2, 3, 4, 5),
    ]
)
def shape(request):
    """Return the shape tuple selected by the current parametrization."""
    selected_shape = request.param
    return selected_shape
| 23 | + |
| 24 | + |
@pytest.fixture
def array_spec(buffer_prototype, dtype, shape):
    """Build an ``ArraySpec`` from the ``buffer_prototype``, ``dtype`` and ``shape`` fixtures.

    The dtype passed to ``ArraySpec`` is whatever ``dtype.to_numpy_shortname()``
    returns; the fill value is fixed at ``0`` and the memory order at ``"C"``.
    """
    numpy_dtype = dtype.to_numpy_shortname()
    spec_kwargs = {
        "shape": shape,
        "dtype": numpy_dtype,
        "fill_value": 0,
        "order": "C",
        "prototype": buffer_prototype,
    }
    return ArraySpec(**spec_kwargs)
| 34 | + |
| 35 | + |
# TODO: parametrize all options
@pytest.fixture
def array_bytes_codec():
    """Provide a ``BytesCodec`` constructed with no arguments."""
    codec = BytesCodec()
    return codec
| 40 | + |
| 41 | + |
@pytest.fixture(params=[0, 1, 2])
def input_chunks_and_specs(request, array_spec):
    """Return a list of ``(nd_buffer, array_spec)`` pairs, one per chunk.

    The number of chunks is the fixture parameter (0, 1 or 2). Chunk ``n`` is
    an array of ``array_spec.shape`` filled with the value ``n``, using the
    dtype and memory order from ``array_spec``, wrapped via the prototype's
    ``nd_buffer.from_ndarray_like``.
    """

    def _chunk_filled_with(value):
        # Build the raw ndarray, then wrap it in the prototype's ND buffer type.
        ndarray = np.full(
            shape=array_spec.shape,
            fill_value=value,
            dtype=array_spec.dtype,
            order=array_spec.order,
        )
        return array_spec.prototype.nd_buffer.from_ndarray_like(ndarray)

    chunk_count = request.param
    return [(_chunk_filled_with(n), array_spec) for n in range(chunk_count)]
| 54 | + |
| 55 | + |
async def test_array_bytes_codecs(array_bytes_codec, input_chunks_and_specs):
    """Round-trip every input chunk through the array-to-bytes codec.

    Encodes the chunks, decodes the result using the original specs, and
    checks that the chunk count is preserved at each step and that every
    decoded chunk equals its input.
    """
    encoded = await array_bytes_codec.encode(input_chunks_and_specs)
    assert len(encoded) == len(input_chunks_and_specs)

    # Lengths were just asserted equal; strict=True keeps zip from silently
    # truncating if that invariant is ever broken.
    encoded_chunks_and_specs = [
        (data, spec) for data, (_, spec) in zip(encoded, input_chunks_and_specs, strict=True)
    ]
    decoded = await array_bytes_codec.decode(encoded_chunks_and_specs)
    assert len(decoded) == len(input_chunks_and_specs)

    # Assert chunk by chunk so a failure reports which chunk did not round-trip,
    # instead of a bare ``assert all([...])`` that only reports ``False``.
    for index, ((data, _), decoded_data) in enumerate(
        zip(input_chunks_and_specs, decoded, strict=True)
    ):
        assert np.array_equal(
            data.as_numpy_array(), decoded_data.as_numpy_array()
        ), f"chunk {index} did not round-trip"
| 70 | + |
| 71 | + |
@pytest.fixture
async def input_bytes_and_specs(input_chunks_and_specs):
    """Return ``(buffer, array_spec)`` pairs for the bytes-to-bytes codec tests.

    The ND buffers from ``input_chunks_and_specs`` are encoded with a default
    ``BytesCodec`` and each encoded result is paired with the spec of the
    chunk it came from.
    """
    # transform ndbuffers to buffers via bytes codec
    bytes_codec = BytesCodec()
    encoded = await bytes_codec.encode(input_chunks_and_specs)
    # strict=True: there is no length check here, so a mismatch between the
    # encoder output and the inputs must raise rather than silently drop chunks
    # and let downstream tests pass on fewer items.
    return [
        (data, spec) for data, (_, spec) in zip(encoded, input_chunks_and_specs, strict=True)
    ]
| 81 | + |
| 82 | + |
@pytest.fixture(
    params=[
        pytest.param((GzipCodec, {}), id="GzipDefaults"),
        pytest.param((GzipCodec, {"level": 2}), id="GzipLev2"),
        pytest.param((ZstdCodec, {}), id="ZstdDefaults"),
        pytest.param((ZstdCodec, {"level": 2}), id="ZstdLev2"),
        pytest.param((ZstdCodec, {"level": 2, "checksum": True}), id="ZstdLev2Chksum"),
        pytest.param((Crc32cCodec, {}), id="Crc32c"),
    ]
)
def bytes_bytes_codec(request):
    """Instantiate the codec described by the current ``(class, kwargs)`` parameter."""
    codec_cls, codec_kwargs = request.param
    codec = codec_cls(**codec_kwargs)
    return codec
| 96 | + |
| 97 | + |
async def test_bytes_bytes_codecs(bytes_bytes_codec, input_bytes_and_specs):
    """Round-trip every input buffer through a bytes-to-bytes codec.

    Encodes the buffers, decodes the result using the original specs, and
    checks that the buffer count is preserved at each step and that every
    decoded buffer equals its input.
    """
    encoded = await bytes_bytes_codec.encode(input_bytes_and_specs)
    assert len(encoded) == len(input_bytes_and_specs)

    # Lengths were just asserted equal; strict=True keeps zip from silently
    # truncating if that invariant is ever broken.
    encoded_bytes_and_specs = [
        (data, spec) for data, (_, spec) in zip(encoded, input_bytes_and_specs, strict=True)
    ]
    decoded = await bytes_bytes_codec.decode(encoded_bytes_and_specs)
    assert len(decoded) == len(input_bytes_and_specs)

    # Assert buffer by buffer so a failure reports which one did not round-trip,
    # instead of a bare ``assert all([...])`` that only reports ``False``.
    for index, ((data, _), decoded_data) in enumerate(
        zip(input_bytes_and_specs, decoded, strict=True)
    ):
        assert np.array_equal(
            data.as_numpy_array(), decoded_data.as_numpy_array()
        ), f"chunk {index} did not round-trip"
| 112 | + |
| 113 | + |
# blosc gets its own test because it has so many options
@pytest.mark.parametrize("shuffle", ["noshuffle", "shuffle", "bitshuffle"])
@pytest.mark.parametrize("cname", ["lz4", "lz4hc", "blosclz", "zstd", "zlib", "snappy"])
@pytest.mark.parametrize("clevel", [0, 3, 8])
async def test_blosc_codec(input_bytes_and_specs, shuffle, cname, clevel):
    """Round-trip every input buffer through ``BloscCodec`` for one option combination.

    The codec is built from the parametrized ``cname``, ``clevel`` and
    ``shuffle`` values. The test checks that the buffer count is preserved by
    encode and decode and that every decoded buffer equals its input.
    """
    bytes_bytes_codec = BloscCodec(cname=cname, clevel=clevel, shuffle=shuffle)
    encoded = await bytes_bytes_codec.encode(input_bytes_and_specs)
    assert len(encoded) == len(input_bytes_and_specs)

    # Lengths were just asserted equal; strict=True keeps zip from silently
    # truncating if that invariant is ever broken.
    encoded_bytes_and_specs = [
        (data, spec) for data, (_, spec) in zip(encoded, input_bytes_and_specs, strict=True)
    ]
    decoded = await bytes_bytes_codec.decode(encoded_bytes_and_specs)
    assert len(decoded) == len(input_bytes_and_specs)

    # Assert buffer by buffer so a failure reports which one did not round-trip,
    # instead of a bare ``assert all([...])`` that only reports ``False``.
    for index, ((data, _), decoded_data) in enumerate(
        zip(input_bytes_and_specs, decoded, strict=True)
    ):
        assert np.array_equal(
            data.as_numpy_array(), decoded_data.as_numpy_array()
        ), f"chunk {index} did not round-trip"
0 commit comments