|
11 | 11 | import zarr |
12 | 12 | import zarr.core.buffer |
13 | 13 | import zarr.storage |
14 | | -from zarr import Array |
| 14 | +from zarr import Array, config |
15 | 15 | from zarr.storage import MemoryStore, StorePath |
16 | 16 |
|
17 | 17 |
|
@@ -82,47 +82,76 @@ def test_codec_pipeline() -> None: |
82 | 82 |
|
83 | 83 | @pytest.mark.parametrize("dtype", ["|S", "|V"]) |
84 | 84 | async def test_v2_encode_decode(dtype): |
85 | | - store = zarr.storage.MemoryStore() |
86 | | - g = zarr.group(store=store, zarr_format=2) |
87 | | - g.create_array( |
88 | | - name="foo", |
89 | | - shape=(3,), |
90 | | - chunks=(3,), |
91 | | - dtype=dtype, |
92 | | - fill_value=b"X", |
93 | | - ) |
94 | | - |
95 | | - result = await store.get("foo/.zarray", zarr.core.buffer.default_buffer_prototype()) |
96 | | - assert result is not None |
97 | | - |
98 | | - serialized = json.loads(result.to_bytes()) |
99 | | - expected = { |
100 | | - "chunks": [3], |
101 | | - "compressor": None, |
102 | | - "dtype": f"{dtype}0", |
103 | | - "fill_value": "WA==", |
104 | | - "filters": None, |
105 | | - "order": "C", |
106 | | - "shape": [3], |
107 | | - "zarr_format": 2, |
108 | | - "dimension_separator": ".", |
109 | | - } |
110 | | - assert serialized == expected |
111 | | - |
112 | | - data = zarr.open_array(store=store, path="foo")[:] |
113 | | - expected = np.full((3,), b"X", dtype=dtype) |
114 | | - np.testing.assert_equal(data, expected) |
| 85 | + with config.set( |
| 86 | + { |
| 87 | + "v2_default_compressors": { |
| 88 | + "bytes": ["vlen-bytes"], |
| 89 | + }, |
| 90 | + } |
| 91 | + ): |
| 92 | + store = zarr.storage.MemoryStore() |
| 93 | + g = zarr.group(store=store, zarr_format=2) |
| 94 | + g.create_array( |
| 95 | + name="foo", |
| 96 | + shape=(3,), |
| 97 | + chunks=(3,), |
| 98 | + dtype=dtype, |
| 99 | + fill_value=b"X", |
| 100 | + ) |
| 101 | + |
| 102 | + result = await store.get("foo/.zarray", zarr.core.buffer.default_buffer_prototype()) |
| 103 | + assert result is not None |
| 104 | + |
| 105 | + serialized = json.loads(result.to_bytes()) |
| 106 | + expected = { |
| 107 | + "chunks": [3], |
| 108 | + "compressor": None, |
| 109 | + "dtype": f"{dtype}0", |
| 110 | + "fill_value": "WA==", |
| 111 | + "filters": [{"id": "vlen-bytes"}], |
| 112 | + "order": "C", |
| 113 | + "shape": [3], |
| 114 | + "zarr_format": 2, |
| 115 | + "dimension_separator": ".", |
| 116 | + } |
| 117 | + assert serialized == expected |
| 118 | + |
| 119 | + data = zarr.open_array(store=store, path="foo")[:] |
| 120 | + expected = np.full((3,), b"X", dtype=dtype) |
| 121 | + np.testing.assert_equal(data, expected) |
| 122 | + |
| 123 | + |
| 124 | +@pytest.mark.parametrize("dtype_value", [["|S", b"Y"], ["|U", "Y"], ["O", b"Y"]]) |
| 125 | +def test_v2_encode_decode_with_data(dtype_value): |
| 126 | + dtype, value = dtype_value |
| 127 | + with config.set( |
| 128 | + { |
| 129 | + "v2_default_compressors": { |
| 130 | + "unicode": ["vlen-utf8"], |
| 131 | + "bytes": ["vlen-bytes"], |
| 132 | + }, |
| 133 | + } |
| 134 | + ): |
| 135 | + expected = np.full((3,), value, dtype=dtype) |
| 136 | + a = zarr.create( |
| 137 | + shape=(3,), |
| 138 | + zarr_format=2, |
| 139 | + dtype=dtype, |
| 140 | + ) |
| 141 | + a[:] = expected |
| 142 | + data = a[:] |
| 143 | + np.testing.assert_equal(data, expected) |
115 | 144 |
|
116 | 145 |
|
117 | 146 | @pytest.mark.parametrize("dtype", [str, "str"]) |
118 | 147 | async def test_create_dtype_str(dtype: Any) -> None: |
119 | 148 | arr = zarr.create(shape=3, dtype=dtype, zarr_format=2) |
120 | 149 | assert arr.dtype.kind == "O" |
121 | 150 | assert arr.metadata.to_dict()["dtype"] == "|O" |
122 | | - assert arr.metadata.filters == (numcodecs.vlen.VLenUTF8(),) |
123 | | - arr[:] = ["a", "bb", "ccc"] |
| 151 | + assert arr.metadata.filters == (numcodecs.vlen.VLenBytes(),) |
| 152 | + arr[:] = [b"a", b"bb", b"ccc"] |
124 | 153 | result = arr[:] |
125 | | - np.testing.assert_array_equal(result, np.array(["a", "bb", "ccc"], dtype="object")) |
| 154 | + np.testing.assert_array_equal(result, np.array([b"a", b"bb", b"ccc"], dtype="object")) |
126 | 155 |
|
127 | 156 |
|
128 | 157 | @pytest.mark.parametrize("filters", [[], [numcodecs.Delta(dtype="<i4")], [numcodecs.Zlib(level=2)]]) |
@@ -177,3 +206,22 @@ def test_v2_non_contiguous(array_order: Literal["C", "F"], data_order: Literal[" |
177 | 206 | assert a.flags.c_contiguous |
178 | 207 | arr[slice(6, 9, None), slice(3, 6, None)] = a |
179 | 208 | np.testing.assert_array_equal(arr[slice(6, 9, None), slice(3, 6, None)], a) |
| 209 | + |
| 210 | + |
| 211 | +@pytest.mark.parametrize( |
| 212 | + "dtype_expected", |
| 213 | + [["b", "zstd"], ["i", "zstd"], ["f", "zstd"], ["|S1", "vlen-bytes"], ["|U1", "vlen-utf8"]], |
| 214 | +) |
| 215 | +def test_default_filters_and_compressor(dtype_expected: Any) -> None: |
| 216 | + with config.set( |
| 217 | + { |
| 218 | + "v2_dtype_kind_to_default_filters_and_compressor": { |
| 219 | + "numeric": ["zstd"], |
| 220 | + "unicode": ["vlen-utf8"], |
| 221 | + "bytes": ["vlen-bytes"], |
| 222 | + }, |
| 223 | + } |
| 224 | + ): |
| 225 | + dtype, expected = dtype_expected |
| 226 | + arr = zarr.create(shape=(3,), path="foo", store={}, zarr_format=2, dtype=dtype) |
| 227 | + assert arr.metadata.filters[0].codec_id == expected |
0 commit comments