|
24 | 24 | from zarr.core.buffer import NDBuffer |
25 | 25 | from zarr.core.codec_pipeline import BatchedCodecPipeline |
26 | 26 | from zarr.core.config import BadConfigError, config |
27 | | -from zarr.core.dtype import get_data_type_from_native_dtype |
| 27 | +from zarr.core.dtype._numpy import Int8, VariableLengthString |
28 | 28 | from zarr.core.indexing import SelectorTuple |
29 | 29 | from zarr.registry import ( |
30 | 30 | fully_qualified_name, |
|
48 | 48 |
|
49 | 49 | def test_config_defaults_set() -> None: |
50 | 50 | # regression test for available defaults |
51 | | - assert config.defaults == [ |
52 | | - { |
53 | | - "default_zarr_format": 3, |
54 | | - "array": { |
55 | | - "order": "C", |
56 | | - "write_empty_chunks": False, |
57 | | - "v2_default_compressor": {"default": {"id": "zstd", "level": 0, "checksum": False}}, |
58 | | - "v2_default_filters": { |
59 | | - "default": None, |
60 | | - "numpy__variable_length_utf8": [{"id": "vlen-utf8"}], |
61 | | - "numpy__fixed_length_ucs4": [{"id": "vlen-utf8"}], |
62 | | - "numpy__fixed_length_ascii": [{"id": "vlen-bytes"}], |
| 51 | + assert ( |
| 52 | + config.defaults |
| 53 | + == [ |
| 54 | + { |
| 55 | + "default_zarr_format": 3, |
| 56 | + "array": { |
| 57 | + "order": "C", |
| 58 | + "write_empty_chunks": False, |
| 59 | + "v2_default_compressor": { |
| 60 | + "default": {"id": "zstd", "level": 0, "checksum": False}, |
| 61 | + "variable-length-string": {"id": "zstd", "level": 0, "checksum": False}, |
| 62 | + }, |
| 63 | + "v2_default_filters": { |
| 64 | + "default": None, |
| 65 | + "variable-length-string": [{"id": "vlen-utf8"}], |
| 66 | + }, |
| 67 | + "v3_default_filters": {"default": [], "variable-length-string": []}, |
| 68 | + "v3_default_serializer": { |
| 69 | + "default": {"name": "bytes", "configuration": {"endian": "little"}}, |
| 70 | + "variable-length-string": {"name": "vlen-utf8"}, |
| 71 | + }, |
| 72 | + "v3_default_compressors": { |
| 73 | + "default": [ |
| 74 | + {"name": "zstd", "configuration": {"level": 0, "checksum": False}}, |
| 75 | + ], |
| 76 | + "variable-length-string": [ |
| 77 | + {"name": "zstd", "configuration": {"level": 0, "checksum": False}} |
| 78 | + ], |
| 79 | + }, |
63 | 80 | }, |
64 | | - "v3_default_filters": {"default": []}, |
65 | | - "v3_default_serializer": { |
66 | | - "default": {"name": "bytes", "configuration": {"endian": "little"}}, |
67 | | - "numpy__variable_length_utf8": {"name": "vlen-utf8"}, |
68 | | - "numpy__fixed_length_ucs4": {"name": "vlen-utf8"}, |
69 | | - "r*": {"name": "vlen-bytes"}, |
| 81 | + "async": {"concurrency": 10, "timeout": None}, |
| 82 | + "threading": {"max_workers": None}, |
| 83 | + "json_indent": 2, |
| 84 | + "codec_pipeline": { |
| 85 | + "path": "zarr.core.codec_pipeline.BatchedCodecPipeline", |
| 86 | + "batch_size": 1, |
70 | 87 | }, |
71 | | - "v3_default_compressors": { |
72 | | - "default": [ |
73 | | - {"name": "zstd", "configuration": {"level": 0, "checksum": False}}, |
74 | | - ] |
| 88 | + "codecs": { |
| 89 | + "blosc": "zarr.codecs.blosc.BloscCodec", |
| 90 | + "gzip": "zarr.codecs.gzip.GzipCodec", |
| 91 | + "zstd": "zarr.codecs.zstd.ZstdCodec", |
| 92 | + "bytes": "zarr.codecs.bytes.BytesCodec", |
| 93 | + "endian": "zarr.codecs.bytes.BytesCodec", # compatibility with earlier versions of ZEP1 |
| 94 | + "crc32c": "zarr.codecs.crc32c_.Crc32cCodec", |
| 95 | + "sharding_indexed": "zarr.codecs.sharding.ShardingCodec", |
| 96 | + "transpose": "zarr.codecs.transpose.TransposeCodec", |
| 97 | + "vlen-utf8": "zarr.codecs.vlen_utf8.VLenUTF8Codec", |
| 98 | + "vlen-bytes": "zarr.codecs.vlen_utf8.VLenBytesCodec", |
75 | 99 | }, |
76 | | - }, |
77 | | - "async": {"concurrency": 10, "timeout": None}, |
78 | | - "threading": {"max_workers": None}, |
79 | | - "json_indent": 2, |
80 | | - "codec_pipeline": { |
81 | | - "path": "zarr.core.codec_pipeline.BatchedCodecPipeline", |
82 | | - "batch_size": 1, |
83 | | - }, |
84 | | - "buffer": "zarr.core.buffer.cpu.Buffer", |
85 | | - "ndbuffer": "zarr.core.buffer.cpu.NDBuffer", |
86 | | - "codecs": { |
87 | | - "blosc": "zarr.codecs.blosc.BloscCodec", |
88 | | - "gzip": "zarr.codecs.gzip.GzipCodec", |
89 | | - "zstd": "zarr.codecs.zstd.ZstdCodec", |
90 | | - "bytes": "zarr.codecs.bytes.BytesCodec", |
91 | | - "endian": "zarr.codecs.bytes.BytesCodec", |
92 | | - "crc32c": "zarr.codecs.crc32c_.Crc32cCodec", |
93 | | - "sharding_indexed": "zarr.codecs.sharding.ShardingCodec", |
94 | | - "transpose": "zarr.codecs.transpose.TransposeCodec", |
95 | | - "vlen-utf8": "zarr.codecs.vlen_utf8.VLenUTF8Codec", |
96 | | - "vlen-bytes": "zarr.codecs.vlen_utf8.VLenBytesCodec", |
97 | | - }, |
98 | | - } |
99 | | - ] |
| 100 | + "buffer": "zarr.core.buffer.cpu.Buffer", |
| 101 | + "ndbuffer": "zarr.core.buffer.cpu.NDBuffer", |
| 102 | + } |
| 103 | + ] |
| 104 | + ) |
100 | 105 | assert config.get("array.order") == "C" |
101 | 106 | assert config.get("async.concurrency") == 10 |
102 | 107 | assert config.get("async.timeout") is None |
@@ -297,23 +302,26 @@ class NewCodec2(BytesCodec): |
297 | 302 | get_codec_class("new_codec") |
298 | 303 |
|
299 | 304 |
|
300 | | -@pytest.mark.parametrize("dtype", ["int", "bytes", "str"]) |
301 | | -async def test_default_codecs(dtype: str) -> None: |
| 305 | +@pytest.mark.parametrize("dtype_category", ["variable-length-string", "default"]) |
| 306 | +async def test_default_codecs(dtype_category: str) -> None: |
302 | 307 | """ |
303 | 308 | Test that the default compressors are sensitive to the current setting of the config. |
304 | 309 | """ |
305 | | - zdtype = get_data_type_from_native_dtype(dtype) |
| 310 | + if dtype_category == "variable-length-string": |
| 311 | + zdtype = VariableLengthString() |
| 312 | + else: |
| 313 | + zdtype = Int8() |
306 | 314 | expected_compressors = (GzipCodec(),) |
307 | 315 | new_conf = { |
308 | | - f"array.v3_default_compressors.{zdtype._zarr_v3_name.replace('.', '__')}": [ |
| 316 | + f"array.v3_default_compressors.{dtype_category}": [ |
309 | 317 | c.to_dict() for c in expected_compressors |
310 | 318 | ] |
311 | 319 | } |
312 | 320 | with config.set(new_conf): |
313 | 321 | arr = await create_array( |
314 | 322 | shape=(100,), |
315 | 323 | chunks=(100,), |
316 | | - dtype=dtype, |
| 324 | + dtype=zdtype, |
317 | 325 | zarr_format=3, |
318 | 326 | store=MemoryStore(), |
319 | 327 | ) |
|
0 commit comments