diff --git a/changes/3228.removal.rst b/changes/3228.removal.rst new file mode 100644 index 0000000000..0463897755 --- /dev/null +++ b/changes/3228.removal.rst @@ -0,0 +1,16 @@ +Removes default chunk encoding settings (filters, serializer, compressors) from the global +configuration object. + +This removal is justified on the basis that storing chunk encoding settings in the config required +a brittle, confusing, and inaccurate categorization of array data types, which was particularly +unsuitable after the recent addition of new data types that didn't fit naturally into the +pre-existing categories. + +The default chunk encoding is the same (Zstandard compression, and the required object codecs for +variable length data types), but the chunk encoding is now generated by functions that cannot be +reconfigured at runtime. Users who relied on setting the default chunk encoding via the global configuration object should +instead specify the desired chunk encoding explicitly when creating an array. + +This change also adds an extra validation step to the creation of Zarr V2 arrays, which ensures that +arrays with a ``VariableLengthUTF8`` or ``VariableLengthBytes`` data type cannot be created without the +correct "object codec". diff --git a/docs/user-guide/arrays.rst b/docs/user-guide/arrays.rst index f45dfbebe8..67b134d442 100644 --- a/docs/user-guide/arrays.rst +++ b/docs/user-guide/arrays.rst @@ -246,16 +246,6 @@ built-in delta filter:: >>> z.compressors (LZMA(codec_name='numcodecs.lzma', codec_config={'filters': [{'id': 3, 'dist': 4}, {'id': 33, 'preset': 1}]}),) -The default compressor can be changed by setting the value of the using Zarr's -:ref:`user-guide-config`, e.g.:: - - >>> with zarr.config.set({'array.v2_default_compressor.default': {'id': 'blosc'}}): - ... 
z = zarr.create_array(store={}, shape=(100000000,), chunks=(1000000,), dtype='int32', zarr_format=2) - >>> z.filters - () - >>> z.compressors - (Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0),) - To disable compression, set ``compressors=None`` when creating an array, e.g.:: >>> z = zarr.create_array(store='data/example-8.zarr', shape=(100000000,), chunks=(1000000,), dtype='int32', compressors=None) diff --git a/docs/user-guide/config.rst b/docs/user-guide/config.rst index 5a9d26f2b9..0ae8017ca9 100644 --- a/docs/user-guide/config.rst +++ b/docs/user-guide/config.rst @@ -43,25 +43,7 @@ This is the current default configuration:: >>> zarr.config.pprint() {'array': {'order': 'C', - 'v2_default_compressor': {'default': {'checksum': False, - 'id': 'zstd', - 'level': 0}, - 'variable-length-string': {'checksum': False, - 'id': 'zstd', - 'level': 0}}, - 'v2_default_filters': {'default': None, - 'variable-length-string': [{'id': 'vlen-utf8'}]}, - 'v3_default_compressors': {'default': [{'configuration': {'checksum': False, - 'level': 0}, - 'name': 'zstd'}], - 'variable-length-string': [{'configuration': {'checksum': False, - 'level': 0}, - 'name': 'zstd'}]}, - 'v3_default_filters': {'default': [], 'variable-length-string': []}, - 'v3_default_serializer': {'default': {'configuration': {'endian': 'little'}, - 'name': 'bytes'}, - 'variable-length-string': {'name': 'vlen-utf8'}}, - 'write_empty_chunks': False}, + 'write_empty_chunks': False}, 'async': {'concurrency': 10, 'timeout': None}, 'buffer': 'zarr.buffer.cpu.Buffer', 'codec_pipeline': {'batch_size': 1, diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 312dc0bc4d..a4f7fc086a 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -30,6 +30,8 @@ from zarr.abc.store import Store, set_or_delete from zarr.codecs._v2 import V2Codec from zarr.codecs.bytes import BytesCodec +from zarr.codecs.vlen_utf8 import VLenBytesCodec, VLenUTF8Codec +from zarr.codecs.zstd import ZstdCodec from 
zarr.core._info import ArrayInfo from zarr.core.array_spec import ArrayConfig, ArrayConfigLike, parse_array_config from zarr.core.attributes import Attributes @@ -68,11 +70,13 @@ from zarr.core.config import categorize_data_type from zarr.core.config import config as zarr_config from zarr.core.dtype import ( + VariableLengthBytes, + VariableLengthUTF8, ZDType, ZDTypeLike, parse_data_type, ) -from zarr.core.dtype.common import HasEndianness, HasItemSize +from zarr.core.dtype.common import HasEndianness, HasItemSize, HasObjectCodec from zarr.core.indexing import ( BasicIndexer, BasicSelection, @@ -109,6 +113,7 @@ ) from zarr.core.metadata.v2 import ( CompressorLikev2, + get_object_codec_id, parse_compressor, parse_filters, ) @@ -710,7 +715,10 @@ def _create_metadata_v3( shape = parse_shapelike(shape) if codecs is None: - filters, serializer, compressors = _get_default_chunk_encoding_v3(dtype) + filters = default_filters_v3(dtype) + serializer = default_serializer_v3(dtype) + compressors = default_compressors_v3(dtype) + codecs_parsed = (*filters, serializer, *compressors) else: codecs_parsed = tuple(codecs) @@ -850,10 +858,9 @@ async def _create_v2( else: await ensure_no_existing_node(store_path, zarr_format=2) - default_filters, default_compressor = _get_default_chunk_encoding_v2(dtype) compressor_parsed: CompressorLikev2 if compressor == "auto": - compressor_parsed = default_compressor + compressor_parsed = default_compressor_v2(dtype) elif isinstance(compressor, BytesBytesCodec): raise ValueError( "Cannot use a BytesBytesCodec as a compressor for zarr v2 arrays. " @@ -863,7 +870,7 @@ async def _create_v2( compressor_parsed = compressor if filters is None: - filters = default_filters + filters = default_filters_v2(dtype) metadata = cls._create_metadata_v2( shape=shape, @@ -4654,19 +4661,80 @@ def _get_default_chunk_encoding_v3( ) -def _get_default_chunk_encoding_v2( - dtype: ZDType[TBaseDType, TBaseScalar], -) -> tuple[tuple[numcodecs.abc.Codec, ...] 
def default_filters_v3(dtype: ZDType[Any, Any]) -> tuple[ArrayArrayCodec, ...]:
    """
    Return the default Zarr V3 filters for the given data type.

    This is always an empty tuple: no data type has default filters.
    """
    return ()


def default_compressors_v3(dtype: ZDType[Any, Any]) -> tuple[BytesBytesCodec, ...]:
    """
    Return the default Zarr V3 compressors for the given data type.

    This is always a single-element tuple containing ``ZstdCodec``.
    """
    return (ZstdCodec(),)


def default_serializer_v3(dtype: ZDType[Any, Any]) -> ArrayBytesCodec:
    """
    Return the default Zarr V3 serializer for the given data type.

    Most data types serialize with ``BytesCodec``: little-endian when the data
    type has endianness, otherwise with ``endian=None``. Data types that
    require an object codec (variable-length strings and variable-length
    bytes) use a hard-coded serializer instead -- ``VLenUTF8Codec`` and
    ``VLenBytesCodec``, respectively.

    Raises
    ------
    ValueError
        If the data type requires an object codec that is not recognized.
    """
    if isinstance(dtype, HasEndianness):
        return BytesCodec(endian="little")
    if isinstance(dtype, HasObjectCodec):
        if dtype.object_codec_id == "vlen-bytes":
            return VLenBytesCodec()
        if dtype.object_codec_id == "vlen-utf8":
            return VLenUTF8Codec()
        msg = f"Data type {dtype} requires an unknown object codec: {dtype.object_codec_id!r}."
        raise ValueError(msg)
    return BytesCodec(endian=None)


def default_filters_v2(dtype: ZDType[Any, Any]) -> tuple[numcodecs.abc.Codec, ...] | None:
    """
    Return the default Zarr V2 filters for the given data type.

    Data types that require an object codec (variable-length strings and
    variable-length bytes) get a single-element tuple containing that object
    codec; all other data types get ``None``.

    Raises
    ------
    ValueError
        If the data type requires an object codec that is not recognized.
    """
    # NOTE: annotated as a variadic tuple (``tuple[..., ...]``) for
    # consistency with parse_filters and _parse_chunk_encoding_v2; the
    # previous ``tuple[numcodecs.abc.Codec]`` meant a fixed-length 1-tuple.
    if isinstance(dtype, HasObjectCodec):
        if dtype.object_codec_id == "vlen-bytes":
            from numcodecs import VLenBytes

            return (VLenBytes(),)
        elif dtype.object_codec_id == "vlen-utf8":
            from numcodecs import VLenUTF8

            return (VLenUTF8(),)
        else:
            msg = f"Data type {dtype} requires an unknown object codec: {dtype.object_codec_id!r}."
            raise ValueError(msg)
    return None


def default_compressor_v2(dtype: ZDType[Any, Any]) -> numcodecs.abc.Codec:
    """
    Return the default Zarr V2 compressor for the given data type.

    This is always the numcodecs ``Zstd`` codec, regardless of data type.
    """
    # Deferred import: keep numcodecs off the hot import path until needed.
    from numcodecs import Zstd

    return Zstd(level=0, checksum=False)
| None _compressor: numcodecs.abc.Codec | None if compressor is None or compressor == (): _compressor = None elif compressor == "auto": - _compressor = default_compressor + _compressor = default_compressor_v2(dtype) elif isinstance(compressor, tuple | list) and len(compressor) == 1: _compressor = parse_compressor(compressor[0]) else: @@ -4697,7 +4764,7 @@ def _parse_chunk_encoding_v2( if filters is None: _filters = None elif filters == "auto": - _filters = default_filters + _filters = default_filters_v2(dtype) else: if isinstance(filters, Iterable): for idx, f in enumerate(filters): @@ -4708,7 +4775,33 @@ def _parse_chunk_encoding_v2( ) raise TypeError(msg) _filters = parse_filters(filters) - + if isinstance(dtype, HasObjectCodec): + # check the filters and the compressor for the object codec required for this data type + if _filters is None: + if _compressor is None: + object_codec_id = None + else: + object_codec_id = get_object_codec_id((_compressor.get_config(),)) + else: + object_codec_id = get_object_codec_id( + ( + *[f.get_config() for f in _filters], + _compressor.get_config() if _compressor is not None else None, + ) + ) + if object_codec_id is None: + if isinstance(dtype, VariableLengthUTF8): # type: ignore[unreachable] + codec_name = "the numcodecs.VLenUTF8 codec" # type: ignore[unreachable] + elif isinstance(dtype, VariableLengthBytes): # type: ignore[unreachable] + codec_name = "the numcodecs.VLenBytes codec" # type: ignore[unreachable] + else: + codec_name = f"an unknown object codec with id {dtype.object_codec_id!r}" + msg = ( + f"Data type {dtype} requires {codec_name}, " + "but no such codec was specified in the filters or compressor parameters for " + "this array. " + ) + raise ValueError(msg) return _filters, _compressor @@ -4722,14 +4815,11 @@ def _parse_chunk_encoding_v3( """ Generate chunk encoding classes for v3 arrays with optional defaults. 
""" - default_array_array, default_array_bytes, default_bytes_bytes = _get_default_chunk_encoding_v3( - dtype - ) if filters is None: out_array_array: tuple[ArrayArrayCodec, ...] = () elif filters == "auto": - out_array_array = default_array_array + out_array_array = default_filters_v3(dtype) else: maybe_array_array: Iterable[Codec | dict[str, JSON]] if isinstance(filters, dict | Codec): @@ -4739,7 +4829,7 @@ def _parse_chunk_encoding_v3( out_array_array = tuple(_parse_array_array_codec(c) for c in maybe_array_array) if serializer == "auto": - out_array_bytes = default_array_bytes + out_array_bytes = default_serializer_v3(dtype) else: # TODO: ensure that the serializer is compatible with the ndarray produced by the # array-array codecs. For example, if a sequence of array-array codecs produces an @@ -4749,7 +4839,7 @@ def _parse_chunk_encoding_v3( if compressors is None: out_bytes_bytes: tuple[BytesBytesCodec, ...] = () elif compressors == "auto": - out_bytes_bytes = default_bytes_bytes + out_bytes_bytes = default_compressors_v3(dtype) else: maybe_bytes_bytes: Iterable[Codec | dict[str, JSON]] if isinstance(compressors, dict | Codec): @@ -4759,17 +4849,11 @@ def _parse_chunk_encoding_v3( out_bytes_bytes = tuple(_parse_bytes_bytes_codec(c) for c in maybe_bytes_bytes) - # specialize codecs as needed given the dtype - - # TODO: refactor so that the config only contains the name of the codec, and we use the dtype - # to create the codec instance, instead of storing a dict representation of a full codec. - # TODO: ensure that the serializer is compatible with the ndarray produced by the # array-array codecs. For example, if a sequence of array-array codecs produces an # array with a single-byte data type, then the serializer should not specify endiannesss. 
- if isinstance(out_array_bytes, BytesCodec) and not isinstance(dtype, HasEndianness): - # The default endianness in the bytescodec might not be None, so we need to replace it - out_array_bytes = replace(out_array_bytes, endian=None) + + # TODO: add checks to ensure that the right serializer is used for vlen data types return out_array_array, out_array_bytes, out_bytes_bytes diff --git a/src/zarr/core/config.py b/src/zarr/core/config.py index 05d048ef74..cc3c33cd17 100644 --- a/src/zarr/core/config.py +++ b/src/zarr/core/config.py @@ -78,6 +78,25 @@ def enable_gpu(self) -> ConfigSet: ) +# these keys were removed from the config as part of the 3.1.0 release. +# these deprecations should be removed in 3.1.1 or thereabouts. +deprecations = { + "array.v2_default_compressor.numeric": None, + "array.v2_default_compressor.string": None, + "array.v2_default_compressor.bytes": None, + "array.v2_default_filters.string": None, + "array.v2_default_filters.bytes": None, + "array.v3_default_filters.numeric": None, + "array.v3_default_filters.raw": None, + "array.v3_default_filters.bytes": None, + "array.v3_default_serializer.numeric": None, + "array.v3_default_serializer.string": None, + "array.v3_default_serializer.bytes": None, + "array.v3_default_compressors.string": None, + "array.v3_default_compressors.bytes": None, + "array.v3_default_compressors": None, +} + # The default configuration for zarr config = Config( "zarr", @@ -87,27 +106,6 @@ def enable_gpu(self) -> ConfigSet: "array": { "order": "C", "write_empty_chunks": False, - "v2_default_compressor": { - "default": {"id": "zstd", "level": 0, "checksum": False}, - "variable-length-string": {"id": "zstd", "level": 0, "checksum": False}, - }, - "v2_default_filters": { - "default": None, - "variable-length-string": [{"id": "vlen-utf8"}], - }, - "v3_default_filters": {"default": [], "variable-length-string": []}, - "v3_default_serializer": { - "default": {"name": "bytes", "configuration": {"endian": "little"}}, - 
"variable-length-string": {"name": "vlen-utf8"}, - }, - "v3_default_compressors": { - "default": [ - {"name": "zstd", "configuration": {"level": 0, "checksum": False}}, - ], - "variable-length-string": [ - {"name": "zstd", "configuration": {"level": 0, "checksum": False}} - ], - }, }, "async": {"concurrency": 10, "timeout": None}, "threading": {"max_workers": None}, @@ -132,6 +130,7 @@ def enable_gpu(self) -> ConfigSet: "ndbuffer": "zarr.buffer.cpu.NDBuffer", } ], + deprecations=deprecations, ) diff --git a/tests/test_array.py b/tests/test_array.py index 0bca860e84..4783bca05c 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -35,23 +35,30 @@ _parse_chunk_encoding_v3, chunks_initialized, create_array, + default_filters_v2, + default_serializer_v3, ) from zarr.core.buffer import NDArrayLike, NDArrayLikeOrScalar, default_buffer_prototype from zarr.core.buffer.cpu import NDBuffer from zarr.core.chunk_grids import _auto_partition from zarr.core.chunk_key_encodings import ChunkKeyEncodingParams from zarr.core.common import JSON, MemoryOrder, ZarrFormat -from zarr.core.dtype import parse_data_type -from zarr.core.dtype.common import ENDIANNESS_STR, EndiannessStr -from zarr.core.dtype.npy.common import NUMPY_ENDIANNESS_STR, endianness_from_numpy_str -from zarr.core.dtype.npy.float import Float32, Float64 -from zarr.core.dtype.npy.int import Int16, UInt8 -from zarr.core.dtype.npy.string import VariableLengthUTF8 -from zarr.core.dtype.npy.structured import ( +from zarr.core.dtype import ( + DateTime64, + Float32, + Float64, + Int16, Structured, + TimeDelta64, + UInt8, + VariableLengthBytes, + VariableLengthUTF8, + ZDType, + parse_data_type, ) -from zarr.core.dtype.npy.time import DateTime64, TimeDelta64 -from zarr.core.dtype.wrapper import ZDType +from zarr.core.dtype.common import ENDIANNESS_STR, EndiannessStr +from zarr.core.dtype.npy.common import NUMPY_ENDIANNESS_STR, endianness_from_numpy_str +from zarr.core.dtype.npy.string import UTF8Base from 
class UnknownObjectDtype(UTF8Base[np.dtypes.ObjectDType]):
    """A test-only data type whose object codec id is not recognized by zarr."""

    object_codec_id = "unknown"  # type: ignore[assignment]

    def to_native_dtype(self) -> np.dtypes.ObjectDType:
        """
        Create a NumPy object dtype from this VariableLengthUTF8 ZDType.

        Returns
        -------
        np.dtypes.ObjectDType
            The NumPy object dtype.
        """
        # "O" is the NumPy character code for the object dtype; the previous
        # lowercase "o" is not a valid dtype spec and raises TypeError.
        return np.dtype("O")  # type: ignore[return-value]


@pytest.mark.parametrize(
    "dtype", [VariableLengthUTF8(), VariableLengthBytes(), UnknownObjectDtype()]
)
def test_chunk_encoding_no_object_codec_errors(dtype: ZDType[Any, Any]) -> None:
    """
    Test that a ValueError is raised when checking the chunk encoding for a v2 array with a
    data type that requires an object codec, but where no object codec is specified.
    """
    if isinstance(dtype, VariableLengthUTF8):
        codec_name = "the numcodecs.VLenUTF8 codec"
    elif isinstance(dtype, VariableLengthBytes):
        codec_name = "the numcodecs.VLenBytes codec"
    else:
        codec_name = f"an unknown object codec with id {dtype.object_codec_id!r}"  # type: ignore[attr-defined]
    msg = (
        f"Data type {dtype} requires {codec_name}, "
        "but no such codec was specified in the filters or compressor parameters for "
        "this array. "
    )
    with pytest.raises(ValueError, match=re.escape(msg)):
        _parse_chunk_encoding_v2(filters=None, compressor=None, dtype=dtype)


def test_unknown_object_codec_default_serializer_v3() -> None:
    """
    Test that a ValueError is raised when creating the default serializer for a data type
    that requires an unknown object codec.
    """
    dtype = UnknownObjectDtype()
    msg = f"Data type {dtype} requires an unknown object codec: {dtype.object_codec_id!r}."
    with pytest.raises(ValueError, match=re.escape(msg)):
        default_serializer_v3(dtype)


def test_unknown_object_codec_default_filters_v2() -> None:
    """
    Test that a ValueError is raised when creating the default filters for a data type
    that requires an unknown object codec.
    """
    dtype = UnknownObjectDtype()
    msg = f"Data type {dtype} requires an unknown object codec: {dtype.object_codec_id!r}."
    with pytest.raises(ValueError, match=re.escape(msg)):
        default_filters_v2(dtype)
-46,9 +42,6 @@ TestNDArrayLike, ) -if TYPE_CHECKING: - from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType - def test_config_defaults_set() -> None: # regression test for available defaults @@ -60,27 +53,6 @@ def test_config_defaults_set() -> None: "array": { "order": "C", "write_empty_chunks": False, - "v2_default_compressor": { - "default": {"id": "zstd", "level": 0, "checksum": False}, - "variable-length-string": {"id": "zstd", "level": 0, "checksum": False}, - }, - "v2_default_filters": { - "default": None, - "variable-length-string": [{"id": "vlen-utf8"}], - }, - "v3_default_filters": {"default": [], "variable-length-string": []}, - "v3_default_serializer": { - "default": {"name": "bytes", "configuration": {"endian": "little"}}, - "variable-length-string": {"name": "vlen-utf8"}, - }, - "v3_default_compressors": { - "default": [ - {"name": "zstd", "configuration": {"level": 0, "checksum": False}}, - ], - "variable-length-string": [ - {"name": "zstd", "configuration": {"level": 0, "checksum": False}} - ], - }, }, "async": {"concurrency": 10, "timeout": None}, "threading": {"max_workers": None}, @@ -323,29 +295,31 @@ class NewCodec2(BytesCodec): get_codec_class("new_codec") -@pytest.mark.parametrize("dtype_category", ["variable-length-string", "default"]) -@pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") -async def test_default_codecs(dtype_category: str) -> None: +@pytest.mark.parametrize( + "key", + [ + "array.v2_default_compressor.numeric", + "array.v2_default_compressor.string", + "array.v2_default_compressor.bytes", + "array.v2_default_filters.string", + "array.v2_default_filters.bytes", + "array.v3_default_filters.numeric", + "array.v3_default_filters.raw", + "array.v3_default_filters.bytes", + "array.v3_default_serializer.numeric", + "array.v3_default_serializer.string", + "array.v3_default_serializer.bytes", + "array.v3_default_compressors.string", + "array.v3_default_compressors.bytes", + 
async def test_v2_encode_decode(
    dtype: str, expected_dtype: str, fill_value: bytes, fill_value_json: str
) -> None:
    """
    Test that a Zarr V2 array created with the given dtype and fill value
    produces the expected ``.zarray`` metadata document and that the stored
    data decodes back to the expected values.
    """
    store = zarr.storage.MemoryStore()
    g = zarr.group(store=store, zarr_format=2)
    g.create_array(
        name="foo", shape=(3,), chunks=(3,), dtype=dtype, fill_value=fill_value, compressor=None
    )

    result = await store.get("foo/.zarray", zarr.core.buffer.default_buffer_prototype())
    assert result is not None

    serialized = json.loads(result.to_bytes())
    expected = {
        "chunks": [3],
        "compressor": None,
        "dtype": expected_dtype,
        "fill_value": fill_value_json,
        "filters": None,
        "order": "C",
        "shape": [3],
        "zarr_format": 2,
        "dimension_separator": ".",
    }
    assert serialized == expected

    # Read back once and verify the decoded values (the previous version
    # duplicated this open/assert pair verbatim).
    data = zarr.open_array(store=store, path="foo")[:]
    np.testing.assert_equal(data, np.full((3,), b"X", dtype=dtype))