Skip to content

Commit 2a7b5a8

Browse files
committed
fix config test failures
1 parent c8d7680 commit 2a7b5a8

File tree

5 files changed

+53
-47
lines changed

5 files changed

+53
-47
lines changed

src/zarr/core/array.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec, Codec
3131
from zarr.abc.store import Store, set_or_delete
3232
from zarr.codecs._v2 import V2Codec
33+
from zarr.codecs.bytes import BytesCodec
3334
from zarr.core._info import ArrayInfo
3435
from zarr.core.array_spec import ArrayConfig, ArrayConfigLike, ArraySpec, parse_array_config
3536
from zarr.core.attributes import Attributes
@@ -4231,7 +4232,6 @@ def _get_default_chunk_encoding_v3(
42314232
compressors = zarr_config.get(f"array.v3_default_compressors.{dtype._zarr_v3_name}")
42324233
else:
42334234
compressors = zarr_config.get("array.v3_default_compressors.default")
4234-
42354235
if dtype._zarr_v3_name in zarr_config.get("array.v3_default_serializer"):
42364236
serializer = zarr_config.get(f"array.v3_default_serializer.{dtype._zarr_v3_name}")
42374237
else:
@@ -4353,6 +4353,14 @@ def _parse_chunk_encoding_v3(
43534353

43544354
out_bytes_bytes = tuple(_parse_bytes_bytes_codec(c) for c in maybe_bytes_bytes)
43554355

4356+
# specialize codecs as needed given the dtype
4357+
4358+
# TODO: refactor so that the config only contains the name of the codec, and we use the dtype
4359+
# to create the codec instance, instead of storing a dict representation of a full codec.
4360+
4361+
if isinstance(out_array_bytes, BytesCodec) and dtype.to_dtype().itemsize == 1:
4362+
# The default endianness in the BytesCodec might not be None, so for single-byte dtypes we replace it with None
4363+
out_array_bytes = replace(out_array_bytes, endian=None)
43564364
return out_array_array, out_array_bytes, out_bytes_bytes
43574365

43584366

src/zarr/core/config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ def enable_gpu(self) -> ConfigSet:
8484
"fixed_length_ucs4": [{"id": "vlen-utf8"}],
8585
"fixed_length_ascii": [{"id": "vlen-bytes"}],
8686
},
87-
"v3_default_filters": {"default": ()},
87+
"v3_default_filters": {"default": []},
8888
"v3_default_serializer": {
8989
"default": {"name": "bytes", "configuration": {"endian": "little"}},
9090
"variable_length_utf8": {"name": "vlen-utf8"},

src/zarr/core/dtype/_numpy.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -711,7 +711,7 @@ def to_dict(self) -> dict[str, JSON]:
711711
@classmethod
712712
def _from_dtype_unsafe(cls, dtype: np.dtypes.DateTime64DType) -> Self:
713713
unit = dtype.name[dtype.name.rfind("[") + 1 : dtype.name.rfind("]")]
714-
if unit not in get_args(DateUnit | TimeUnit):
714+
if unit not in get_args(DateUnit) and unit not in get_args(TimeUnit):
715715
raise DataTypeValidationError('Invalid unit for "numpy.datetime64"')
716716
return cls(unit=unit, endianness=endianness_from_numpy_str(dtype.byteorder))
717717

tests/test_array.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import dataclasses
2+
import inspect
23
import json
34
import math
45
import multiprocessing as mp
@@ -28,8 +29,6 @@
2829
from zarr.core.array import (
2930
CompressorsLike,
3031
FiltersLike,
31-
_get_default_chunk_encoding_v2,
32-
_get_default_chunk_encoding_v3,
3332
_parse_chunk_encoding_v2,
3433
_parse_chunk_encoding_v3,
3534
chunks_initialized,
@@ -1064,13 +1063,23 @@ async def test_default_filters_compressors(
10641063
shape=(10,),
10651064
zarr_format=zarr_format,
10661065
)
1066+
1067+
sig = inspect.signature(create_array)
1068+
10671069
if zarr_format == 3:
1068-
expected_filters, expected_serializer, expected_compressors = (
1069-
_get_default_chunk_encoding_v3(dtype=zdtype)
1070+
expected_filters, expected_serializer, expected_compressors = _parse_chunk_encoding_v3(
1071+
compressors=sig.parameters["compressors"].default,
1072+
filters=sig.parameters["filters"].default,
1073+
serializer=sig.parameters["serializer"].default,
1074+
dtype=zdtype,
10701075
)
10711076

10721077
elif zarr_format == 2:
1073-
default_filters, default_compressors = _get_default_chunk_encoding_v2(dtype=zdtype)
1078+
default_filters, default_compressors = _parse_chunk_encoding_v2(
1079+
compressor=sig.parameters["compressors"].default,
1080+
filters=sig.parameters["filters"].default,
1081+
dtype=zdtype,
1082+
)
10741083
if default_filters is None:
10751084
expected_filters = ()
10761085
else:

tests/test_config.py

Lines changed: 28 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,12 @@
1919
GzipCodec,
2020
ShardingCodec,
2121
)
22+
from zarr.core.array import create_array
2223
from zarr.core.array_spec import ArraySpec
2324
from zarr.core.buffer import NDBuffer
2425
from zarr.core.codec_pipeline import BatchedCodecPipeline
2526
from zarr.core.config import BadConfigError, config
27+
from zarr.core.dtype import get_data_type_from_numpy
2628
from zarr.core.indexing import SelectorTuple
2729
from zarr.registry import (
2830
fully_qualified_name,
@@ -52,33 +54,24 @@ def test_config_defaults_set() -> None:
5254
"array": {
5355
"order": "C",
5456
"write_empty_chunks": False,
55-
"v2_default_compressor": {
56-
"numeric": {"id": "zstd", "level": 0, "checksum": False},
57-
"string": {"id": "zstd", "level": 0, "checksum": False},
58-
"bytes": {"id": "zstd", "level": 0, "checksum": False},
59-
},
57+
"v2_default_compressor": {"default": {"id": "zstd", "level": 0, "checksum": False}},
6058
"v2_default_filters": {
61-
"numeric": None,
62-
"string": [{"id": "vlen-utf8"}],
63-
"bytes": [{"id": "vlen-bytes"}],
64-
"raw": None,
59+
"default": None,
60+
"variable_length_utf8": [{"id": "vlen-utf8"}],
61+
"fixed_length_ucs4": [{"id": "vlen-utf8"}],
62+
"fixed_length_ascii": [{"id": "vlen-bytes"}],
6563
},
66-
"v3_default_filters": {"numeric": [], "string": [], "bytes": []},
64+
"v3_default_filters": {"default": []},
6765
"v3_default_serializer": {
68-
"numeric": {"name": "bytes", "configuration": {"endian": "little"}},
69-
"string": {"name": "vlen-utf8"},
70-
"bytes": {"name": "vlen-bytes"},
66+
"default": {"name": "bytes", "configuration": {"endian": "little"}},
67+
"variable_length_utf8": {"name": "vlen-utf8"},
68+
"fixed_length_ucs4": {"name": "vlen-utf8"},
69+
"r*": {"name": "vlen-bytes"},
7170
},
7271
"v3_default_compressors": {
73-
"numeric": [
74-
{"name": "zstd", "configuration": {"level": 0, "checksum": False}},
75-
],
76-
"string": [
77-
{"name": "zstd", "configuration": {"level": 0, "checksum": False}},
78-
],
79-
"bytes": [
72+
"default": [
8073
{"name": "zstd", "configuration": {"level": 0, "checksum": False}},
81-
],
74+
]
8275
},
8376
},
8477
"async": {"concurrency": 10, "timeout": None},
@@ -306,26 +299,22 @@ class NewCodec2(BytesCodec):
306299

307300
@pytest.mark.parametrize("dtype", ["int", "bytes", "str"])
308301
async def test_default_codecs(dtype: str) -> None:
309-
with config.set(
310-
{
311-
"array.v3_default_compressors": { # test setting non-standard codecs
312-
"numeric": [
313-
{"name": "gzip", "configuration": {"level": 5}},
314-
],
315-
"string": [
316-
{"name": "gzip", "configuration": {"level": 5}},
317-
],
318-
"bytes": [
319-
{"name": "gzip", "configuration": {"level": 5}},
320-
],
321-
}
322-
}
323-
):
324-
arr = await zarr.api.asynchronous.create_array(
302+
"""
303+
Test that the default compressors reflect the current config settings.
304+
"""
305+
zdtype = get_data_type_from_numpy(dtype)
306+
expected_compressors = (GzipCodec(),)
307+
new_conf = {
308+
f"array.v3_default_compressors.{zdtype._zarr_v3_name}": [
309+
c.to_dict() for c in expected_compressors
310+
]
311+
}
312+
with config.set(new_conf):
313+
arr = await create_array(
325314
shape=(100,),
326315
chunks=(100,),
327-
dtype=np.dtype(dtype),
316+
dtype=dtype,
328317
zarr_format=3,
329318
store=MemoryStore(),
330319
)
331-
assert arr.compressors == (GzipCodec(),)
320+
assert arr.compressors == expected_compressors

0 commit comments

Comments
 (0)