Skip to content

Commit 856b40f

Browse files
committed
allow single codec instances for filters, and None for filters / compressor, and condense some tests
1 parent bbe3a94 commit 856b40f

File tree

3 files changed

+78
-41
lines changed

3 files changed

+78
-41
lines changed

src/zarr/core/array.py

Lines changed: 28 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3505,12 +3505,14 @@ def _get_default_codecs(
35053505
| Iterable[numcodecs.abc.Codec]
35063506
| numcodecs.abc.Codec
35073507
| Literal["auto"]
3508+
| None
35083509
)
35093510
CompressorsParam: TypeAlias = (
35103511
Iterable[dict[str, JSON] | BytesBytesCodec]
35113512
| BytesBytesCodec
35123513
| numcodecs.abc.Codec
35133514
| Literal["auto"]
3515+
| None
35143516
)
35153517

35163518

@@ -3646,6 +3648,7 @@ async def create_array(
36463648
filters_parsed, compressor_parsed = _parse_chunk_encoding_v2(
36473649
compressor=compressors, filters=filters, dtype=np.dtype(dtype)
36483650
)
3651+
36493652
if dimension_names is not None:
36503653
raise ValueError("Zarr v2 arrays do not support dimension names.")
36513654
if order is None:
@@ -3801,26 +3804,32 @@ def _parse_chunk_encoding_v2(
38013804
"""
38023805
default_filters, default_compressor = _get_default_chunk_encoding_v2(dtype)
38033806

3804-
_filters: tuple[numcodecs.abc.Codec, ...] | None = None
3805-
_compressor: numcodecs.abc.Codec | None = None
3807+
_filters: tuple[numcodecs.abc.Codec, ...] | None
3808+
_compressor: numcodecs.abc.Codec | None
38063809

3807-
if compressor == "auto":
3810+
if compressor is None:
3811+
_compressor = None
3812+
elif compressor == "auto":
38083813
_compressor = default_compressor
38093814
else:
38103815
if isinstance(compressor, Iterable) and not isinstance(compressor, dict):
38113816
msg = f"For Zarr v2 arrays, the `compressor` must be a single codec. Got an iterable with type {type(compressor)} instead."
38123817
raise TypeError(msg)
38133818
_compressor = parse_compressor(compressor)
38143819

3815-
if filters == "auto":
3820+
if filters is None:
3821+
_filters = None
3822+
elif filters == "auto":
38163823
_filters = default_filters
38173824
else:
3818-
if isinstance(filters, Iterable) and not all(
3819-
isinstance(f, numcodecs.abc.Codec) for f in filters
3820-
):
3821-
raise TypeError(
3822-
"For Zarr v2 arrays, all elements of `filters` must be numcodecs codecs."
3823-
)
3825+
if isinstance(filters, Iterable):
3826+
for idx, f in enumerate(filters):
3827+
if not isinstance(f, numcodecs.abc.Codec):
3828+
msg = (
3829+
"For Zarr v2 arrays, all elements of `filters` must be numcodecs codecs. "
3830+
f"Element at index {idx} has type {type(f)}, which is not a numcodecs codec."
3831+
)
3832+
raise TypeError(msg)
38243833
_filters = parse_filters(filters)
38253834

38263835
return _filters, _compressor
@@ -3840,18 +3849,24 @@ def _parse_chunk_encoding_v3(
38403849
)
38413850
maybe_bytes_bytes: Iterable[Codec | dict[str, JSON]]
38423851
maybe_array_array: Iterable[Codec | dict[str, JSON]]
3852+
out_bytes_bytes: tuple[BytesBytesCodec, ...]
3853+
if compressors is None:
3854+
out_bytes_bytes = ()
38433855

3844-
if compressors == "auto":
3856+
elif compressors == "auto":
38453857
out_bytes_bytes = default_bytes_bytes
3858+
38463859
else:
38473860
if isinstance(compressors, dict | Codec):
38483861
maybe_bytes_bytes = (compressors,)
38493862
else:
38503863
maybe_bytes_bytes = cast(Iterable[Codec | dict[str, JSON]], compressors)
38513864

38523865
out_bytes_bytes = tuple(_parse_bytes_bytes_codec(c) for c in maybe_bytes_bytes)
3853-
3854-
if filters == "auto":
3866+
out_array_array: tuple[ArrayArrayCodec, ...]
3867+
if filters is None:
3868+
out_array_array = ()
3869+
elif filters == "auto":
38553870
out_array_array = default_array_array
38563871
else:
38573872
if isinstance(filters, dict | Codec):

src/zarr/core/metadata/v2.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,9 @@ def parse_filters(data: object) -> tuple[numcodecs.abc.Codec, ...] | None:
241241
msg = f"Invalid filter at index {idx}. Expected a numcodecs.abc.Codec or a dict representation of numcodecs.abc.Codec. Got {type(val)} instead."
242242
raise TypeError(msg)
243243
return tuple(out)
244+
# take a single codec instance and wrap it in a tuple
245+
if isinstance(data, numcodecs.abc.Codec):
246+
return (data,)
244247
msg = f"Invalid filters. Expected None, an iterable of numcodecs.abc.Codec or dict representations of numcodecs.abc.Codec. Got {type(data)} instead."
245248
raise TypeError(msg)
246249

tests/test_array.py

Lines changed: 47 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
FiltersParam,
2727
_get_default_chunk_encoding_v2,
2828
_get_default_chunk_encoding_v3,
29+
_parse_chunk_encoding_v2,
2930
_parse_chunk_encoding_v3,
3031
chunks_initialized,
3132
create_array,
@@ -1002,43 +1003,26 @@ async def test_create_array_no_filters_compressors(
10021003

10031004

10041005
@pytest.mark.parametrize("store", ["memory"], indirect=True)
1006+
@pytest.mark.parametrize("dtype", ["uint8", "float32", "str"])
10051007
@pytest.mark.parametrize(
10061008
"compressors",
10071009
[
10081010
"auto",
1011+
None,
1012+
(),
10091013
(ZstdCodec(level=3),),
10101014
(ZstdCodec(level=3), GzipCodec(level=0)),
10111015
ZstdCodec(level=3),
10121016
{"name": "zstd", "configuration": {"level": 3}},
10131017
({"name": "zstd", "configuration": {"level": 3}},),
10141018
],
10151019
)
1016-
async def test_create_array_v3_compressors(
1017-
store: MemoryStore, compressors: CompressorsParam
1018-
) -> None:
1019-
"""
1020-
Test various possibilities for the compressors parameter to create_array
1021-
"""
1022-
dtype = "uint8"
1023-
arr = await create_array(
1024-
store=store,
1025-
dtype=dtype,
1026-
shape=(10,),
1027-
zarr_format=3,
1028-
compressors=compressors,
1029-
)
1030-
_, _, bb_codecs_expected = _parse_chunk_encoding_v3(
1031-
filters=(), compressors=compressors, dtype=np.dtype(dtype)
1032-
)
1033-
# TODO: find a better way to get the compressors from the array.
1034-
assert arr.codec_pipeline.bytes_bytes_codecs == bb_codecs_expected # type: ignore[attr-defined]
1035-
1036-
1037-
@pytest.mark.parametrize("store", ["memory"], indirect=True)
10381020
@pytest.mark.parametrize(
10391021
"filters",
10401022
[
10411023
"auto",
1024+
None,
1025+
(),
10421026
(
10431027
TransposeCodec(
10441028
order=[
@@ -1067,23 +1051,58 @@ async def test_create_array_v3_compressors(
10671051
({"name": "transpose", "configuration": {"order": [0]}},),
10681052
],
10691053
)
1070-
async def test_create_array_v3_filters(store: MemoryStore, filters: FiltersParam) -> None:
1054+
async def test_create_array_v3_chunk_encoding(
1055+
store: MemoryStore, compressors: CompressorsParam, filters: FiltersParam, dtype: str
1056+
) -> None:
10711057
"""
1072-
Test various possibilities for the filters parameter to create_array
1058+
Test various possibilities for the compressors and filters parameters to create_array
10731059
"""
1074-
dtype = "uint8"
10751060
arr = await create_array(
10761061
store=store,
10771062
dtype=dtype,
10781063
shape=(10,),
10791064
zarr_format=3,
10801065
filters=filters,
1066+
compressors=compressors,
10811067
)
1082-
aa_codecs_expected, _, _ = _parse_chunk_encoding_v3(
1083-
filters=filters, compressors=(), dtype=np.dtype(dtype)
1068+
aa_codecs_expected, _, bb_codecs_expected = _parse_chunk_encoding_v3(
1069+
filters=filters, compressors=compressors, dtype=np.dtype(dtype)
10841070
)
1085-
# TODO: find a better way to get the filters from the array.
1071+
# TODO: find a better way to get the filters / compressors from the array.
10861072
assert arr.codec_pipeline.array_array_codecs == aa_codecs_expected # type: ignore[attr-defined]
1073+
assert arr.codec_pipeline.bytes_bytes_codecs == bb_codecs_expected # type: ignore[attr-defined]
1074+
1075+
1076+
@pytest.mark.parametrize("store", ["memory"], indirect=True)
1077+
@pytest.mark.parametrize("dtype", ["uint8", "float32", "str"])
1078+
@pytest.mark.parametrize(
1079+
"compressors",
1080+
[
1081+
"auto",
1082+
None,
1083+
numcodecs.Zstd(level=3),
1084+
],
1085+
)
1086+
@pytest.mark.parametrize(
1087+
"filters", ["auto", None, numcodecs.GZip(level=1), (numcodecs.GZip(level=1),)]
1088+
)
1089+
async def test_create_array_v2_chunk_encoding(
1090+
store: MemoryStore, compressors: CompressorsParam, filters: FiltersParam, dtype: str
1091+
) -> None:
1092+
arr = await create_array(
1093+
store=store,
1094+
dtype=dtype,
1095+
shape=(10,),
1096+
zarr_format=2,
1097+
compressors=compressors,
1098+
filters=filters,
1099+
)
1100+
filters_expected, compressor_expected = _parse_chunk_encoding_v2(
1101+
filters=filters, compressor=compressors, dtype=np.dtype(dtype)
1102+
)
1103+
# TODO: find a better way to get the filters/compressor from the array.
1104+
assert arr.metadata.compressor == compressor_expected # type: ignore[union-attr]
1105+
assert arr.metadata.filters == filters_expected # type: ignore[union-attr]
10871106

10881107

10891108
@pytest.mark.parametrize("store", ["memory"], indirect=True)

0 commit comments

Comments
 (0)