Skip to content

Commit 80dfc40

Browse files
committed
use v3_default_codecs
1 parent 6954b60 commit 80dfc40

File tree

3 files changed

+61
-57
lines changed

3 files changed

+61
-57
lines changed

src/zarr/codecs/__init__.py

Lines changed: 0 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,5 @@
11
from __future__ import annotations
22

3-
from typing import TYPE_CHECKING, Any
4-
5-
if TYPE_CHECKING:
6-
import numpy as np
7-
83
from zarr.codecs.blosc import BloscCname, BloscCodec, BloscShuffle
94
from zarr.codecs.bytes import BytesCodec, Endian
105
from zarr.codecs.crc32c_ import Crc32cCodec
@@ -13,7 +8,6 @@
138
from zarr.codecs.transpose import TransposeCodec
149
from zarr.codecs.vlen_utf8 import VLenBytesCodec, VLenUTF8Codec
1510
from zarr.codecs.zstd import ZstdCodec
16-
from zarr.core.metadata.v3 import DataType
1711

1812
__all__ = [
1913
"BloscCname",
@@ -30,15 +24,3 @@
3024
"VLenUTF8Codec",
3125
"ZstdCodec",
3226
]
33-
34-
35-
def _get_default_array_bytes_codec(
36-
np_dtype: np.dtype[Any],
37-
) -> BytesCodec | VLenUTF8Codec | VLenBytesCodec:
38-
dtype = DataType.from_numpy(np_dtype)
39-
if dtype == DataType.string:
40-
return VLenUTF8Codec()
41-
elif dtype == DataType.bytes:
42-
return VLenBytesCodec()
43-
else:
44-
return BytesCodec()

src/zarr/core/array.py

Lines changed: 17 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,6 @@
1212

1313
from zarr._compat import _deprecate_positional_args
1414
from zarr.abc.store import Store, set_or_delete
15-
from zarr.codecs import _get_default_array_bytes_codec
1615
from zarr.codecs._v2 import V2Codec
1716
from zarr.core._info import ArrayInfo
1817
from zarr.core.attributes import Attributes
@@ -78,7 +77,7 @@
7877
T_ArrayMetadata,
7978
)
8079
from zarr.core.metadata.v2 import _default_filters_and_compressor
81-
from zarr.core.metadata.v3 import parse_node_type_array
80+
from zarr.core.metadata.v3 import DataType, parse_node_type_array
8281
from zarr.core.sync import sync
8382
from zarr.errors import MetadataValidationError
8483
from zarr.registry import get_pipeline_class
@@ -556,11 +555,7 @@ async def _create_v3(
556555
await ensure_no_existing_node(store_path, zarr_format=3)
557556

558557
shape = parse_shapelike(shape)
559-
codecs = (
560-
list(codecs)
561-
if codecs is not None
562-
else [_get_default_array_bytes_codec(np.dtype(dtype))]
563-
)
558+
codecs = list(codecs) if codecs is not None else _get_default_codecs(np.dtype(dtype))
564559

565560
if chunk_key_encoding is None:
566561
chunk_key_encoding = ("default", "/")
@@ -3318,3 +3313,18 @@ def _build_parents(
33183313
)
33193314

33203315
return parents
3316+
3317+
3318+
def _get_default_codecs(
3319+
np_dtype: np.dtype[Any],
3320+
) -> list[dict[str, JSON]]:
3321+
default_codecs = config.get("array.v3_default_codecs")
3322+
dtype = DataType.from_numpy(np_dtype)
3323+
if dtype == DataType.string:
3324+
dtype_key = "string"
3325+
elif dtype == DataType.bytes:
3326+
dtype_key = "bytes"
3327+
else:
3328+
dtype_key = "numeric"
3329+
3330+
return [{"name": codec_id, "configuration": {}} for codec_id in default_codecs[dtype_key]]

tests/test_config.py

Lines changed: 44 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -11,8 +11,15 @@
1111
from zarr import Array, zeros
1212
from zarr.abc.codec import CodecInput, CodecOutput, CodecPipeline
1313
from zarr.abc.store import ByteSetter, Store
14-
from zarr.codecs import BloscCodec, BytesCodec, Crc32cCodec, ShardingCodec, TransposeCodec, GzipCodec, VLenBytesCodec, \
15-
VLenUTF8Codec
14+
from zarr.codecs import (
15+
BloscCodec,
16+
BytesCodec,
17+
Crc32cCodec,
18+
GzipCodec,
19+
ShardingCodec,
20+
VLenBytesCodec,
21+
VLenUTF8Codec,
22+
)
1623
from zarr.core.array_spec import ArraySpec
1724
from zarr.core.buffer import NDBuffer
1825
from zarr.core.codec_pipeline import BatchedCodecPipeline
@@ -216,39 +223,44 @@ def test_config_buffer_implementation() -> None:
216223
arr[:] = np.arange(100)
217224

218225
register_buffer(TestBuffer)
219-
config.set({"buffer": fully_qualified_name(TestBuffer)})
220-
assert get_buffer_class() == TestBuffer
221-
222-
# no error using TestBuffer
223-
data = np.arange(100)
224-
arr[:] = np.arange(100)
225-
assert np.array_equal(arr[:], data)
226-
227-
data2d = np.arange(1000).reshape(100, 10)
228-
arr_sharding = zeros(
229-
shape=(100, 10),
230-
store=StoreExpectingTestBuffer(),
231-
codecs=[ShardingCodec(chunk_shape=(10, 10))],
232-
)
233-
arr_sharding[:] = data2d
234-
assert np.array_equal(arr_sharding[:], data2d)
226+
with config.set({"buffer": fully_qualified_name(TestBuffer)}):
227+
assert get_buffer_class() == TestBuffer
228+
229+
# no error using TestBuffer
230+
data = np.arange(100)
231+
arr[:] = np.arange(100)
232+
assert np.array_equal(arr[:], data)
233+
234+
data2d = np.arange(1000).reshape(100, 10)
235+
arr_sharding = zeros(
236+
shape=(100, 10),
237+
store=StoreExpectingTestBuffer(),
238+
codecs=[ShardingCodec(chunk_shape=(10, 10))],
239+
)
240+
arr_sharding[:] = data2d
241+
assert np.array_equal(arr_sharding[:], data2d)
242+
243+
arr_Crc32c = zeros(
244+
shape=(100, 10),
245+
store=StoreExpectingTestBuffer(),
246+
codecs=[BytesCodec(), Crc32cCodec()],
247+
)
248+
arr_Crc32c[:] = data2d
249+
assert np.array_equal(arr_Crc32c[:], data2d)
235250

236-
arr_Crc32c = zeros(
237-
shape=(100, 10),
238-
store=StoreExpectingTestBuffer(),
239-
codecs=[BytesCodec(), Crc32cCodec()],
240-
)
241-
arr_Crc32c[:] = data2d
242-
assert np.array_equal(arr_Crc32c[:], data2d)
243251

244252
@pytest.mark.parametrize("dtype", ["int", "bytes", "str"])
245-
def test_default_codecs(dtype:str) -> None:
246-
with config.set({"array.v3_default_codecs": {
247-
"numeric": ["bytes", "gzip"], # test setting non-standard codecs
248-
"string": ["vlen-utf8"],
249-
"bytes": ["vlen-bytes"],
250-
}}):
251-
arr = zeros(shape=(100), store=StoreExpectingTestBuffer(), dtype=dtype)
253+
def test_default_codecs(dtype: str) -> None:
254+
with config.set(
255+
{
256+
"array.v3_default_codecs": {
257+
"numeric": ["bytes", "gzip"], # test setting non-standard codecs
258+
"string": ["vlen-utf8"],
259+
"bytes": ["vlen-bytes"],
260+
}
261+
}
262+
):
263+
arr = zeros(shape=(100), dtype=dtype)
252264
if dtype == "int":
253265
assert arr.metadata.codecs == [BytesCodec(), GzipCodec()]
254266
elif dtype == "bytes":

0 commit comments

Comments
 (0)