Skip to content

Commit f63bb67

Browse files
committed
Fix string codecs for NumPy 1.25
1 parent f04e0e6 commit f63bb67

File tree

3 files changed

+19
-13
lines changed

3 files changed

+19
-13
lines changed

.github/workflows/test.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ jobs:
1919
name: os=${{ matrix.os }}, py=${{ matrix.python-version }}, np=${{ matrix.numpy-version }}, deps=${{ matrix.dependency-set }}
2020

2121
strategy:
22-
fail-fast: false
2322
matrix:
2423
python-version: ['3.11', '3.12', '3.13']
2524
numpy-version: ['1.25', '2.1']

src/zarr/core/metadata/v3.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
)
3838
from zarr.core.config import config
3939
from zarr.core.metadata.common import parse_attributes
40-
from zarr.core.strings import _STRING_DTYPE as STRING_NP_DTYPE
40+
from zarr.core.strings import _STRING_DTYPE as STRING_NP_DTYPE, _NUMPY_SUPPORTS_VLEN_STRING
4141
from zarr.errors import MetadataValidationError, NodeTypeValidationError
4242
from zarr.registry import get_codec_class
4343

@@ -606,6 +606,10 @@ def from_numpy(cls, dtype: np.dtype[Any]) -> DataType:
606606
return DataType.string
607607
elif dtype.kind == "S":
608608
return DataType.bytes
609+
elif not _NUMPY_SUPPORTS_VLEN_STRING and dtype.kind == "O":
610+
# numpy < 2.0 does not support vlen string dtype
611+
# so we fall back on object array of strings
612+
return DataType.string
609613
dtype_to_data_type = {
610614
"|b1": "bool",
611615
"bool": "bool",

tests/test_config.py

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@
88
import pytest
99

1010
import zarr
11-
from zarr import Array, zeros
12-
from zarr.abc.codec import CodecInput, CodecOutput, CodecPipeline
11+
from zarr import Array, zeros, AsyncArray
12+
from zarr.abc.codec import CodecInput, CodecOutput, CodecPipeline, Codec
1313
from zarr.abc.store import ByteSetter, Store
1414
from zarr.codecs import (
1515
BloscCodec,
@@ -25,6 +25,7 @@
2525
from zarr.core.codec_pipeline import BatchedCodecPipeline
2626
from zarr.core.config import BadConfigError, config
2727
from zarr.core.indexing import SelectorTuple
28+
from zarr.core.strings import _STRING_DTYPE
2829
from zarr.registry import (
2930
fully_qualified_name,
3031
get_buffer_class,
@@ -36,6 +37,7 @@
3637
register_ndbuffer,
3738
register_pipeline,
3839
)
40+
from zarr.storage import MemoryStore
3941
from zarr.testing.buffer import (
4042
NDBufferUsingTestNDArrayLike,
4143
StoreExpectingTestBuffer,
@@ -254,8 +256,14 @@ def test_config_buffer_implementation() -> None:
254256
assert np.array_equal(arr_Crc32c[:], data2d)
255257

256258

257-
@pytest.mark.parametrize("dtype", ["int", "bytes", str])
258-
def test_default_codecs(dtype: str) -> None:
259+
@pytest.mark.parametrize(("dtype", "expected_codecs"),
260+
[
261+
("int", [BytesCodec(), GzipCodec()]),
262+
("bytes", [VLenBytesCodec()]),
263+
("str", [VLenUTF8Codec()]),
264+
]
265+
)
266+
async def test_default_codecs(dtype: str, expected_codecs: list[Codec]) -> None:
259267
with config.set(
260268
{
261269
"array.v3_default_codecs": {
@@ -265,10 +273,5 @@ def test_default_codecs(dtype: str) -> None:
265273
}
266274
}
267275
):
268-
arr = zeros(shape=(100), dtype=np.dtype(dtype), zarr_format=3)
269-
if dtype == "int":
270-
assert arr.metadata.codecs == [BytesCodec(), GzipCodec()]
271-
elif dtype == "bytes":
272-
assert arr.metadata.codecs == [VLenBytesCodec()]
273-
elif dtype == "str":
274-
assert arr.metadata.codecs == [VLenUTF8Codec()]
276+
arr = await AsyncArray.create(shape=(100,), chunk_shape=(100,),dtype=np.dtype(dtype), zarr_format=3, store=MemoryStore())
277+
assert arr.metadata.codecs == expected_codecs

0 commit comments

Comments
 (0)