diff --git a/changes/2998.bugfix.md b/changes/2998.bugfix.md
new file mode 100644
index 0000000000..7b94223122
--- /dev/null
+++ b/changes/2998.bugfix.md
@@ -0,0 +1 @@
+Fix structured `dtype` fill value serialization for consolidated metadata
\ No newline at end of file
diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py
index 3f8dad1740..3f4f15b9e9 100644
--- a/src/zarr/core/group.py
+++ b/src/zarr/core/group.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import asyncio
+import base64
 import itertools
 import json
 import logging
@@ -358,7 +359,13 @@ def to_buffer_dict(self, prototype: BufferPrototype) -> dict[str, Buffer]:
                     d[f"{k}/{ZATTRS_JSON}"] = _replace_special_floats(attrs)
                     if "shape" in v:
                         # it's an array
-                        d[f"{k}/{ZARRAY_JSON}"] = _replace_special_floats(v)
+                        if isinstance(v.get("fill_value", None), np.void):
+                            v["fill_value"] = base64.standard_b64encode(
+                                cast(bytes, v["fill_value"])
+                            ).decode("ascii")
+                        else:
+                            v = _replace_special_floats(v)
+                        d[f"{k}/{ZARRAY_JSON}"] = v
                     else:
                         d[f"{k}/{ZGROUP_JSON}"] = {
                             "zarr_format": self.zarr_format,
diff --git a/tests/test_metadata/test_v2.py b/tests/test_metadata/test_v2.py
index 4600a977d4..08b9cb2507 100644
--- a/tests/test_metadata/test_v2.py
+++ b/tests/test_metadata/test_v2.py
@@ -316,3 +316,28 @@ def test_zstd_checksum() -> None:
         arr.metadata.to_buffer_dict(default_buffer_prototype())[".zarray"].to_bytes()
     )
     assert "checksum" not in metadata["compressor"]
+
+
+@pytest.mark.parametrize(
+    "fill_value", [None, np.void((0, 0), np.dtype([("foo", "i4"), ("bar", "i4")]))]
+)
+def test_structured_dtype_fill_value_serialization(tmp_path, fill_value):
+    group_path = tmp_path / "test.zarr"
+    root_group = zarr.open_group(group_path, mode="w", zarr_format=2)
+    dtype = np.dtype([("foo", "i4"), ("bar", "i4")])
+    root_group.create_array(
+        name="structured_dtype",
+        shape=(100, 100),
+        chunks=(100, 100),
+        dtype=dtype,
+        fill_value=fill_value,
+    )
+
+    zarr.consolidate_metadata(root_group.store, zarr_format=2)
+    root_group = zarr.open_group(group_path, mode="r")
+    assert (
+        root_group.metadata.consolidated_metadata.to_dict()["metadata"]["structured_dtype"][
+            "fill_value"
+        ]
+        == fill_value
+    )