Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion src/zarr/abc/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ def to_dict(self) -> dict[str, JSON]:
are instances of `Metadata`. Sequences of `Metadata` are similarly recursed into, and
the output of that recursion is collected in a list.
"""
...
out_dict = {}
for field in fields(self):
key = field.name
Expand Down
29 changes: 27 additions & 2 deletions src/zarr/core/metadata/v2.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from __future__ import annotations

import base64
from collections.abc import Iterable
from enum import Enum
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, cast

if TYPE_CHECKING:
from typing import Any, Literal, Self
Expand Down Expand Up @@ -31,7 +32,7 @@ class ArrayV2Metadata(ArrayMetadata):
shape: ChunkCoords
chunk_grid: RegularChunkGrid
data_type: np.dtype[Any]
fill_value: None | int | float = 0
fill_value: None | int | float | str | bytes = 0
order: Literal["C", "F"] = "C"
filters: tuple[numcodecs.abc.Codec, ...] | None = None
dimension_separator: Literal[".", "/"] = "."
Expand Down Expand Up @@ -140,6 +141,22 @@ def from_dict(cls, data: dict[str, Any]) -> ArrayV2Metadata:
_data = data.copy()
# check that the zarr_format attribute is correct
_ = parse_zarr_format(_data.pop("zarr_format"))
dtype = parse_dtype(_data["dtype"])

if dtype.kind in "SV":
fill_value_encoded = _data.get("fill_value")
if fill_value_encoded is not None:
if dtype.kind == "S":
try:
fill_value = base64.standard_b64decode(fill_value_encoded)
_data["fill_value"] = fill_value
except Exception:
# be lenient, allow for other values that may have been used before base64
# encoding and may work as fill values, e.g., the number 0
pass
elif dtype.kind == "V":
fill_value = base64.standard_b64decode(fill_value_encoded)
_data["fill_value"] = fill_value

# zarr v2 allowed arbitrary keys here.
# We don't want the ArrayV2Metadata constructor to fail just because someone put an
Expand All @@ -155,6 +172,14 @@ def from_dict(cls, data: dict[str, Any]) -> ArrayV2Metadata:

def to_dict(self) -> dict[str, JSON]:
zarray_dict = super().to_dict()

if self.dtype.kind in "SV" and self.fill_value is not None:
# There's a relationship between self.dtype and self.fill_value
# that mypy isn't aware of. The fact that we have S or V dtype here
# means we should have a bytes-type fill_value.
fill_value = base64.standard_b64encode(cast(bytes, self.fill_value)).decode("ascii")
zarray_dict["fill_value"] = fill_value

_ = zarray_dict.pop("chunk_grid")
zarray_dict["chunks"] = self.chunk_grid.chunk_shape

Expand Down
37 changes: 37 additions & 0 deletions tests/v3/test_v2.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
from collections.abc import Iterator

import numpy as np
Expand All @@ -6,6 +7,8 @@
from numcodecs.blosc import Blosc

import zarr
import zarr.core.buffer.cpu
import zarr.storage
from zarr import Array
from zarr.storage import MemoryStore, StorePath

Expand Down Expand Up @@ -46,3 +49,37 @@ def test_codec_pipeline() -> None:
result = array[:]
expected = np.ones(1)
np.testing.assert_array_equal(result, expected)


@pytest.mark.parametrize("dtype", ["|S", "|V"])
async def test_v2_encode_decode(dtype):
    """Check the stored v2 metadata and read-back data for a bytes fill value.

    An array with fill value ``b"X"`` is created in a Zarr v2 group for each
    parametrized dtype. The test then asserts on the JSON stored under
    ``foo/.zarray`` (where the fill value is expected as the base64 text
    ``"WA=="``) and on the contents of the array after reopening it.
    """
    store = zarr.storage.MemoryStore(mode="w")
    root = zarr.group(store=store, zarr_format=2)
    root.create_array(
        name="foo",
        shape=(3,),
        chunks=(3,),
        dtype=dtype,
        fill_value=b"X",
    )

    # Expected ".zarray" document; "WA==" is base64 for the single byte b"X".
    expected_metadata = {
        "chunks": [3],
        "compressor": None,
        "dtype": f"{dtype}0",
        "fill_value": "WA==",
        "filters": None,
        "order": "C",
        "shape": [3],
        "zarr_format": 2,
        "dimension_separator": ".",
    }

    # Read the raw metadata document straight from the store.
    prototype = zarr.core.buffer.default_buffer_prototype()
    raw_metadata = await store.get("foo/.zarray", prototype)
    assert raw_metadata is not None

    metadata_doc = json.loads(raw_metadata.to_bytes())
    assert metadata_doc == expected_metadata

    # No chunk was written, so the read-back values are expected to equal the fill value.
    read_back = zarr.open_array(store=store, path="foo")[:]
    expected_data = np.full((3,), b"X", dtype=dtype)
    np.testing.assert_equal(read_back, expected_data)