Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion src/zarr/abc/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ def to_dict(self) -> dict[str, JSON]:
are instances of `Metadata`. Sequences of `Metadata` are similarly recursed into, and
the output of that recursion is collected in a list.
"""
...
out_dict = {}
for field in fields(self):
key = field.name
Expand Down
30 changes: 28 additions & 2 deletions src/zarr/core/metadata/v2.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from __future__ import annotations

import base64
from collections.abc import Iterable
from enum import Enum
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, cast

if TYPE_CHECKING:
from typing import Any, Literal, Self
Expand Down Expand Up @@ -31,7 +32,7 @@ class ArrayV2Metadata(ArrayMetadata):
shape: ChunkCoords
chunk_grid: RegularChunkGrid
data_type: np.dtype[Any]
fill_value: None | int | float = 0
fill_value: None | int | float | str | bytes = 0
order: Literal["C", "F"] = "C"
filters: tuple[numcodecs.abc.Codec, ...] | None = None
dimension_separator: Literal[".", "/"] = "."
def from_dict(cls, data: dict[str, Any]) -> ArrayV2Metadata:
    """Build array metadata from a Zarr v2 metadata mapping.

    The input mapping is copied, so the caller's dictionary is not modified.
    The ``zarr_format`` entry is removed from the copy and validated, and the
    remaining entries are passed to the class constructor as keyword arguments.

    For byte-string (``S``) and void/structured (``V``) dtypes a non-null
    ``fill_value`` is base64-decoded to ``bytes`` before construction. This is
    the inverse of the base64 encoding applied when the metadata is serialized.

    Parameters
    ----------
    data
        Mapping holding the array metadata. Must contain ``zarr_format`` and
        ``dtype``; ``fill_value`` is optional.

    Returns
    -------
    ArrayV2Metadata
        The instance produced by ``cls(**_data)``.

    Raises
    ------
    KeyError
        If ``zarr_format`` or ``dtype`` is missing from ``data``.
    TypeError, ValueError
        If the dtype kind is ``V`` and ``fill_value`` cannot be base64-decoded.
    """
    _data = data.copy()
    # check that the zarr_format attribute is correct
    _ = parse_zarr_format(_data.pop("zarr_format"))
    dtype = parse_dtype(_data["dtype"])

    fill_value_encoded = _data.get("fill_value")
    if fill_value_encoded is not None:
        if dtype.kind == "S":
            try:
                _data["fill_value"] = base64.standard_b64decode(fill_value_encoded)
            except (TypeError, ValueError):
                # be lenient, allow for other values that may have been used before base64
                # encoding and may work as fill values, e.g., the number 0.
                # binascii.Error (malformed base64) is a ValueError subclass.
                pass
        elif dtype.kind == "V":
            # The stored value is base64 text, so it has to be *decoded* here;
            # encoding it would fail on str input and double-encode bytes input.
            _data["fill_value"] = base64.standard_b64decode(fill_value_encoded)

    return cls(**_data)

def to_dict(self) -> dict[str, JSON]:
zarray_dict = super().to_dict()

if self.dtype.kind in "SV":
# There's a relationship between self.dtype and self.fill_value
# that mypy isn't aware of. The fact that we have S or V dtype here
# means we should have a bytes-type fill_value.
fill_value = base64.standard_b64encode(cast(bytes, self.fill_value)).decode("ascii")
zarray_dict["fill_value"] = fill_value

_ = zarray_dict.pop("chunk_grid")
zarray_dict["chunks"] = self.chunk_grid.chunk_shape

Expand Down
39 changes: 39 additions & 0 deletions tests/v3/test_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from numcodecs.blosc import Blosc

import zarr
import zarr.core.buffer.cpu
from zarr import Array
from zarr.storage import MemoryStore, StorePath

Expand Down Expand Up @@ -46,3 +47,41 @@ def test_codec_pipeline() -> None:
result = array[:]
expected = np.ones(1)
np.testing.assert_array_equal(result, expected)


async def test_v2_encode_decode():
    """Check how a bytes ``fill_value`` is stored and read back for a v2 array.

    A ``|S4`` array with ``fill_value=b"X"`` is created in an in-memory Zarr v2
    group. The test asserts that the ``foo/.zarray`` document holds the fill
    value as the base64 string ``"WA=="``, then opens the array (to which no
    data was written) and asserts that every element equals ``b"X"``.
    """
    import json

    import zarr.core.buffer.cpu
    import zarr.storage

    store = zarr.storage.MemoryStore(mode="w")
    root = zarr.group(store=store, zarr_format=2)
    root.create_array(name="foo", shape=(3,), dtype="|S4", fill_value=b"X")

    # Inspect the raw metadata document exactly as it was written to the store.
    prototype = zarr.core.buffer.default_buffer_prototype()
    raw_metadata = await store.get("foo/.zarray", prototype)
    assert raw_metadata is not None

    expected_metadata = {
        "chunks": [3],
        "compressor": None,
        "dtype": "|S4",
        "fill_value": "WA==",
        "filters": None,
        "order": "C",
        "shape": [3],
        "zarr_format": 2,
        "dimension_separator": ".",
    }
    assert json.loads(raw_metadata.to_bytes()) == expected_metadata

    # Reading the array back must yield the original bytes fill value.
    stored_values = zarr.open_array(store=store, path="foo")[:]
    expected_values = np.full((3,), b"X", dtype="|S4")
    np.testing.assert_equal(stored_values, expected_values)