Skip to content

Commit fe9cc85

Browse files
committed
wip - fill value
1 parent f3a2e0a commit fe9cc85

File tree

3 files changed

+67
-3
lines changed

3 files changed

+67
-3
lines changed

src/zarr/abc/metadata.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ def to_dict(self) -> dict[str, JSON]:
2222
are instances of `Metadata`. Sequences of `Metadata` are similarly recursed into, and
2323
the output of that recursion is collected in a list.
2424
"""
25-
...
2625
out_dict = {}
2726
for field in fields(self):
2827
key = field.name

src/zarr/core/metadata/v2.py

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
from __future__ import annotations
22

3+
import base64
34
from collections.abc import Iterable
45
from enum import Enum
5-
from typing import TYPE_CHECKING
6+
from typing import TYPE_CHECKING, cast
67

78
if TYPE_CHECKING:
89
from typing import Any, Literal, Self
@@ -31,7 +32,7 @@ class ArrayV2Metadata(ArrayMetadata):
3132
shape: ChunkCoords
3233
chunk_grid: RegularChunkGrid
3334
data_type: np.dtype[Any]
34-
fill_value: None | int | float = 0
35+
fill_value: None | int | float | str | bytes = 0
3536
order: Literal["C", "F"] = "C"
3637
filters: tuple[numcodecs.abc.Codec, ...] | None = None
3738
dimension_separator: Literal[".", "/"] = "."
@@ -140,10 +141,35 @@ def from_dict(cls, data: dict[str, Any]) -> ArrayV2Metadata:
140141
_data = data.copy()
141142
# check that the zarr_format attribute is correct
142143
_ = parse_zarr_format(_data.pop("zarr_format"))
144+
dtype = parse_dtype(_data["dtype"])
145+
146+
if dtype.kind in "SV":
147+
fill_value_encoded = _data.get("fill_value")
148+
if fill_value_encoded is not None:
149+
if dtype.kind == "S":
150+
try:
151+
fill_value = base64.standard_b64decode(fill_value_encoded)
152+
_data["fill_value"] = fill_value
153+
except Exception:
154+
# be lenient, allow for other values that may have been used before base64
155+
# encoding and may work as fill values, e.g., the number 0
156+
pass
157+
elif dtype.kind == "V":
158+
fill_value = base64.standard_b64encode(fill_value_encoded)
159+
_data["fill_value"] = fill_value
160+
143161
return cls(**_data)
144162

145163
def to_dict(self) -> dict[str, JSON]:
146164
zarray_dict = super().to_dict()
165+
166+
if self.dtype.kind in "SV":
167+
# There's a relationship between self.dtype and self.fill_value
168+
# that mypy isn't aware of. The fact that we have S or V dtype here
169+
# means we should have a bytes-type fill_value.
170+
fill_value = base64.standard_b64encode(cast(bytes, self.fill_value)).decode("ascii")
171+
zarray_dict["fill_value"] = fill_value
172+
147173
_ = zarray_dict.pop("chunk_grid")
148174
zarray_dict["chunks"] = self.chunk_grid.chunk_shape
149175

tests/v3/test_v2.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from numcodecs.blosc import Blosc
77

88
import zarr
9+
import zarr.core.buffer.cpu
910
from zarr import Array
1011
from zarr.storage import MemoryStore, StorePath
1112

@@ -46,3 +47,41 @@ def test_codec_pipeline() -> None:
4647
result = array[:]
4748
expected = np.ones(1)
4849
np.testing.assert_array_equal(result, expected)
50+
51+
52+
async def test_v2_encode_decode() -> None:
    """Check that a bytes fill value is base64-encoded in v2 array metadata.

    Creates a ``|S4`` array with ``fill_value=b"X"`` inside a v2 group backed
    by an in-memory store, then verifies two things:

    1. the ``foo/.zarray`` document written to the store carries the fill
       value as the base64 string ``"WA=="`` (the encoding of ``b"X"``);
    2. re-opening the array and reading it back yields ``b"X"`` in every
       element, i.e. the encoded fill value is decoded again on load.
    """
    # ``json`` is imported locally because the module-level import block of
    # this test file is only partially visible; the zarr modules used below
    # are already imported at module level.
    import json

    store = MemoryStore(mode="w")
    group = zarr.group(store=store, zarr_format=2)
    group.create_array(
        name="foo",
        shape=(3,),
        dtype="|S4",
        fill_value=b"X",
    )

    # Read the raw metadata document straight from the store so the
    # assertion covers the serialized form, not the in-memory object.
    raw_metadata = await store.get(
        "foo/.zarray", zarr.core.buffer.default_buffer_prototype()
    )
    assert raw_metadata is not None

    serialized = json.loads(raw_metadata.to_bytes())
    expected_metadata = {
        "chunks": [3],
        "compressor": None,
        "dtype": "|S4",
        "fill_value": "WA==",
        "filters": None,
        "order": "C",
        "shape": [3],
        "zarr_format": 2,
        "dimension_separator": ".",
    }
    assert serialized == expected_metadata

    # No chunk was ever written, so every element read back must be the
    # decoded fill value.
    data = zarr.open_array(store=store, path="foo")[:]
    expected_data = np.full((3,), b"X", dtype="|S4")
    np.testing.assert_array_equal(data, expected_data)

0 commit comments

Comments
 (0)