|
1 | 1 | from __future__ import annotations |
2 | 2 |
|
| 3 | +import base64 |
3 | 4 | from collections.abc import Iterable |
4 | 5 | from enum import Enum |
5 | | -from typing import TYPE_CHECKING |
| 6 | +from typing import TYPE_CHECKING, cast |
6 | 7 |
|
7 | 8 | if TYPE_CHECKING: |
8 | 9 | from typing import Any, Literal, Self |
@@ -31,7 +32,7 @@ class ArrayV2Metadata(ArrayMetadata): |
31 | 32 | shape: ChunkCoords |
32 | 33 | chunk_grid: RegularChunkGrid |
33 | 34 | data_type: np.dtype[Any] |
34 | | - fill_value: None | int | float = 0 |
| 35 | + fill_value: None | int | float | str | bytes = 0 |
35 | 36 | order: Literal["C", "F"] = "C" |
36 | 37 | filters: tuple[numcodecs.abc.Codec, ...] | None = None |
37 | 38 | dimension_separator: Literal[".", "/"] = "." |
@@ -140,10 +141,35 @@ def from_dict(cls, data: dict[str, Any]) -> ArrayV2Metadata: |
140 | 141 | _data = data.copy() |
141 | 142 | # check that the zarr_format attribute is correct |
142 | 143 | _ = parse_zarr_format(_data.pop("zarr_format")) |
| 144 | + dtype = parse_dtype(_data["dtype"]) |
| 145 | + |
| 146 | + if dtype.kind in "SV": |
| 147 | + fill_value_encoded = _data.get("fill_value") |
| 148 | + if fill_value_encoded is not None: |
| 149 | + if dtype.kind == "S": |
| 150 | + try: |
| 151 | + fill_value = base64.standard_b64decode(fill_value_encoded) |
| 152 | + _data["fill_value"] = fill_value |
| 153 | + except Exception: |
| 154 | + # be lenient, allow for other values that may have been used before base64 |
| 155 | + # encoding and may work as fill values, e.g., the number 0 |
| 156 | + pass |
| 157 | + elif dtype.kind == "V": |
| 158 | + fill_value = base64.standard_b64decode(fill_value_encoded) |
| 159 | + _data["fill_value"] = fill_value |
| 160 | + |
143 | 161 | return cls(**_data) |
144 | 162 |
|
145 | 163 | def to_dict(self) -> dict[str, JSON]: |
146 | 164 | zarray_dict = super().to_dict() |
| 165 | + |
| 166 | + if self.dtype.kind in "SV" and self.fill_value is not None: |
| 167 | + # There's a relationship between self.dtype and self.fill_value |
| 168 | + # that mypy isn't aware of. The fact that we have S or V dtype here |
| 169 | + # means we should have a bytes-type fill_value. |
| 170 | + fill_value = base64.standard_b64encode(cast(bytes, self.fill_value)).decode("ascii") |
| 171 | + zarray_dict["fill_value"] = fill_value |
| 172 | + |
147 | 173 | _ = zarray_dict.pop("chunk_grid") |
148 | 174 | zarray_dict["chunks"] = self.chunk_grid.chunk_shape |
149 | 175 |
|
|
0 commit comments