22
33import base64
44import warnings
5- from collections.abc import Iterable
5+ from collections.abc import Iterable, Sequence
66from enum import Enum
77from functools import cached_property
8- from typing import TYPE_CHECKING, TypedDict, cast
8+ from typing import TYPE_CHECKING, Any, TypedDict, cast
99
1010import numcodecs.abc
1111
1212from zarr.abc.metadata import Metadata
1313
1414if TYPE_CHECKING:
15- from typing import Any, Literal, Self
15+ from typing import Literal, Self
1616
1717 import numpy.typing as npt
1818
1919 from zarr.core.buffer import Buffer, BufferPrototype
2020 from zarr.core.common import ChunkCoords
2121
2222import json
23+ import numbers
2324from dataclasses import dataclass, field, fields, replace
2425
2526import numcodecs
@@ -146,41 +147,39 @@ def _json_convert(
146147 raise TypeError
147148
148149 zarray_dict = self.to_dict()
150+ zarray_dict["fill_value"] = _serialize_fill_value(self.fill_value, self.dtype)
149151 zattrs_dict = zarray_dict.pop("attributes", {})
150152 json_indent = config.get("json_indent")
151153 return {
152154 ZARRAY_JSON: prototype.buffer.from_bytes(
153- json.dumps(zarray_dict, default=_json_convert, indent=json_indent).encode()
155+ json.dumps(
156+ zarray_dict, default=_json_convert, indent=json_indent, allow_nan=False
157+ ).encode()
154158 ),
155159 ZATTRS_JSON: prototype.buffer.from_bytes(
156- json.dumps(zattrs_dict, indent=json_indent).encode()
160+ json.dumps(zattrs_dict, indent=json_indent, allow_nan=False ).encode()
157161 ),
158162 }
159163
160164 @classmethod
161165 def from_dict(cls, data: dict[str, Any]) -> ArrayV2Metadata:
162- # make a copy to protect the original from modification
166+ # Make a copy to protect the original from modification.
163167 _data = data.copy()
164- # check that the zarr_format attribute is correct
168+ # Check that the zarr_format attribute is correct.
165169 _ = parse_zarr_format(_data.pop("zarr_format"))
166- dtype = parse_dtype(_data["dtype"])
167170
168- if dtype.kind in "SV":
169- fill_value_encoded = _data.get("fill_value")
170- if fill_value_encoded is not None:
171- fill_value = base64.standard_b64decode(fill_value_encoded)
172- _data["fill_value"] = fill_value
173-
174- # zarr v2 allowed arbitrary keys here.
175- # We don't want the ArrayV2Metadata constructor to fail just because someone put an
176- # extra key in the metadata.
171+ # zarr v2 allowed arbitrary keys in the metadata.
172+ # Filter the keys to only those expected by the constructor.
177173 expected = {x.name for x in fields(cls)}
178- # https://github.com/zarr-developers/zarr-python/issues/2269
179- # handle the renames
180174 expected |= {"dtype", "chunks"}
181175
182176 # check if `filters` is an empty sequence; if so use None instead and raise a warning
183- if _data["filters"] is not None and len(_data["filters"]) == 0:
177+ filters = _data.get("filters")
178+ if (
179+ isinstance(filters, Sequence)
180+ and not isinstance(filters, (str, bytes))
181+ and len(filters) == 0
182+ ):
184183 msg = (
185184 "Found an empty list of filters in the array metadata document. "
186185 "This is contrary to the Zarr V2 specification, and will cause an error in the future. "
@@ -196,13 +195,6 @@ def from_dict(cls, data: dict[str, Any]) -> ArrayV2Metadata:
196195 def to_dict(self) -> dict[str, JSON]:
197196 zarray_dict = super().to_dict()
198197
199- if self.dtype.kind in "SV" and self.fill_value is not None:
200- # There's a relationship between self.dtype and self.fill_value
201- # that mypy isn't aware of. The fact that we have S or V dtype here
202- # means we should have a bytes-type fill_value.
203- fill_value = base64.standard_b64encode(cast(bytes, self.fill_value)).decode("ascii")
204- zarray_dict["fill_value"] = fill_value
205-
206198 _ = zarray_dict.pop("dtype")
207199 dtype_json: JSON
208200 # In the case of zarr v2, the simplest i.e., '|VXX' dtype is represented as a string
@@ -300,7 +292,26 @@ def parse_metadata(data: ArrayV2Metadata) -> ArrayV2Metadata:
300292 return data
301293
302294
303- def parse_fill_value(fill_value: object, dtype: np.dtype[Any]) -> Any:
295+ def _parse_structured_fill_value(fill_value: Any, dtype: np.dtype[Any]) -> Any:
296+ """Handle structured dtype/fill value pairs"""
297+ print("FILL VALUE", fill_value, "DT", dtype)
298+ try:
299+ if isinstance(fill_value, list):
300+ return np.array([tuple(fill_value)], dtype=dtype)[0]
301+ elif isinstance(fill_value, tuple):
302+ return np.array([fill_value], dtype=dtype)[0]
303+ elif isinstance(fill_value, bytes):
304+ return np.frombuffer(fill_value, dtype=dtype)[0]
305+ elif isinstance(fill_value, str):
306+ decoded = base64.standard_b64decode(fill_value)
307+ return np.frombuffer(decoded, dtype=dtype)[0]
308+ else:
309+ return np.array(fill_value, dtype=dtype)[()]
310+ except Exception as e:
311+ raise ValueError(f"Fill_value {fill_value} is not valid for dtype {dtype}.") from e
312+
313+
314+ def parse_fill_value(fill_value: Any, dtype: np.dtype[Any]) -> Any:
304315 """
305316 Parse a potential fill value into a value that is compatible with the provided dtype.
306317
@@ -317,13 +328,16 @@ def parse_fill_value(fill_value: object, dtype: np.dtype[Any]) -> Any:
317328 """
318329
319330 if fill_value is None or dtype.hasobject:
320- # no fill value
321331 pass
332+ elif dtype.fields is not None:
333+ # the dtype is structured (has multiple fields), so the fill_value might be a
334+ # compound value (e.g., a tuple or dict) that needs field-wise processing.
335+ # We use parse_structured_fill_value to correctly convert each component.
336+ fill_value = _parse_structured_fill_value(fill_value, dtype)
322337 elif not isinstance(fill_value, np.void) and fill_value == 0:
323338 # this should be compatible across numpy versions for any array type, including
324339 # structured arrays
325340 fill_value = np.zeros((), dtype=dtype)[()]
326-
327341 elif dtype.kind == "U":
328342 # special case unicode because of encoding issues on Windows if passed through numpy
329343 # https://github.com/alimanfoo/zarr/pull/172#issuecomment-343782713
@@ -332,6 +346,11 @@ def parse_fill_value(fill_value: object, dtype: np.dtype[Any]) -> Any:
332346 raise ValueError(
333347 f"fill_value {fill_value!r} is not valid for dtype {dtype}; must be a unicode string"
334348 )
349+ elif dtype.kind in "SV" and isinstance(fill_value, str):
350+ fill_value = base64.standard_b64decode(fill_value)
351+ elif dtype.kind == "c" and isinstance(fill_value, list) and len(fill_value) == 2:
352+ complex_val = complex(float(fill_value[0]), float(fill_value[1]))
353+ fill_value = np.array(complex_val, dtype=dtype)[()]
335354 else:
336355 try:
337356 if isinstance(fill_value, bytes) and dtype.kind == "V":
@@ -347,6 +366,39 @@ def parse_fill_value(fill_value: object, dtype: np.dtype[Any]) -> Any:
347366 return fill_value
348367
349368
369+ def _serialize_fill_value(fill_value: Any, dtype: np.dtype[Any]) -> JSON:
370+ serialized: JSON
371+
372+ if fill_value is None:
373+ serialized = None
374+ elif dtype.kind in "SV":
375+ # There's a relationship between dtype and fill_value
376+ # that mypy isn't aware of. The fact that we have S or V dtype here
377+ # means we should have a bytes-type fill_value.
378+ serialized = base64.standard_b64encode(cast(bytes, fill_value)).decode("ascii")
379+ elif isinstance(fill_value, np.datetime64):
380+ serialized = np.datetime_as_string(fill_value)
381+ elif isinstance(fill_value, numbers.Integral):
382+ serialized = int(fill_value)
383+ elif isinstance(fill_value, numbers.Real):
384+ float_fv = float(fill_value)
385+ if np.isnan(float_fv):
386+ serialized = "NaN"
387+ elif np.isinf(float_fv):
388+ serialized = "Infinity" if float_fv > 0 else "-Infinity"
389+ else:
390+ serialized = float_fv
391+ elif isinstance(fill_value, numbers.Complex):
392+ serialized = [
393+ _serialize_fill_value(fill_value.real, dtype),
394+ _serialize_fill_value(fill_value.imag, dtype),
395+ ]
396+ else:
397+ serialized = fill_value
398+
399+ return serialized
400+
401+
350402def _default_fill_value(dtype: np.dtype[Any]) -> Any:
351403 """
352404 Get the default fill value for a type.
0 commit comments