22
33import base64
44import warnings
5- from collections .abc import Iterable
5+ from collections .abc import Iterable , Sequence
66from enum import Enum
77from functools import cached_property
8- from typing import TYPE_CHECKING , TypedDict , cast
8+ from typing import TYPE_CHECKING , Any , TypedDict , cast
99
1010import numcodecs .abc
1111
1212from zarr .abc .metadata import Metadata
1313
1414if TYPE_CHECKING :
15- from typing import Any , Literal , Self
15+ from typing import Literal , Self
1616
1717 import numpy .typing as npt
1818
1919 from zarr .core .buffer import Buffer , BufferPrototype
2020 from zarr .core .common import ChunkCoords
2121
2222import json
23+ import numbers
2324from dataclasses import dataclass , field , fields , replace
2425
2526import numcodecs
@@ -146,41 +147,39 @@ def _json_convert(
146147 raise TypeError
147148
148149 zarray_dict = self .to_dict ()
150+ zarray_dict ["fill_value" ] = _serialize_fill_value (self .fill_value , self .dtype )
149151 zattrs_dict = zarray_dict .pop ("attributes" , {})
150152 json_indent = config .get ("json_indent" )
151153 return {
152154 ZARRAY_JSON : prototype .buffer .from_bytes (
153- json .dumps (zarray_dict , default = _json_convert , indent = json_indent ).encode ()
155+ json .dumps (
156+ zarray_dict , default = _json_convert , indent = json_indent , allow_nan = False
157+ ).encode ()
154158 ),
155159 ZATTRS_JSON : prototype .buffer .from_bytes (
156- json .dumps (zattrs_dict , indent = json_indent ).encode ()
160+ json .dumps (zattrs_dict , indent = json_indent , allow_nan = False ).encode ()
157161 ),
158162 }
159163
160164 @classmethod
161165 def from_dict (cls , data : dict [str , Any ]) -> ArrayV2Metadata :
162- # make a copy to protect the original from modification
166+ # Make a copy to protect the original from modification.
163167 _data = data .copy ()
164- # check that the zarr_format attribute is correct
168+ # Check that the zarr_format attribute is correct.
165169 _ = parse_zarr_format (_data .pop ("zarr_format" ))
166- dtype = parse_dtype (_data ["dtype" ])
167170
168- if dtype .kind in "SV" :
169- fill_value_encoded = _data .get ("fill_value" )
170- if fill_value_encoded is not None :
171- fill_value = base64 .standard_b64decode (fill_value_encoded )
172- _data ["fill_value" ] = fill_value
173-
174- # zarr v2 allowed arbitrary keys here.
175- # We don't want the ArrayV2Metadata constructor to fail just because someone put an
176- # extra key in the metadata.
171+ # zarr v2 allowed arbitrary keys in the metadata.
172+ # Filter the keys to only those expected by the constructor.
177173 expected = {x .name for x in fields (cls )}
178- # https://github.com/zarr-developers/zarr-python/issues/2269
179- # handle the renames
180174 expected |= {"dtype" , "chunks" }
181175
182176 # check if `filters` is an empty sequence; if so use None instead and raise a warning
183- if _data ["filters" ] is not None and len (_data ["filters" ]) == 0 :
177+ filters = _data .get ("filters" )
178+ if (
179+ isinstance (filters , Sequence )
180+ and not isinstance (filters , (str , bytes ))
181+ and len (filters ) == 0
182+ ):
184183 msg = (
185184 "Found an empty list of filters in the array metadata document. "
186185 "This is contrary to the Zarr V2 specification, and will cause an error in the future. "
@@ -196,13 +195,6 @@ def from_dict(cls, data: dict[str, Any]) -> ArrayV2Metadata:
196195 def to_dict (self ) -> dict [str , JSON ]:
197196 zarray_dict = super ().to_dict ()
198197
199- if self .dtype .kind in "SV" and self .fill_value is not None :
200- # There's a relationship between self.dtype and self.fill_value
201- # that mypy isn't aware of. The fact that we have S or V dtype here
202- # means we should have a bytes-type fill_value.
203- fill_value = base64 .standard_b64encode (cast (bytes , self .fill_value )).decode ("ascii" )
204- zarray_dict ["fill_value" ] = fill_value
205-
206198 _ = zarray_dict .pop ("dtype" )
207199 dtype_json : JSON
208200 # In the case of zarr v2, the simplest i.e., '|VXX' dtype is represented as a string
@@ -300,7 +292,26 @@ def parse_metadata(data: ArrayV2Metadata) -> ArrayV2Metadata:
300292 return data
301293
302294
303- def parse_fill_value (fill_value : object , dtype : np .dtype [Any ]) -> Any :
def _parse_structured_fill_value(fill_value: Any, dtype: np.dtype[Any]) -> Any:
    """
    Convert a fill value into a scalar of the given structured dtype.

    Parameters
    ----------
    fill_value : Any
        The value to convert. The accepted forms are:

        - ``list``: converted to a tuple and treated as a single record.
        - ``tuple``: treated as a single record.
        - ``bytes``: interpreted as the raw in-memory representation of a record.
        - ``str``: base64-decoded, then interpreted like ``bytes``.
        - anything else: handed to ``np.array`` unchanged.
    dtype : np.dtype
        The dtype the fill value is converted to.

    Returns
    -------
    Any
        The first (or only) element of the array NumPy builds from ``fill_value``
        using ``dtype``.

    Raises
    ------
    ValueError
        If the conversion fails for any reason. The underlying exception is
        chained as the cause.
    """
    try:
        if isinstance(fill_value, list):
            # Wrap the record in a tuple so that it is passed to NumPy as one
            # element of a length-1 array rather than as a sequence of elements.
            return np.array([tuple(fill_value)], dtype=dtype)[0]
        if isinstance(fill_value, tuple):
            return np.array([fill_value], dtype=dtype)[0]
        if isinstance(fill_value, (bytes, str)):
            # Text input is base64; raw bytes are used as they are. A separate
            # name is used so the error message below still shows the caller's
            # original value.
            raw = (
                base64.standard_b64decode(fill_value)
                if isinstance(fill_value, str)
                else fill_value
            )
            return np.frombuffer(raw, dtype=dtype)[0]
        return np.array(fill_value, dtype=dtype)[()]
    except Exception as e:
        # Deliberately broad: decoding and NumPy conversion can fail with several
        # unrelated exception types, and callers get one uniform ValueError.
        raise ValueError(f"Fill_value {fill_value} is not valid for dtype {dtype}.") from e
312+
313+
314+ def parse_fill_value (fill_value : Any , dtype : np .dtype [Any ]) -> Any :
304315 """
305316 Parse a potential fill value into a value that is compatible with the provided dtype.
306317
@@ -317,13 +328,16 @@ def parse_fill_value(fill_value: object, dtype: np.dtype[Any]) -> Any:
317328 """
318329
319330 if fill_value is None or dtype .hasobject :
320- # no fill value
321331 pass
332+ elif dtype .fields is not None :
333+ # the dtype is structured (has multiple fields), so the fill_value might be a
334+ # compound value (e.g., a tuple or dict) that needs field-wise processing.
335+ # We use parse_structured_fill_value to correctly convert each component.
336+ fill_value = _parse_structured_fill_value (fill_value , dtype )
322337 elif not isinstance (fill_value , np .void ) and fill_value == 0 :
323338 # this should be compatible across numpy versions for any array type, including
324339 # structured arrays
325340 fill_value = np .zeros ((), dtype = dtype )[()]
326-
327341 elif dtype .kind == "U" :
328342 # special case unicode because of encoding issues on Windows if passed through numpy
329343 # https://github.com/alimanfoo/zarr/pull/172#issuecomment-343782713
@@ -332,6 +346,11 @@ def parse_fill_value(fill_value: object, dtype: np.dtype[Any]) -> Any:
332346 raise ValueError (
333347 f"fill_value { fill_value !r} is not valid for dtype { dtype } ; must be a unicode string"
334348 )
349+ elif dtype .kind in "SV" and isinstance (fill_value , str ):
350+ fill_value = base64 .standard_b64decode (fill_value )
351+ elif dtype .kind == "c" and isinstance (fill_value , list ) and len (fill_value ) == 2 :
352+ complex_val = complex (float (fill_value [0 ]), float (fill_value [1 ]))
353+ fill_value = np .array (complex_val , dtype = dtype )[()]
335354 else :
336355 try :
337356 if isinstance (fill_value , bytes ) and dtype .kind == "V" :
@@ -347,6 +366,39 @@ def parse_fill_value(fill_value: object, dtype: np.dtype[Any]) -> Any:
347366 return fill_value
348367
349368
def _serialize_fill_value(fill_value: Any, dtype: np.dtype[Any]) -> JSON:
    """
    Convert a fill value into a form that can be written to JSON metadata.

    Parameters
    ----------
    fill_value : Any
        The fill value to serialize.
    dtype : np.dtype
        The dtype associated with the fill value. Only ``dtype.kind`` is
        inspected, to detect the ``"S"`` and ``"V"`` kinds.

    Returns
    -------
    JSON
        - ``None`` when ``fill_value`` is ``None``.
        - A base64 ASCII string for ``"S"`` and ``"V"`` dtypes.
        - The output of ``np.datetime_as_string`` for ``np.datetime64`` values.
        - An ``int`` for integral values.
        - A ``float`` for finite real values, or one of the strings ``"NaN"``,
          ``"Infinity"``, ``"-Infinity"`` for non-finite ones.
        - A two-element list ``[real, imag]`` for complex values, each part
          serialized by the rules above.
        - ``fill_value`` unchanged in every other case.
    """
    if fill_value is None:
        return None

    if dtype.kind in "SV":
        # The dtype implies a bytes-like fill value; the cast only informs the
        # type checker and does nothing at runtime.
        as_bytes = cast(bytes, fill_value)
        return base64.standard_b64encode(as_bytes).decode("ascii")

    if isinstance(fill_value, np.datetime64):
        return np.datetime_as_string(fill_value)

    # Integral must be tested before Real: every Integral is also a Real.
    if isinstance(fill_value, numbers.Integral):
        return int(fill_value)

    if isinstance(fill_value, numbers.Real):
        as_float = float(fill_value)
        if np.isnan(as_float):
            return "NaN"
        if np.isinf(as_float):
            return "-Infinity" if as_float < 0 else "Infinity"
        return as_float

    if isinstance(fill_value, numbers.Complex):
        return [
            _serialize_fill_value(component, dtype)
            for component in (fill_value.real, fill_value.imag)
        ]

    return fill_value
400+
401+
350402def _default_fill_value (dtype : np .dtype [Any ]) -> Any :
351403 """
352404 Get the default fill value for a type.
0 commit comments