22
33import  base64 
44import  warnings 
5- from  collections .abc  import  Iterable 
5+ from  collections .abc  import  Iterable ,  Sequence 
66from  enum  import  Enum 
77from  functools  import  cached_property 
8- from  typing  import  TYPE_CHECKING , TypedDict , cast 
8+ from  typing  import  TYPE_CHECKING , Any ,  TypedDict , cast 
99
1010import  numcodecs .abc 
1111
1212from  zarr .abc .metadata  import  Metadata 
1313
1414if  TYPE_CHECKING :
15-     from  typing  import  Any ,  Literal , Self 
15+     from  typing  import  Literal , Self 
1616
1717    import  numpy .typing  as  npt 
1818
1919    from  zarr .core .buffer  import  Buffer , BufferPrototype 
2020    from  zarr .core .common  import  ChunkCoords 
2121
2222import  json 
23+ import  numbers 
2324from  dataclasses  import  dataclass , field , fields , replace 
2425
2526import  numcodecs 
@@ -146,41 +147,39 @@ def _json_convert(
146147            raise  TypeError 
147148
148149        zarray_dict  =  self .to_dict ()
150+         zarray_dict ["fill_value" ] =  _serialize_fill_value (self .fill_value , self .dtype )
149151        zattrs_dict  =  zarray_dict .pop ("attributes" , {})
150152        json_indent  =  config .get ("json_indent" )
151153        return  {
152154            ZARRAY_JSON : prototype .buffer .from_bytes (
153-                 json .dumps (zarray_dict , default = _json_convert , indent = json_indent ).encode ()
155+                 json .dumps (
156+                     zarray_dict , default = _json_convert , indent = json_indent , allow_nan = False 
157+                 ).encode ()
154158            ),
155159            ZATTRS_JSON : prototype .buffer .from_bytes (
156-                 json .dumps (zattrs_dict , indent = json_indent ).encode ()
160+                 json .dumps (zattrs_dict , indent = json_indent ,  allow_nan = False ).encode ()
157161            ),
158162        }
159163
160164    @classmethod  
161165    def  from_dict (cls , data : dict [str , Any ]) ->  ArrayV2Metadata :
162-         # make  a copy to protect the original from modification 
166+         # Make  a copy to protect the original from modification.  
163167        _data  =  data .copy ()
164-         # check  that the zarr_format attribute is correct 
168+         # Check  that the zarr_format attribute is correct.  
165169        _  =  parse_zarr_format (_data .pop ("zarr_format" ))
166-         dtype  =  parse_dtype (_data ["dtype" ])
167170
168-         if  dtype .kind  in  "SV" :
169-             fill_value_encoded  =  _data .get ("fill_value" )
170-             if  fill_value_encoded  is  not   None :
171-                 fill_value  =  base64 .standard_b64decode (fill_value_encoded )
172-                 _data ["fill_value" ] =  fill_value 
173- 
174-         # zarr v2 allowed arbitrary keys here. 
175-         # We don't want the ArrayV2Metadata constructor to fail just because someone put an 
176-         # extra key in the metadata. 
171+         # zarr v2 allowed arbitrary keys in the metadata. 
172+         # Filter the keys to only those expected by the constructor. 
177173        expected  =  {x .name  for  x  in  fields (cls )}
178-         # https://github.com/zarr-developers/zarr-python/issues/2269 
179-         # handle the renames 
180174        expected  |=  {"dtype" , "chunks" }
181175
182176        # check if `filters` is an empty sequence; if so use None instead and raise a warning 
183-         if  _data ["filters" ] is  not   None  and  len (_data ["filters" ]) ==  0 :
177+         filters  =  _data .get ("filters" )
178+         if  (
179+             isinstance (filters , Sequence )
180+             and  not  isinstance (filters , (str , bytes ))
181+             and  len (filters ) ==  0 
182+         ):
184183            msg  =  (
185184                "Found an empty list of filters in the array metadata document. " 
186185                "This is contrary to the Zarr V2 specification, and will cause an error in the future. " 
@@ -196,13 +195,6 @@ def from_dict(cls, data: dict[str, Any]) -> ArrayV2Metadata:
196195    def  to_dict (self ) ->  dict [str , JSON ]:
197196        zarray_dict  =  super ().to_dict ()
198197
199-         if  self .dtype .kind  in  "SV"  and  self .fill_value  is  not   None :
200-             # There's a relationship between self.dtype and self.fill_value 
201-             # that mypy isn't aware of. The fact that we have S or V dtype here 
202-             # means we should have a bytes-type fill_value. 
203-             fill_value  =  base64 .standard_b64encode (cast (bytes , self .fill_value )).decode ("ascii" )
204-             zarray_dict ["fill_value" ] =  fill_value 
205- 
206198        _  =  zarray_dict .pop ("dtype" )
207199        dtype_json : JSON 
208200        # In the case of zarr v2, the simplest i.e., '|VXX' dtype is represented as a string 
@@ -306,7 +298,26 @@ def parse_metadata(data: ArrayV2Metadata) -> ArrayV2Metadata:
306298    return  data 
307299
308300
309- def  parse_fill_value (fill_value : object , dtype : np .dtype [Any ]) ->  Any :
def _parse_structured_fill_value(fill_value: Any, dtype: np.dtype[Any]) -> Any:
    """
    Convert a fill value into a scalar of a structured dtype.

    Parameters
    ----------
    fill_value : Any
        The value to convert. Handled forms are:

        - ``list`` or ``tuple``: interpreted as one record, one item per field.
        - ``bytes``: interpreted as the raw in-memory representation of one record.
        - ``str``: base64-decoded, then interpreted like ``bytes``.
        - anything else: passed directly to ``np.array(fill_value, dtype=dtype)``.
    dtype : np.dtype
        The target dtype. The caller is expected to pass a structured dtype.

    Returns
    -------
    Any
        The scalar extracted from a numpy array of ``dtype`` built from
        ``fill_value``.

    Raises
    ------
    ValueError
        If ``fill_value`` cannot be converted to ``dtype``. The underlying
        exception is attached as ``__cause__``.
    """
    try:
        if isinstance(fill_value, (list, tuple)):
            # A list is converted to a tuple so that numpy reads it as a single
            # record rather than as a sequence of separate elements.
            return np.array([tuple(fill_value)], dtype=dtype)[0]
        if isinstance(fill_value, (bytes, str)):
            # Strings carry the record base64-encoded; bytes carry it raw.
            raw = (
                base64.standard_b64decode(fill_value)
                if isinstance(fill_value, str)
                else fill_value
            )
            return np.frombuffer(raw, dtype=dtype)[0]
        return np.array(fill_value, dtype=dtype)[()]
    except Exception as e:
        # Deliberately broad: numpy and base64 raise several exception types for
        # malformed input, and all of them are reported uniformly as ValueError.
        raise ValueError(f"Fill_value {fill_value} is not valid for dtype {dtype}.") from e
318+ 
319+ 
320+ def  parse_fill_value (fill_value : Any , dtype : np .dtype [Any ]) ->  Any :
310321    """ 
311322    Parse a potential fill value into a value that is compatible with the provided dtype. 
312323
@@ -323,13 +334,16 @@ def parse_fill_value(fill_value: object, dtype: np.dtype[Any]) -> Any:
323334    """ 
324335
325336    if  fill_value  is  None  or  dtype .hasobject :
326-         # no fill value 
327337        pass 
338+     elif  dtype .fields  is  not   None :
339+         # the dtype is structured (has multiple fields), so the fill_value might be a
340+         # compound value (e.g., a list, a tuple, raw bytes, or a base64 string).
341+         # We use _parse_structured_fill_value to convert it to a scalar of this dtype.
342+         fill_value  =  _parse_structured_fill_value (fill_value , dtype )
328343    elif  not  isinstance (fill_value , np .void ) and  fill_value  ==  0 :
329344        # this should be compatible across numpy versions for any array type, including 
330345        # structured arrays 
331346        fill_value  =  np .zeros ((), dtype = dtype )[()]
332- 
333347    elif  dtype .kind  ==  "U" :
334348        # special case unicode because of encoding issues on Windows if passed through numpy 
335349        # https://github.com/alimanfoo/zarr/pull/172#issuecomment-343782713 
@@ -338,6 +352,11 @@ def parse_fill_value(fill_value: object, dtype: np.dtype[Any]) -> Any:
338352            raise  ValueError (
339353                f"fill_value { fill_value !r}   is not valid for dtype { dtype }  ; must be a unicode string" 
340354            )
355+     elif  dtype .kind  in  "SV"  and  isinstance (fill_value , str ):
356+         fill_value  =  base64 .standard_b64decode (fill_value )
357+     elif  dtype .kind  ==  "c"  and  isinstance (fill_value , list ) and  len (fill_value ) ==  2 :
358+         complex_val  =  complex (float (fill_value [0 ]), float (fill_value [1 ]))
359+         fill_value  =  np .array (complex_val , dtype = dtype )[()]
341360    else :
342361        try :
343362            if  isinstance (fill_value , bytes ) and  dtype .kind  ==  "V" :
@@ -353,6 +372,39 @@ def parse_fill_value(fill_value: object, dtype: np.dtype[Any]) -> Any:
353372    return  fill_value 
354373
355374
def _serialize_fill_value(fill_value: Any, dtype: np.dtype[Any]) -> JSON:
    """
    Convert a fill value into a JSON-compatible representation.

    Parameters
    ----------
    fill_value : Any
        The fill value to serialize.
    dtype : np.dtype
        The dtype of the array the fill value belongs to. It is only consulted
        to detect ``S`` and ``V`` kinds, whose fill values are base64-encoded.

    Returns
    -------
    JSON
        - ``None`` if ``fill_value`` is ``None``.
        - A base64-encoded ASCII string for dtypes of kind ``S`` or ``V``.
        - The output of ``np.datetime_as_string`` for ``np.datetime64`` values.
        - A ``bool`` for boolean values.
        - An ``int`` for integral values.
        - A ``float`` for finite real values, or one of the strings ``"NaN"``,
          ``"Infinity"``, ``"-Infinity"`` for non-finite ones.
        - A two-element list ``[real, imag]`` for complex values, each component
          serialized by the rules above.
        - ``fill_value`` unchanged in every other case.
    """
    if fill_value is None:
        return None
    if dtype.kind in "SV":
        # There's a relationship between dtype and fill_value
        # that mypy isn't aware of. The fact that we have S or V dtype here
        # means we should have a bytes-type fill_value.
        return base64.standard_b64encode(cast(bytes, fill_value)).decode("ascii")
    if isinstance(fill_value, np.datetime64):
        # np.datetime_as_string returns np.str_; hand back a plain str.
        return str(np.datetime_as_string(fill_value))
    if isinstance(fill_value, (bool, np.bool_)):
        # Must precede the Integral check: Python's bool is a numbers.Integral,
        # so True/False would otherwise be emitted as 1/0.
        return bool(fill_value)
    if isinstance(fill_value, numbers.Integral):
        return int(fill_value)
    if isinstance(fill_value, numbers.Real):
        float_fv = float(fill_value)
        # Non-finite floats are emitted as strings, not bare NaN/Infinity tokens.
        if np.isnan(float_fv):
            return "NaN"
        if np.isinf(float_fv):
            return "Infinity" if float_fv > 0 else "-Infinity"
        return float_fv
    if isinstance(fill_value, numbers.Complex):
        # Complex values become a [real, imag] pair; each part goes through the
        # real-number rules above (including the NaN/Infinity handling).
        return [
            _serialize_fill_value(fill_value.real, dtype),
            _serialize_fill_value(fill_value.imag, dtype),
        ]
    return fill_value
406+ 
407+ 
356408def  _default_fill_value (dtype : np .dtype [Any ]) ->  Any :
357409    """ 
358410    Get the default fill value for a type. 
0 commit comments