11from __future__ import annotations
22
3+ import json
34import math
45from collections .abc import Mapping
6+ from importlib .metadata import version
57from typing import (
68 TYPE_CHECKING ,
79 Annotated ,
1719 overload ,
1820)
1921
20- import numcodecs
2122import numpy as np
2223import numpy .typing as npt
2324import zarr
2425from numcodecs .abc import Codec
26+ from packaging .version import Version
2527from pydantic import AfterValidator , BaseModel , field_validator , model_validator
2628from pydantic .functional_validators import BeforeValidator
29+ from zarr .core .array import Array , AsyncArray
2730from zarr .core .metadata import ArrayV2Metadata
31+ from zarr .core .sync import sync
2832from zarr .errors import ContainsArrayError , ContainsGroupError
33+ from zarr .storage ._common import make_store_path
2934
3035from pydantic_zarr .core import (
3136 IncEx ,
3237 StrictBase ,
33- contains_array ,
34- contains_group ,
3538 ensure_key_no_path ,
39+ maybe_node ,
3640 model_like ,
37- stringify_dtype ,
41+ parse_dtype_v2 ,
3842)
3943
4044if TYPE_CHECKING :
4145 from zarr .abc .store import Store
46+ from zarr .core .array_spec import ArrayConfigParams
4247
4348TBaseAttr : TypeAlias = Mapping [str , object ] | BaseModel
4449TBaseItem : TypeAlias = Union ["GroupSpec" , "ArraySpec" ]
4954TAttr = TypeVar ("TAttr" , bound = TBaseAttr )
5055TItem = TypeVar ("TItem" , bound = TBaseItem )
5156
52- DtypeStr = Annotated [str , BeforeValidator (stringify_dtype )]
57+ DtypeStr = Annotated [str , BeforeValidator (parse_dtype_v2 )]
58+
59+ BoolFillValue = bool
60+ IntFillValue = int
61+ # todo: introduce a type that represents hexadecimal representations of floats
62+ FloatFillValue = Literal ["Infinity" , "-Infinity" , "NaN" ] | float
63+ ComplexFillValue = tuple [FloatFillValue , FloatFillValue ]
64+ RawFillValue = tuple [int , ...]
65+
66+ FillValue = (
67+ BoolFillValue | IntFillValue | FloatFillValue | ComplexFillValue | RawFillValue | str | None
68+ )
5369
5470DimensionSeparator = Literal ["." , "/" ]
5571MemoryOrder = Literal ["C" , "F" ]
@@ -155,8 +171,8 @@ class ArraySpec(NodeSpec, Generic[TAttr]):
155171 attributes : TAttr = cast (TAttr , {})
156172 shape : tuple [int , ...]
157173 chunks : tuple [int , ...]
158- dtype : DtypeStr
159- fill_value : int | float | None = 0
174+ dtype : DtypeStr | list [ tuple [ Any , ...]]
175+ fill_value : FillValue = 0
160176 order : MemoryOrder = "C"
161177 filters : list [CodecDict ] | None = None
162178 dimension_separator : Annotated [
@@ -285,7 +301,7 @@ def from_array(
285301
286302 return cls (
287303 shape = shape_actual ,
288- dtype = stringify_dtype (dtype_actual ),
304+ dtype = parse_dtype_v2 (dtype_actual ),
289305 chunks = chunks_actual ,
290306 attributes = attributes_actual ,
291307 fill_value = fill_value_actual ,
@@ -322,40 +338,25 @@ def from_zarr(cls, array: zarr.Array) -> Self:
322338 msg = "Array is not a Zarr format 2 array"
323339 raise TypeError (msg )
324340
325- if len (array .compressors ):
326- compressor = array .compressors [0 ]
327- if TYPE_CHECKING :
328- # TODO: overload array.compressors in zarr-python and remove this type check
329- assert isinstance (compressor , Codec )
330- compressor_dict = compressor .get_config ()
341+ if Version (version ("zarr" )) < Version ("3.1.0" ):
342+ from zarr .core .buffer import default_buffer_prototype
343+
344+ stored_meta = array .metadata .to_buffer_dict (prototype = default_buffer_prototype ())
345+ meta_json = json .loads (stored_meta [".zarray" ].to_bytes ()) | {
346+ "attributes" : array .attrs .asdict ()
347+ }
331348 else :
332- compressor_dict = None
349+ meta_json = array . metadata . to_dict ()
333350
334- return cls (
335- shape = array .shape ,
336- chunks = array .chunks ,
337- dtype = str (array .dtype ),
338- # explicitly cast to numpy type and back to python
339- # so that int 0 isn't serialized as 0.0
340- fill_value = (
341- array .dtype .type (array .fill_value ).tolist ()
342- if array .fill_value is not None
343- else array .fill_value
344- ),
345- order = array .order ,
346- filters = array .filters ,
347- dimension_separator = array .metadata .dimension_separator ,
348- compressor = compressor_dict ,
349- attributes = array .attrs .asdict (),
350- )
351+ return cls .model_validate (meta_json )
351352
352353 def to_zarr (
353354 self ,
354355 store : Store ,
355356 path : str ,
356357 * ,
357358 overwrite : bool = False ,
358- ** kwargs : Any ,
359+ config : ArrayConfigParams | None = None ,
359360 ) -> zarr .Array :
360361 """
361362 Serialize an `ArraySpec` to a Zarr array at a specific path in a Zarr store. This operation
@@ -369,36 +370,32 @@ def to_zarr(
369370 The location of the array inside the store.
370371 overwrite : bool, default = False
371372 Whether to overwrite existing objects in storage to create the Zarr array.
372- **kwargs : Any
373- Additional keyword arguments are passed to `zarr.create` .
373+ config : ArrayConfigParams | None, default = None
374+ An instance of `ArrayConfigParams` that defines the runtime configuration for the array .
374375
375376 Returns
376377 -------
377378 zarr.Array
378379 A Zarr array that is structurally identical to `self`.
379380 """
380- spec_dict = self .model_dump ()
381- attrs = spec_dict .pop ("attributes" )
382- if self .compressor is not None :
383- spec_dict ["compressor" ] = numcodecs .get_codec (spec_dict ["compressor" ])
384- if self .filters is not None :
385- spec_dict ["filters" ] = [numcodecs .get_codec (f ) for f in spec_dict ["filters" ]]
386- if contains_array (store , path ):
387- extant_array = zarr .open_array (store , path = path , zarr_format = 2 )
388-
389- if not self .like (extant_array ):
390- if not overwrite :
391- raise ContainsArrayError (store , path )
381+ store_path = sync (make_store_path (store , path = path ))
382+
383+ extant_node = maybe_node (store , path , zarr_format = 2 )
384+ if isinstance (extant_node , zarr .Array ):
385+ if not self .like (extant_node ) and not overwrite :
386+ raise ContainsArrayError (store , path )
392387 else :
388+ # If there's an existing array that is identical to the model, and overwrite is False,
389+ # we can just return that existing array.
393390 if not overwrite :
394- # extant_array is read-only, so we make a new array handle that
395- # takes **kwargs
396- return zarr . open_array (
397- store = extant_array . store , path = extant_array . path , zarr_format = 2 , ** kwargs
398- )
399- result = zarr . create ( store = store , path = path , overwrite = overwrite , ** spec_dict , ** kwargs )
400- result . attrs . put ( attrs )
401- return result
391+ return extant_node
392+ if isinstance ( extant_node , zarr . Group ) and not overwrite :
393+ raise ContainsGroupError ( store , path )
394+
395+ meta : ArrayV2Metadata = ArrayV2Metadata . from_dict ( self . model_dump () )
396+ async_array = AsyncArray ( metadata = meta , store_path = store_path , config = config )
397+ sync ( async_array . _save_metadata ( meta ) )
398+ return Array ( _async_array = async_array )
402399
403400 def like (
404401 self ,
@@ -568,28 +565,34 @@ def to_zarr(
568565 """
569566 spec_dict = self .model_dump (exclude = {"members" : True })
570567 attrs = spec_dict .pop ("attributes" )
571- if contains_group (store , path ):
572- extant_group = zarr . group ( store , path = path , zarr_format = 2 )
573- if not self .like (extant_group ):
568+ extant_node = maybe_node (store , path , zarr_format = 2 )
569+ if isinstance ( extant_node , zarr . Group ):
570+ if not self .like (extant_node ):
574571 if not overwrite :
572+ """
575573 msg = (
576574 f"A group already exists at path {path}. "
577575 "That group is structurally dissimilar to the group you are trying to store."
578576 "Call to_zarr with overwrite=True to overwrite that group."
579577 )
580- raise ContainsGroupError (msg )
578+ """
579+ # Zarr's contains group error uses questionable design and doesn't take a message
580+ raise ContainsGroupError (store , path )
581581 else :
582582 if not overwrite :
583583 # if the extant group is structurally identical to self, and overwrite is false,
584584 # then just return the extant group
585- return extant_group
585+ return extant_node
586586
587- elif contains_array (store , path ) and not overwrite :
587+ elif isinstance (extant_node , zarr .Array ) and not overwrite :
588+ """
588589 msg = (
589590 f"An array already exists at path {path}. "
590591 "Call to_zarr with overwrite=True to overwrite the array."
591592 )
592- raise ContainsArrayError (msg )
593+ """
594+ # Zarr's contains array error uses questionable design and doesn't take a message
595+ raise ContainsArrayError (store , path )
593596 else :
594597 zarr .create_group (store = store , overwrite = overwrite , path = path , zarr_format = 2 )
595598
0 commit comments