8989from zarr .core .metadata .v2 import (
9090 _default_compressor ,
9191 _default_filters ,
92+ parse_compressor ,
93+ parse_filters ,
9294)
9395from zarr .core .metadata .v3 import DataType , parse_node_type_array
9496from zarr .core .sync import sync
@@ -164,7 +166,7 @@ async def get_array_metadata(
164166 )
165167 if zarr_json_bytes is not None and zarray_bytes is not None :
166168 # warn and favor v3
167- msg = f"Both zarr.json (zarr v3) and .zarray (zarr v2) metadata objects exist at { store_path } ."
169+ msg = f"Both zarr.json (Zarr v3) and .zarray (Zarr v2) metadata objects exist at { store_path } . Zarr v3 will be used ."
168170 warnings .warn (msg , stacklevel = 1 )
169171 if zarr_json_bytes is None and zarray_bytes is None :
170172 raise FileNotFoundError (store_path )
@@ -667,8 +669,8 @@ async def _create_v2(
667669 config : ArrayConfig ,
668670 dimension_separator : Literal ["." , "/" ] | None = None ,
669671 fill_value : float | None = None ,
670- filters : list [dict [str , JSON ]] | None = None ,
671- compressor : dict [str , JSON ] | None = None ,
672+ filters : Iterable [dict [str , JSON ] | numcodecs . abc . Codec ] | None = None ,
673+ compressor : dict [str , JSON ] | numcodecs . abc . Codec | None = None ,
672674 attributes : dict [str , JSON ] | None = None ,
673675 overwrite : bool = False ,
674676 ) -> AsyncArray [ArrayV2Metadata ]:
@@ -3492,13 +3494,13 @@ def _get_default_codecs(
34923494 else :
34933495 dtype_key = "numeric"
34943496
3495- return default_codecs [dtype_key ]
3497+ return cast ( list [ dict [ str , JSON ]], default_codecs [dtype_key ])
34963498
34973499
34983500FiltersParam : TypeAlias = (
34993501 Iterable [dict [str , JSON ] | Codec ] | Iterable [numcodecs .abc .Codec ] | Literal ["auto" ]
35003502)
3501- CompressionParam : TypeAlias = (
3503+ CompressorsParam : TypeAlias = (
35023504 Iterable [dict [str , JSON ] | Codec ] | Codec | numcodecs .abc .Codec | Literal ["auto" ]
35033505)
35043506
@@ -3512,7 +3514,7 @@ async def create_array(
35123514 chunks : ChunkCoords | Literal ["auto" ] = "auto" ,
35133515 shards : ChunkCoords | Literal ["auto" ] | None = None ,
35143516 filters : FiltersParam = "auto" ,
3515- compressors : CompressionParam = "auto" ,
3517+ compressors : CompressorsParam = "auto" ,
35163518 fill_value : Any | None = 0 ,
35173519 order : MemoryOrder | None = "C" ,
35183520 zarr_format : ZarrFormat | None = 3 ,
@@ -3544,16 +3546,16 @@ async def create_array(
35443546 filters : Iterable[Codec], optional
35453547 Iterable of filters to apply to each chunk of the array, in order, before serializing that
35463548 chunk to bytes.
3547- For Zarr v3, a "filter" is a transformation that takes an array and returns an array,
3549+ For Zarr v3, a "filter" is a codec that takes an array and returns an array,
35483550 and these values must be instances of ``ArrayArrayCodec``, or dict representations
35493551 of ``ArrayArrayCodec``.
35503552 For Zarr v2, a "filter" can be any numcodecs codec; you should ensure that the
35513553 order of your filters is consistent with the behavior of each filter.
35523554 compressors : Iterable[Codec], optional
35533555 List of compressors to apply to the array. Compressors are applied in order, and after any
35543556 filters are applied (if any are specified).
3555- For Zarr v3, a "compressor" is a transformation that takes a string of bytes and
3556- returns another string of bytes .
3557+ For Zarr v3, a "compressor" is a codec that takes a bytestream and
3558+ returns another bytestream .
35573559 For Zarr v2, a "compressor" can be any numcodecs codec.
35583560 fill_value : Any, optional
35593561 Fill value for the array.
@@ -3611,11 +3613,6 @@ async def create_array(
36113613 )
36123614
36133615 raise ValueError (msg )
3614- if filters != "auto" and not all (isinstance (f , numcodecs .abc .Codec ) for f in filters ):
3615- raise TypeError (
3616- "For Zarr v2 arrays, all elements of `filters` must be numcodecs codecs."
3617- )
3618- filters = cast (Iterable [numcodecs .abc .Codec ] | Literal ["auto" ], filters )
36193616 filters_parsed , compressor_parsed = _parse_chunk_encoding_v2 (
36203617 compressor = compressors , filters = filters , dtype = dtype_parsed
36213618 )
@@ -3644,7 +3641,7 @@ async def create_array(
36443641 array_array , array_bytes , bytes_bytes = _parse_chunk_encoding_v3 (
36453642 compressors = compressors , filters = filters , dtype = dtype_parsed
36463643 )
3647- sub_codecs = ( * array_array , array_bytes , * bytes_bytes )
3644+ sub_codecs = cast ( tuple [ Codec , ...], ( * array_array , array_bytes , * bytes_bytes ) )
36483645 codecs_out : tuple [Codec , ...]
36493646 if shard_shape_parsed is not None :
36503647 sharding_codec = ShardingCodec (chunk_shape = chunk_shape_parsed , codecs = sub_codecs )
@@ -3688,7 +3685,7 @@ def _parse_chunk_key_encoding(
36883685 """
36893686 if data is None :
36903687 if zarr_format == 2 :
3691- result = ChunkKeyEncoding .from_dict ({"name" : "v2" , "separator" : "/ " })
3688+ result = ChunkKeyEncoding .from_dict ({"name" : "v2" , "separator" : ". " })
36923689 else :
36933690 result = ChunkKeyEncoding .from_dict ({"name" : "default" , "separator" : "/" })
36943691 elif isinstance (data , ChunkKeyEncoding ):
@@ -3769,46 +3766,56 @@ def _get_default_chunk_encoding_v2(
37693766
def _parse_chunk_encoding_v2(
    *,
    compressor: CompressorsParam,
    filters: FiltersParam,
    dtype: np.dtype[Any],
) -> tuple[tuple[numcodecs.abc.Codec, ...] | None, numcodecs.abc.Codec | None]:
    """
    Generate chunk encoding classes for v2 arrays with optional defaults.

    Parameters
    ----------
    compressor : CompressorsParam
        ``"auto"`` to use the default compressor for ``dtype``, otherwise a
        single compressor specification that is handed to ``parse_compressor``.
    filters : FiltersParam
        ``"auto"`` to use the default filters for ``dtype``, otherwise an
        iterable of numcodecs codecs that is handed to ``parse_filters``.
    dtype : np.dtype[Any]
        Data type used to look up the default filters and compressor.

    Returns
    -------
    tuple
        ``(filters, compressor)`` as produced by the defaults lookup or by
        ``parse_filters`` / ``parse_compressor``.

    Raises
    ------
    TypeError
        If ``compressor`` is an iterable other than a dict, or if any element
        of ``filters`` is not a ``numcodecs.abc.Codec``.
    """
    default_filters, default_compressor = _get_default_chunk_encoding_v2(dtype)

    _filters: tuple[numcodecs.abc.Codec, ...] | None = None
    _compressor: numcodecs.abc.Codec | None = None

    if compressor == "auto":
        _compressor = default_compressor
    else:
        # A dict is itself an Iterable (over its keys), so a bare
        # ``isinstance(compressor, Iterable)`` check would wrongly reject a
        # single dict-encoded compressor as if it were a collection of codecs.
        # NOTE(review): assumes parse_compressor accepts a dict specification,
        # as the `_create_v2` signature suggests -- confirm.
        if isinstance(compressor, Iterable) and not isinstance(compressor, dict):
            raise TypeError("For Zarr v2 arrays, the `compressor` must be a single codec.")
        _compressor = parse_compressor(compressor)

    if filters == "auto":
        _filters = default_filters
    else:
        # Materialize once: validating a one-shot iterator (e.g. a generator)
        # would otherwise exhaust it before parse_filters gets to read it.
        filters_seq = tuple(filters)
        if not all(isinstance(f, numcodecs.abc.Codec) for f in filters_seq):
            raise TypeError(
                "For Zarr v2 arrays, all elements of `filters` must be numcodecs codecs."
            )
        _filters = parse_filters(filters_seq)

    return _filters, _compressor
37903797
37913798
37923799def _parse_chunk_encoding_v3 (
37933800 * ,
3794- compressors : Iterable [ BytesBytesCodec | dict [ str , JSON ]] | Literal [ "auto" ] ,
3795- filters : Iterable [ ArrayArrayCodec | dict [ str , JSON ]] | Literal [ "auto" ] ,
3801+ compressors : CompressorsParam ,
3802+ filters : FiltersParam ,
37963803 dtype : np .dtype [Any ],
37973804) -> tuple [tuple [ArrayArrayCodec , ...], ArrayBytesCodec , tuple [BytesBytesCodec , ...]]:
37983805 """
37993806 Generate chunk encoding classes for v3 arrays with optional defaults.
38003807 """
38013808 default_array_array , default_array_bytes , default_bytes_bytes = _get_default_encoding_v3 (dtype )
3802- maybe_bytes_bytes : Iterable [BytesBytesCodec | dict [str , JSON ]]
3803- maybe_array_array : Iterable [ArrayArrayCodec | dict [str , JSON ]]
3809+ maybe_bytes_bytes : Iterable [Codec | dict [str , JSON ]]
3810+ maybe_array_array : Iterable [Codec | dict [str , JSON ]]
38043811
38053812 if compressors == "auto" :
38063813 out_bytes_bytes = default_bytes_bytes
38073814 else :
38083815 if isinstance (compressors , dict | Codec ):
38093816 maybe_bytes_bytes = (compressors ,)
38103817 else :
3811- maybe_bytes_bytes = compressors
3818+ maybe_bytes_bytes = cast ( Iterable [ Codec | dict [ str , JSON ]], compressors )
38123819
38133820 out_bytes_bytes = tuple (_parse_bytes_bytes_codec (c ) for c in maybe_bytes_bytes )
38143821
@@ -3818,7 +3825,7 @@ def _parse_chunk_encoding_v3(
38183825 if isinstance (filters , dict | Codec ):
38193826 maybe_array_array = (filters ,)
38203827 else :
3821- maybe_array_array = filters
3828+ maybe_array_array = cast ( Iterable [ Codec | dict [ str , JSON ]], filters )
38223829 out_array_array = tuple (_parse_array_array_codec (c ) for c in maybe_array_array )
38233830
38243831 return out_array_array , default_array_bytes , out_bytes_bytes
0 commit comments