110110 _parse_array_array_codec ,
111111 _parse_array_bytes_codec ,
112112 _parse_bytes_bytes_codec ,
113- _resolve_codec ,
114113 get_pipeline_class ,
115114)
116115from zarr .storage import StoreLike , make_store_path
@@ -469,7 +468,8 @@ async def create(
469468 - For Unicode strings, the default is ``VLenUTF8Codec`` and ``ZstdCodec``.
470469 - For bytes or objects, the default is ``VLenBytesCodec`` and ``ZstdCodec``.
471470
472- These defaults can be changed by modifying the value of ``array.v3_default_codecs`` in :mod:`zarr.core.config`.
471+ These defaults can be changed by modifying the value of ``array.v3_default_filters``,
472+ ``array.v3_default_serializer`` and ``array.v3_default_compressors`` in :mod:`zarr.core.config`.
473473 dimension_names : Iterable[str], optional
474474 The names of the dimensions (default is None).
475475 Zarr format 3 only. Zarr format 2 arrays should not use this parameter.
@@ -1715,7 +1715,8 @@ def create(
17151715 - For Unicode strings, the default is ``VLenUTF8Codec`` and ``ZstdCodec``.
17161716 - For bytes or objects, the default is ``VLenBytesCodec`` and ``ZstdCodec``.
17171717
1718- These defaults can be changed by modifying the value of ``array.v3_default_codecs`` in :mod:`zarr.core.config`.
1718+ These defaults can be changed by modifying the value of ``array.v3_default_filters``,
1719+ ``array.v3_default_serializer`` and ``array.v3_default_compressors`` in :mod:`zarr.core.config`.
17191720 dimension_names : Iterable[str], optional
17201721 The names of the dimensions (default is None).
17211722 Zarr format 3 only. Zarr format 2 arrays should not use this parameter.
@@ -3698,17 +3699,9 @@ def _build_parents(
36983699
36993700def _get_default_codecs (
37003701 np_dtype : np .dtype [Any ],
3701- ) -> list [dict [str , JSON ]]:
3702- default_codecs = zarr_config .get ("array.v3_default_codecs" )
3703- dtype = DataType .from_numpy (np_dtype )
3704- if dtype == DataType .string :
3705- dtype_key = "string"
3706- elif dtype == DataType .bytes :
3707- dtype_key = "bytes"
3708- else :
3709- dtype_key = "numeric"
3710-
3711- return cast (list [dict [str , JSON ]], default_codecs [dtype_key ])
3702+ ) -> tuple [Codec , ...]:
3703+ filters , serializer , compressors = _get_default_chunk_encoding_v3 (np_dtype )
3704+ return filters + (serializer ,) + compressors
37123705
37133706
37143707FiltersLike : TypeAlias = (
@@ -3785,9 +3778,8 @@ async def create_array(
37853778 For Zarr format 3, a "filter" is a codec that takes an array and returns an array,
37863779 and these values must be instances of ``ArrayArrayCodec``, or dict representations
37873780 of ``ArrayArrayCodec``.
3788- If ``filters`` and ``compressors`` are not specified, then the default codecs for
3789- Zarr format 3 will be used.
3790- These defaults can be changed by modifying the value of ``array.v3_default_codecs``
3781+ If no ``filters`` are provided, a default set of filters will be used.
3782+ These defaults can be changed by modifying the value of ``array.v3_default_filters``
37913783 in :mod:`zarr.core.config`.
37923784 Use ``None`` to omit default filters.
37933785
@@ -3803,22 +3795,22 @@ async def create_array(
38033795
38043796 For Zarr format 3, a "compressor" is a codec that takes a bytestream, and
38053797 returns another bytestream. Multiple compressors may be provided for Zarr format 3.
3806- If ``filters`` and ``compressors`` are not specified, then the default codecs for
3807- Zarr format 3 will be used.
3808- These defaults can be changed by modifying the value of ``array.v3_default_codecs``
3798+ If no ``compressors`` are provided, a default set of compressors will be used.
3799+ These defaults can be changed by modifying the value of ``array.v3_default_compressors``
38093800 in :mod:`zarr.core.config`.
38103801 Use ``None`` to omit default compressors.
38113802
38123803 For Zarr format 2, a "compressor" can be any numcodecs codec. Only a single compressor may
38133804 be provided for Zarr format 2.
3814- If no ``compressors`` are provided, a default compressor will be used.
3815- These defaults can be changed by modifying the value of ``array.v2_default_compressor``
3805+ If no ``compressor`` is provided, a default compressor will be used. This default can be changed by modifying the value of ``array.v2_default_compressor``
38163806 in :mod:`zarr.core.config`.
38173807 Use ``None`` to omit the default compressor.
38183808 serializer : dict[str, JSON] | ArrayBytesCodec, optional
38193809 Array-to-bytes codec to use for encoding the array data.
38203810 Zarr format 3 only. Zarr format 2 arrays use implicit array-to-bytes conversion.
3821- If no ``serializer`` is provided, the `zarr.codecs.BytesCodec` codec will be used.
3811+ If no ``serializer`` is provided, a default serializer will be used.
3812+ These defaults can be changed by modifying the value of ``array.v3_default_serializer``
3813+ in :mod:`zarr.core.config`.
38223814 fill_value : Any, optional
38233815 Fill value for the array.
38243816 order : {"C", "F"}, optional
@@ -3997,7 +3989,6 @@ def _get_default_chunk_encoding_v3(
39973989 """
39983990 Get the default ArrayArrayCodecs, ArrayBytesCodec, and BytesBytesCodec for a given dtype.
39993991 """
4000- default_codecs = zarr_config .get ("array.v3_default_codecs" )
40013992 dtype = DataType .from_numpy (np_dtype )
40023993 if dtype == DataType .string :
40033994 dtype_key = "string"
@@ -4006,31 +3997,15 @@ def _get_default_chunk_encoding_v3(
40063997 else :
40073998 dtype_key = "numeric"
40083999
4009- codec_dicts = default_codecs [dtype_key ]
4010- codecs = tuple (_resolve_codec (c ) for c in codec_dicts )
4011- array_bytes_maybe = None
4012- array_array : list [ArrayArrayCodec ] = []
4013- bytes_bytes : list [BytesBytesCodec ] = []
4014-
4015- for codec in codecs :
4016- if isinstance (codec , ArrayBytesCodec ):
4017- if array_bytes_maybe is not None :
4018- raise ValueError (
4019- f"Got two instances of ArrayBytesCodec: { array_bytes_maybe } and { codec } . "
4020- "Only one array-to-bytes codec is allowed."
4021- )
4022- array_bytes_maybe = codec
4023- elif isinstance (codec , ArrayArrayCodec ):
4024- array_array .append (codec )
4025- elif isinstance (codec , BytesBytesCodec ):
4026- bytes_bytes .append (codec )
4027- else :
4028- raise TypeError (f"Unexpected codec type: { type (codec )} " )
4000+ default_filters = zarr_config .get ("array.v3_default_filters" ).get (dtype_key )
4001+ default_serializer = zarr_config .get ("array.v3_default_serializer" ).get (dtype_key )
4002+ default_compressors = zarr_config .get ("array.v3_default_compressors" ).get (dtype_key )
40294003
4030- if array_bytes_maybe is None :
4031- raise ValueError ("Required ArrayBytesCodec was not found." )
4004+ filters = tuple (_parse_array_array_codec (codec_dict ) for codec_dict in default_filters )
4005+ serializer = _parse_array_bytes_codec (default_serializer )
4006+ compressors = tuple (_parse_bytes_bytes_codec (codec_dict ) for codec_dict in default_compressors )
40324007
4033- return tuple ( array_array ), array_bytes_maybe , tuple ( bytes_bytes )
4008+ return filters , serializer , compressors
40344009
40354010
40364011def _get_default_chunk_encoding_v2 (
@@ -4111,34 +4086,15 @@ def _parse_chunk_encoding_v3(
41114086 default_array_array , default_array_bytes , default_bytes_bytes = _get_default_chunk_encoding_v3 (
41124087 dtype
41134088 )
4114- maybe_bytes_bytes : Iterable [Codec | dict [str , JSON ]]
4115- maybe_array_array : Iterable [Codec | dict [str , JSON ]]
4116- out_bytes_bytes : tuple [BytesBytesCodec , ...]
4117- if compressors is None :
4118- out_bytes_bytes = ()
4119-
4120- elif compressors == "auto" :
4121- out_bytes_bytes = default_bytes_bytes
41224089
4123- else :
4124- if isinstance (compressors , dict | Codec ):
4125- maybe_bytes_bytes = (compressors ,)
4126- elif compressors is None :
4127- maybe_bytes_bytes = ()
4128- else :
4129- maybe_bytes_bytes = cast (Iterable [Codec | dict [str , JSON ]], compressors )
4130-
4131- out_bytes_bytes = tuple (_parse_bytes_bytes_codec (c ) for c in maybe_bytes_bytes )
4132- out_array_array : tuple [ArrayArrayCodec , ...]
41334090 if filters is None :
4134- out_array_array = ()
4091+ out_array_array : tuple [ ArrayArrayCodec , ...] = ()
41354092 elif filters == "auto" :
41364093 out_array_array = default_array_array
41374094 else :
4095+ maybe_array_array : Iterable [Codec | dict [str , JSON ]]
41384096 if isinstance (filters , dict | Codec ):
41394097 maybe_array_array = (filters ,)
4140- elif filters is None :
4141- maybe_array_array = ()
41424098 else :
41434099 maybe_array_array = cast (Iterable [Codec | dict [str , JSON ]], filters )
41444100 out_array_array = tuple (_parse_array_array_codec (c ) for c in maybe_array_array )
@@ -4148,6 +4104,19 @@ def _parse_chunk_encoding_v3(
41484104 else :
41494105 out_array_bytes = _parse_array_bytes_codec (serializer )
41504106
4107+ if compressors is None :
4108+ out_bytes_bytes : tuple [BytesBytesCodec , ...] = ()
4109+ elif compressors == "auto" :
4110+ out_bytes_bytes = default_bytes_bytes
4111+ else :
4112+ maybe_bytes_bytes : Iterable [Codec | dict [str , JSON ]]
4113+ if isinstance (compressors , dict | Codec ):
4114+ maybe_bytes_bytes = (compressors ,)
4115+ else :
4116+ maybe_bytes_bytes = cast (Iterable [Codec | dict [str , JSON ]], compressors )
4117+
4118+ out_bytes_bytes = tuple (_parse_bytes_bytes_codec (c ) for c in maybe_bytes_bytes )
4119+
41514120 return out_array_array , out_array_bytes , out_bytes_bytes
41524121
41534122
0 commit comments