2525import zarr
2626from zarr .abc .codec import ArrayArrayCodec , ArrayBytesCodec , BytesBytesCodec , Codec
2727from zarr .abc .numcodec import Numcodec
28+ from zarr .codecs ._v2 import NumcodecWrapper
2829from zarr .codecs .bytes import BytesCodec
2930from zarr .codecs .transpose import TransposeCodec
3031from zarr .codecs .vlen_utf8 import VLenBytesCodec , VLenUTF8Codec
109110 ArrayV3MetadataDict ,
110111 T_ArrayMetadata ,
111112)
113+ from zarr .core .metadata .common import _parse_codec
112114from zarr .core .metadata .io import save_metadata
113115from zarr .core .metadata .v2 import (
114116 CompressorLike_V2 ,
125127 ZarrUserWarning ,
126128)
127129from zarr .registry import (
128- _parse_array_array_codec ,
129- _parse_array_bytes_codec ,
130- _parse_bytes_bytes_codec ,
131130 get_pipeline_class ,
132131)
133132from zarr .storage ._common import StorePath , ensure_no_existing_node , make_store_path
141140
142141 from zarr .abc .codec import CodecPipeline
143142 from zarr .abc .store import Store
144- from zarr .codecs ._v2 import NumcodecWrapper
145143 from zarr .codecs .sharding import ShardingCodecIndexLocation
146144 from zarr .core .dtype .wrapper import TBaseDType , TBaseScalar
147145 from zarr .storage import StoreLike
@@ -252,6 +250,60 @@ def create_codec_pipeline(metadata: ArrayMetadata, *, store: Store | None = None
252250 raise TypeError # pragma: no cover
253251
254252
def _parse_bytes_bytes_codec(
    data: Mapping[str, JSON] | Codec | Numcodec, *, dtype: ZDType[Any, Any]
) -> BytesBytesCodec:
    """
    Normalize the input to a ``BytesBytesCodec`` instance.

    The input is first passed to ``_parse_codec`` together with ``dtype``. The
    value it returns is then handled as follows:

    - a ``NumcodecWrapper`` is converted with its ``to_bytes_bytes()`` method;
    - a ``BytesBytesCodec`` is returned as is.

    Parameters
    ----------
    data : Mapping[str, JSON] | Codec | Numcodec
        The codec, or a representation of a codec, to normalize.
    dtype : ZDType[Any, Any]
        The data type forwarded to ``_parse_codec``.

    Returns
    -------
    BytesBytesCodec

    Raises
    ------
    ValueError
        If the object returned by ``_parse_codec`` is neither a
        ``NumcodecWrapper`` nor a ``BytesBytesCodec``.
    """
    _codec_or_numcodec_wrapper = _parse_codec(data, dtype=dtype)
    if isinstance(_codec_or_numcodec_wrapper, NumcodecWrapper):
        return _codec_or_numcodec_wrapper.to_bytes_bytes()
    elif isinstance(_codec_or_numcodec_wrapper, BytesBytesCodec):
        return _codec_or_numcodec_wrapper
    # The message names BytesBytesCodec, the type this function actually checks for.
    msg = f"Expected a NumcodecWrapper or BytesBytesCodec or a dict representation thereof; got {data} instead."
    raise ValueError(msg)
269+
270+
def _parse_array_bytes_codec(
    data: Mapping[str, JSON] | Codec | Numcodec, *, dtype: ZDType[Any, Any]
) -> ArrayBytesCodec:
    """
    Normalize the input to an ``ArrayBytesCodec`` instance.

    ``data`` and ``dtype`` are handed to ``_parse_codec``. A ``NumcodecWrapper``
    result is converted with its ``to_array_bytes()`` method, and an
    ``ArrayBytesCodec`` result is returned unchanged.

    Parameters
    ----------
    data : Mapping[str, JSON] | Codec | Numcodec
        The codec, or a representation of a codec, to normalize.
    dtype : ZDType[Any, Any]
        The data type forwarded to ``_parse_codec``.

    Returns
    -------
    ArrayBytesCodec

    Raises
    ------
    ValueError
        If the object returned by ``_parse_codec`` is neither a
        ``NumcodecWrapper`` nor an ``ArrayBytesCodec``.
    """
    parsed = _parse_codec(data, dtype=dtype)

    # Wrapped numcodecs objects are specialized to the array -> bytes role.
    if isinstance(parsed, NumcodecWrapper):
        return parsed.to_array_bytes()

    # Anything else must already be the right kind of codec.
    if not isinstance(parsed, ArrayBytesCodec):
        raise ValueError(
            f"Expected a NumcodecWrapper or ArrayBytesCodec or a dict representation thereof; got {data} instead."
        )
    return parsed
287+
288+
def _parse_array_array_codec(
    data: Mapping[str, JSON] | Codec | Numcodec, *, dtype: ZDType[Any, Any]
) -> ArrayArrayCodec:
    """
    Normalize the input to an ``ArrayArrayCodec`` instance.

    ``data`` and ``dtype`` are handed to ``_parse_codec``. A ``NumcodecWrapper``
    result is converted with its ``to_array_array()`` method, and an
    ``ArrayArrayCodec`` result is returned unchanged.

    Parameters
    ----------
    data : Mapping[str, JSON] | Codec | Numcodec
        The codec, or a representation of a codec, to normalize.
    dtype : ZDType[Any, Any]
        The data type forwarded to ``_parse_codec``.

    Returns
    -------
    ArrayArrayCodec

    Raises
    ------
    ValueError
        If the object returned by ``_parse_codec`` is neither a
        ``NumcodecWrapper`` nor an ``ArrayArrayCodec``.
    """
    parsed = _parse_codec(data, dtype=dtype)

    # Wrapped numcodecs objects are specialized to the array -> array role.
    if isinstance(parsed, NumcodecWrapper):
        return parsed.to_array_array()

    # Anything else must already be the right kind of codec.
    if not isinstance(parsed, ArrayArrayCodec):
        raise ValueError(
            f"Expected a NumcodecWrapper or ArrayArrayCodec or a dict representation thereof; got {data} instead."
        )
    return parsed
305+
306+
255307async def get_array_metadata (
256308 store_path : StorePath , zarr_format : ZarrFormat | None = 3
257309) -> dict [str , JSON ]:
@@ -5177,17 +5229,15 @@ def _parse_chunk_encoding_v3(
51775229 maybe_array_array = (filters ,)
51785230 else :
51795231 maybe_array_array = cast ("Iterable[Codec | dict[str, JSON]]" , filters )
5180- out_array_array = tuple (
5181- _parse_array_array_codec (c , zarr_format = 3 ) for c in maybe_array_array
5182- )
5232+ out_array_array = tuple (_parse_array_array_codec (c , dtype = dtype ) for c in maybe_array_array )
51835233
51845234 if serializer == "auto" :
51855235 out_array_bytes = default_serializer_v3 (dtype )
51865236 else :
51875237 # TODO: ensure that the serializer is compatible with the ndarray produced by the
51885238 # array-array codecs. For example, if a sequence of array-array codecs produces an
51895239 # array with a single-byte data type, then the serializer should not specify endianness.
5190- out_array_bytes = _parse_array_bytes_codec (serializer , zarr_format = 3 )
5240+ out_array_bytes = _parse_array_bytes_codec (serializer , dtype = dtype )
51915241
51925242 if compressors is None :
51935243 out_bytes_bytes : tuple [BytesBytesCodec , ...] = ()
@@ -5200,12 +5250,7 @@ def _parse_chunk_encoding_v3(
52005250 else :
52015251 maybe_bytes_bytes = compressors # type: ignore[assignment]
52025252
5203- out_bytes_bytes = tuple (_parse_bytes_bytes_codec (c ) for c in maybe_bytes_bytes )
5204-
5205- # specialize codecs as needed given the dtype
5206-
5207- # TODO: refactor so that the config only contains the name of the codec, and we use the dtype
5208- # to create the codec instance, instead of storing a dict representation of a full codec.
5253+ out_bytes_bytes = tuple (_parse_bytes_bytes_codec (c , dtype = dtype ) for c in maybe_bytes_bytes )
52095254
52105255 # TODO: ensure that the serializer is compatible with the ndarray produced by the
52115256 # array-array codecs. For example, if a sequence of array-array codecs produces an
0 commit comments