25
25
import zarr
26
26
from zarr .abc .codec import ArrayArrayCodec , ArrayBytesCodec , BytesBytesCodec , Codec
27
27
from zarr .abc .numcodec import Numcodec
28
+ from zarr .codecs ._v2 import NumcodecWrapper
28
29
from zarr .codecs .bytes import BytesCodec
29
30
from zarr .codecs .transpose import TransposeCodec
30
31
from zarr .codecs .vlen_utf8 import VLenBytesCodec , VLenUTF8Codec
109
110
ArrayV3MetadataDict ,
110
111
T_ArrayMetadata ,
111
112
)
113
+ from zarr .core .metadata .common import _parse_codec
112
114
from zarr .core .metadata .io import save_metadata
113
115
from zarr .core .metadata .v2 import (
114
116
CompressorLike_V2 ,
125
127
ZarrUserWarning ,
126
128
)
127
129
from zarr .registry import (
128
- _parse_array_array_codec ,
129
- _parse_array_bytes_codec ,
130
- _parse_bytes_bytes_codec ,
131
130
get_pipeline_class ,
132
131
)
133
132
from zarr .storage ._common import StorePath , ensure_no_existing_node , make_store_path
141
140
142
141
from zarr .abc .codec import CodecPipeline
143
142
from zarr .abc .store import Store
144
- from zarr .codecs ._v2 import NumcodecWrapper
145
143
from zarr .codecs .sharding import ShardingCodecIndexLocation
146
144
from zarr .core .dtype .wrapper import TBaseDType , TBaseScalar
147
145
from zarr .storage import StoreLike
@@ -252,6 +250,60 @@ def create_codec_pipeline(metadata: ArrayMetadata, *, store: Store | None = None
252
250
raise TypeError # pragma: no cover
253
251
254
252
253
def _parse_bytes_bytes_codec(
    data: Mapping[str, JSON] | Codec | Numcodec, *, dtype: ZDType[Any, Any]
) -> BytesBytesCodec:
    """
    Normalize the input to a ``BytesBytesCodec`` instance.

    The input is first handed to ``_parse_codec``. If that yields a
    ``NumcodecWrapper``, the wrapper is converted with ``to_bytes_bytes()``.
    If it yields a ``BytesBytesCodec``, that object is returned unchanged.

    Parameters
    ----------
    data : Mapping[str, JSON] | Codec | Numcodec
        The codec, numcodec, or mapping to normalize. Passed as-is to
        ``_parse_codec``.
    dtype : ZDType[Any, Any]
        The data type forwarded to ``_parse_codec``.

    Returns
    -------
    BytesBytesCodec

    Raises
    ------
    ValueError
        If the parsed result is neither a ``NumcodecWrapper`` nor a
        ``BytesBytesCodec``.
    """
    parsed = _parse_codec(data, dtype=dtype)

    # The NumcodecWrapper check comes first so a wrapper is always converted.
    if isinstance(parsed, NumcodecWrapper):
        return parsed.to_bytes_bytes()
    if isinstance(parsed, BytesBytesCodec):
        return parsed

    # The message names BytesBytesCodec: that is the type this function accepts.
    msg = (
        "Expected a NumcodecWrapper or BytesBytesCodec or a dict representation thereof; "
        f"got {data} instead."
    )
    raise ValueError(msg)
269
+
270
+
271
def _parse_array_bytes_codec(
    data: Mapping[str, JSON] | Codec | Numcodec, *, dtype: ZDType[Any, Any]
) -> ArrayBytesCodec:
    """
    Normalize the input to an ``ArrayBytesCodec`` instance.

    The input is first handed to ``_parse_codec``. If that yields a
    ``NumcodecWrapper``, the wrapper is converted with ``to_array_bytes()``.
    If it yields an ``ArrayBytesCodec``, that object is returned unchanged.

    Parameters
    ----------
    data : Mapping[str, JSON] | Codec | Numcodec
        The codec, numcodec, or mapping to normalize. Passed as-is to
        ``_parse_codec``.
    dtype : ZDType[Any, Any]
        The data type forwarded to ``_parse_codec``.

    Returns
    -------
    ArrayBytesCodec

    Raises
    ------
    ValueError
        If the parsed result is neither a ``NumcodecWrapper`` nor an
        ``ArrayBytesCodec``.
    """
    parsed = _parse_codec(data, dtype=dtype)

    # The NumcodecWrapper check comes first so a wrapper is always converted.
    if isinstance(parsed, NumcodecWrapper):
        return parsed.to_array_bytes()

    if not isinstance(parsed, ArrayBytesCodec):
        raise ValueError(
            "Expected a NumcodecWrapper or ArrayBytesCodec or a dict representation thereof; "
            f"got {data} instead."
        )
    return parsed
287
+
288
+
289
def _parse_array_array_codec(
    data: Mapping[str, JSON] | Codec | Numcodec, *, dtype: ZDType[Any, Any]
) -> ArrayArrayCodec:
    """
    Normalize the input to an ``ArrayArrayCodec`` instance.

    The input is first handed to ``_parse_codec``. If that yields a
    ``NumcodecWrapper``, the wrapper is converted with ``to_array_array()``.
    If it yields an ``ArrayArrayCodec``, that object is returned unchanged.

    Parameters
    ----------
    data : Mapping[str, JSON] | Codec | Numcodec
        The codec, numcodec, or mapping to normalize. Passed as-is to
        ``_parse_codec``.
    dtype : ZDType[Any, Any]
        The data type forwarded to ``_parse_codec``.

    Returns
    -------
    ArrayArrayCodec

    Raises
    ------
    ValueError
        If the parsed result is neither a ``NumcodecWrapper`` nor an
        ``ArrayArrayCodec``.
    """
    parsed = _parse_codec(data, dtype=dtype)

    # The NumcodecWrapper check comes first so a wrapper is always converted.
    if isinstance(parsed, NumcodecWrapper):
        return parsed.to_array_array()

    if not isinstance(parsed, ArrayArrayCodec):
        raise ValueError(
            "Expected a NumcodecWrapper or ArrayArrayCodec or a dict representation thereof; "
            f"got {data} instead."
        )
    return parsed
305
+
306
+
255
307
async def get_array_metadata (
256
308
store_path : StorePath , zarr_format : ZarrFormat | None = 3
257
309
) -> dict [str , JSON ]:
@@ -5177,17 +5229,15 @@ def _parse_chunk_encoding_v3(
5177
5229
maybe_array_array = (filters ,)
5178
5230
else :
5179
5231
maybe_array_array = cast ("Iterable[Codec | dict[str, JSON]]" , filters )
5180
- out_array_array = tuple (
5181
- _parse_array_array_codec (c , zarr_format = 3 ) for c in maybe_array_array
5182
- )
5232
+ out_array_array = tuple (_parse_array_array_codec (c , dtype = dtype ) for c in maybe_array_array )
5183
5233
5184
5234
if serializer == "auto" :
5185
5235
out_array_bytes = default_serializer_v3 (dtype )
5186
5236
else :
5187
5237
# TODO: ensure that the serializer is compatible with the ndarray produced by the
5188
5238
# array-array codecs. For example, if a sequence of array-array codecs produces an
5189
5239
# array with a single-byte data type, then the serializer should not specify endianness.
5190
- out_array_bytes = _parse_array_bytes_codec (serializer , zarr_format = 3 )
5240
+ out_array_bytes = _parse_array_bytes_codec (serializer , dtype = dtype )
5191
5241
5192
5242
if compressors is None :
5193
5243
out_bytes_bytes : tuple [BytesBytesCodec , ...] = ()
@@ -5200,12 +5250,7 @@ def _parse_chunk_encoding_v3(
5200
5250
else :
5201
5251
maybe_bytes_bytes = compressors # type: ignore[assignment]
5202
5252
5203
- out_bytes_bytes = tuple (_parse_bytes_bytes_codec (c ) for c in maybe_bytes_bytes )
5204
-
5205
- # specialize codecs as needed given the dtype
5206
-
5207
- # TODO: refactor so that the config only contains the name of the codec, and we use the dtype
5208
- # to create the codec instance, instead of storing a dict representation of a full codec.
5253
+ out_bytes_bytes = tuple (_parse_bytes_bytes_codec (c , dtype = dtype ) for c in maybe_bytes_bytes )
5209
5254
5210
5255
# TODO: ensure that the serializer is compatible with the ndarray produced by the
5211
5256
# array-array codecs. For example, if a sequence of array-array codecs produces an
0 commit comments