11import asyncio
22import logging
3- from typing import Literal , cast
4-
5- import numcodecs .abc
3+ from typing import TYPE_CHECKING , Literal , cast
64
75import zarr
86from zarr import Array , Group
9- from zarr .abc .codec import ArrayArrayCodec , BytesBytesCodec , Codec
107from zarr .abc .store import Store
11- from zarr .codecs .blosc import BloscCodec , BloscShuffle
12- from zarr .codecs .bytes import BytesCodec
13- from zarr .codecs .gzip import GzipCodec
14- from zarr .codecs .transpose import TransposeCodec
15- from zarr .codecs .zstd import ZstdCodec
8+ from zarr .core .array import v2_to_v3_codecs
169from zarr .core .buffer .core import default_buffer_prototype
1710from zarr .core .chunk_key_encodings import V2ChunkKeyEncoding
11+ from zarr .core .codec_pipeline import codecs_from_list
1812from zarr .core .common import (
1913 ZARR_JSON ,
2014 ZARRAY_JSON ,
2317 ZMETADATA_V2_JSON ,
2418 ZarrFormat ,
2519)
26- from zarr .core .dtype .common import HasEndianness
27- from zarr .core .dtype .wrapper import TBaseDType , TBaseScalar , ZDType
2820from zarr .core .group import GroupMetadata
2921from zarr .core .metadata .v2 import ArrayV2Metadata
3022from zarr .core .metadata .v3 import ArrayV3Metadata
3123from zarr .core .sync import sync
32- from zarr .registry import get_codec_class
3324from zarr .storage import StorePath
3425
26+ if TYPE_CHECKING :
27+ from zarr .abc .codec import Codec
28+
3529_logger = logging .getLogger (__name__ )
3630
3731
@@ -186,27 +180,10 @@ async def _metadata_exists(zarr_format: ZarrFormat, store_path: StorePath) -> bo
186180def _convert_array_metadata (metadata_v2 : ArrayV2Metadata ) -> ArrayV3Metadata :
187181 chunk_key_encoding = V2ChunkKeyEncoding (separator = metadata_v2 .dimension_separator )
188182
189- codecs : list [Codec ] = []
190-
191- # array-array codecs
192- if metadata_v2 .order == "F" :
193- # F is equivalent to order: n-1, ... 1, 0
194- codecs .append (TransposeCodec (order = list (range (len (metadata_v2 .shape ) - 1 , - 1 , - 1 ))))
195-
196- if metadata_v2 .filters is not None :
197- codecs .extend (_convert_filters (metadata_v2 .filters ))
198-
199- # array-bytes codecs
200- if not isinstance (metadata_v2 .dtype , HasEndianness ):
201- codecs .append (BytesCodec (endian = None ))
202- else :
203- codecs .append (BytesCodec (endian = metadata_v2 .dtype .endianness ))
204-
205- # bytes-bytes codecs
206- if metadata_v2 .compressor is not None :
207- bytes_bytes_codec = _convert_compressor (metadata_v2 .compressor , metadata_v2 .dtype )
208- codecs .append (bytes_bytes_codec )
209-
183+ codecs : tuple [Codec , ...] = ()
184+ # We first generate a sequence of V3 codecs, then we ensure that this sequence is valid
185+ aa , ab , bb = codecs_from_list (v2_to_v3_codecs (metadata_v2 ))
186+ codecs = (* aa , ab , * bb )
210187 return ArrayV3Metadata (
211188 shape = metadata_v2 .shape ,
212189 data_type = metadata_v2 .dtype ,
@@ -220,66 +197,6 @@ def _convert_array_metadata(metadata_v2: ArrayV2Metadata) -> ArrayV3Metadata:
220197 )
221198
222199
def _convert_filters(filters: tuple[numcodecs.abc.Codec, ...]) -> list[ArrayArrayCodec]:
    """Translate a sequence of numcodecs filters into array-to-array codecs.

    Every filter is first mapped through ``_find_numcodecs_zarr3``; only once
    all of them have been converted is the result validated.

    Raises
    ------
    TypeError
        If any converted codec is not an ``ArrayArrayCodec``.
    """
    converted = list(map(_find_numcodecs_zarr3, filters))

    for candidate in converted:
        if isinstance(candidate, ArrayArrayCodec):
            continue
        raise TypeError(f"Filter {type(candidate)} is not an ArrayArrayCodec")

    # Validation above guarantees the element type; cast is for the type checker only.
    return cast(list[ArrayArrayCodec], converted)
230-
231-
def _convert_compressor(
    compressor: numcodecs.abc.Codec, dtype: ZDType[TBaseDType, TBaseScalar]
) -> BytesBytesCodec:
    """Build the bytes-to-bytes codec that corresponds to a numcodecs compressor.

    ``blosc``, ``zstd`` and ``gzip`` are rebuilt from the compressor's own
    attributes; any other codec id is delegated to ``_find_numcodecs_zarr3``.

    Raises
    ------
    TypeError
        If the delegated lookup yields a codec that is not a ``BytesBytesCodec``.
    """
    codec_id = compressor.codec_id

    if codec_id == "blosc":
        # typesize is taken from the array dtype rather than from the compressor
        return BloscCodec(
            typesize=dtype.to_native_dtype().itemsize,
            cname=compressor.cname,
            clevel=compressor.clevel,
            shuffle=BloscShuffle.from_int(compressor.shuffle),
            blocksize=compressor.blocksize,
        )

    if codec_id == "zstd":
        return ZstdCodec(level=compressor.level, checksum=compressor.checksum)

    if codec_id == "gzip":
        return GzipCodec(level=compressor.level)

    # No dedicated mapping: fall back to a matching zarr.codecs.numcodecs codec, if any
    fallback = _find_numcodecs_zarr3(compressor)
    if isinstance(fallback, BytesBytesCodec):
        return fallback
    raise TypeError(f"Compressor {type(fallback)} is not a BytesBytesCodec")
262-
263-
def _find_numcodecs_zarr3(numcodecs_codec: numcodecs.abc.Codec) -> Codec:
    """Find matching zarr.codecs.numcodecs codec (if it exists)

    The lookup name is ``numcodecs.<codec_id>``; the codec class registered
    under that name is instantiated via ``from_dict`` with the numcodecs
    codec's own configuration.

    Raises
    ------
    ValueError
        If no codec class is registered under the derived name (chained from
        the ``KeyError`` raised by ``get_codec_class``).
    """
    codec_id = numcodecs_codec.codec_id

    # No padding around the id: this exact string is both the registry lookup
    # key and the "name" field handed to from_dict.
    numcodec_name = f"numcodecs.{codec_id}"
    numcodec_dict = {
        "name": numcodec_name,
        "configuration": numcodecs_codec.get_config(),
    }

    try:
        codec_v3 = get_codec_class(numcodec_name)
    except KeyError as exc:
        raise ValueError(
            f"Couldn't find corresponding zarr.codecs.numcodecs codec for {codec_id} "
        ) from exc

    return codec_v3.from_dict(numcodec_dict)
281-
282-
283200async def _save_v3_metadata (
284201 metadata_v3 : ArrayV3Metadata | GroupMetadata , output_path : StorePath , dry_run : bool = False
285202) -> None :
0 commit comments