1
1
import asyncio
2
2
import logging
3
- from typing import Literal , cast
4
-
5
- import numcodecs .abc
3
+ from typing import TYPE_CHECKING , Literal , cast
6
4
7
5
import zarr
8
6
from zarr import Array , Group
9
- from zarr .abc .codec import ArrayArrayCodec , BytesBytesCodec , Codec
10
7
from zarr .abc .store import Store
11
- from zarr .codecs .blosc import BloscCodec , BloscShuffle
12
- from zarr .codecs .bytes import BytesCodec
13
- from zarr .codecs .gzip import GzipCodec
14
- from zarr .codecs .transpose import TransposeCodec
15
- from zarr .codecs .zstd import ZstdCodec
8
+ from zarr .core .array import v2_to_v3_codecs
16
9
from zarr .core .buffer .core import default_buffer_prototype
17
10
from zarr .core .chunk_key_encodings import V2ChunkKeyEncoding
11
+ from zarr .core .codec_pipeline import codecs_from_list
18
12
from zarr .core .common import (
19
13
ZARR_JSON ,
20
14
ZARRAY_JSON ,
23
17
ZMETADATA_V2_JSON ,
24
18
ZarrFormat ,
25
19
)
26
- from zarr .core .dtype .common import HasEndianness
27
- from zarr .core .dtype .wrapper import TBaseDType , TBaseScalar , ZDType
28
20
from zarr .core .group import GroupMetadata
29
21
from zarr .core .metadata .v2 import ArrayV2Metadata
30
22
from zarr .core .metadata .v3 import ArrayV3Metadata
31
23
from zarr .core .sync import sync
32
- from zarr .registry import get_codec_class
33
24
from zarr .storage import StorePath
34
25
26
+ if TYPE_CHECKING :
27
+ from zarr .abc .codec import Codec
28
+
35
29
_logger = logging .getLogger (__name__ )
36
30
37
31
@@ -186,27 +180,10 @@ async def _metadata_exists(zarr_format: ZarrFormat, store_path: StorePath) -> bo
186
180
def _convert_array_metadata (metadata_v2 : ArrayV2Metadata ) -> ArrayV3Metadata :
187
181
chunk_key_encoding = V2ChunkKeyEncoding (separator = metadata_v2 .dimension_separator )
188
182
189
- codecs : list [Codec ] = []
190
-
191
- # array-array codecs
192
- if metadata_v2 .order == "F" :
193
- # F is equivalent to order: n-1, ... 1, 0
194
- codecs .append (TransposeCodec (order = list (range (len (metadata_v2 .shape ) - 1 , - 1 , - 1 ))))
195
-
196
- if metadata_v2 .filters is not None :
197
- codecs .extend (_convert_filters (metadata_v2 .filters ))
198
-
199
- # array-bytes codecs
200
- if not isinstance (metadata_v2 .dtype , HasEndianness ):
201
- codecs .append (BytesCodec (endian = None ))
202
- else :
203
- codecs .append (BytesCodec (endian = metadata_v2 .dtype .endianness ))
204
-
205
- # bytes-bytes codecs
206
- if metadata_v2 .compressor is not None :
207
- bytes_bytes_codec = _convert_compressor (metadata_v2 .compressor , metadata_v2 .dtype )
208
- codecs .append (bytes_bytes_codec )
209
-
183
+ codecs : tuple [Codec , ...] = ()
184
+ # We first generate a sequence of V3 codecs, then we ensure that this sequence is valid
185
+ aa , ab , bb = codecs_from_list (v2_to_v3_codecs (metadata_v2 ))
186
+ codecs = (* aa , ab , * bb )
210
187
return ArrayV3Metadata (
211
188
shape = metadata_v2 .shape ,
212
189
data_type = metadata_v2 .dtype ,
@@ -220,66 +197,6 @@ def _convert_array_metadata(metadata_v2: ArrayV2Metadata) -> ArrayV3Metadata:
220
197
)
221
198
222
199
223
- def _convert_filters (filters : tuple [numcodecs .abc .Codec , ...]) -> list [ArrayArrayCodec ]:
224
- filters_codecs = [_find_numcodecs_zarr3 (filter ) for filter in filters ]
225
- for codec in filters_codecs :
226
- if not isinstance (codec , ArrayArrayCodec ):
227
- raise TypeError (f"Filter { type (codec )} is not an ArrayArrayCodec" )
228
-
229
- return cast (list [ArrayArrayCodec ], filters_codecs )
230
-
231
-
232
- def _convert_compressor (
233
- compressor : numcodecs .abc .Codec , dtype : ZDType [TBaseDType , TBaseScalar ]
234
- ) -> BytesBytesCodec :
235
- match compressor .codec_id :
236
- case "blosc" :
237
- return BloscCodec (
238
- typesize = dtype .to_native_dtype ().itemsize ,
239
- cname = compressor .cname ,
240
- clevel = compressor .clevel ,
241
- shuffle = BloscShuffle .from_int (compressor .shuffle ),
242
- blocksize = compressor .blocksize ,
243
- )
244
-
245
- case "zstd" :
246
- return ZstdCodec (
247
- level = compressor .level ,
248
- checksum = compressor .checksum ,
249
- )
250
-
251
- case "gzip" :
252
- return GzipCodec (level = compressor .level )
253
-
254
- case _:
255
- # If possible, find matching zarr.codecs.numcodecs codec
256
- compressor_codec = _find_numcodecs_zarr3 (compressor )
257
-
258
- if not isinstance (compressor_codec , BytesBytesCodec ):
259
- raise TypeError (f"Compressor { type (compressor_codec )} is not a BytesBytesCodec" )
260
-
261
- return compressor_codec
262
-
263
-
264
- def _find_numcodecs_zarr3 (numcodecs_codec : numcodecs .abc .Codec ) -> Codec :
265
- """Find matching zarr.codecs.numcodecs codec (if it exists)"""
266
-
267
- numcodec_name = f"numcodecs.{ numcodecs_codec .codec_id } "
268
- numcodec_dict = {
269
- "name" : numcodec_name ,
270
- "configuration" : numcodecs_codec .get_config (),
271
- }
272
-
273
- try :
274
- codec_v3 = get_codec_class (numcodec_name )
275
- except KeyError as exc :
276
- raise ValueError (
277
- f"Couldn't find corresponding zarr.codecs.numcodecs codec for { numcodecs_codec .codec_id } "
278
- ) from exc
279
-
280
- return codec_v3 .from_dict (numcodec_dict )
281
-
282
-
283
200
async def _save_v3_metadata (
284
201
metadata_v3 : ArrayV3Metadata | GroupMetadata , output_path : StorePath , dry_run : bool = False
285
202
) -> None :
0 commit comments