11from __future__ import annotations
22
33import asyncio
4- from collections .abc import Mapping
54from dataclasses import dataclass , replace
5+ from enum import Enum
66from functools import cached_property
7- from typing import (
8- TYPE_CHECKING ,
9- Final ,
10- Literal ,
11- NotRequired ,
12- TypedDict ,
13- TypeGuard ,
14- overload ,
15- )
7+ from typing import TYPE_CHECKING
168
179import numcodecs
1810from numcodecs .blosc import Blosc
1911from packaging .version import Version
20- from typing_extensions import ReadOnly
21-
22- from zarr .abc .codec import BytesBytesCodec , CodecJSON
23- from zarr .core .common import (
24- JSON ,
25- NamedRequiredConfig ,
26- ZarrFormat ,
27- )
12+
13+ from zarr .abc .codec import BytesBytesCodec
14+ from zarr .core .buffer .cpu import as_numpy_array_wrapper
15+ from zarr .core .common import JSON , parse_enum , parse_named_configuration
2816from zarr .core .dtype .common import HasItemSize
29- from zarr .errors import CodecValidationError
30- from zarr .registry import register_codec
3117
3218if TYPE_CHECKING :
3319 from typing import Self
3420
3521 from zarr .core .array_spec import ArraySpec
3622 from zarr .core .buffer import Buffer
3723
38- BloscShuffle = Literal ["noshuffle" , "shuffle" , "bitshuffle" ]
39- BLOSC_SHUFFLE : Final = ("noshuffle" , "shuffle" , "bitshuffle" )
40-
41- BloscCname = Literal ["lz4" , "lz4hc" , "blosclz" , "zstd" , "snappy" , "zlib" ]
42- BLOSC_CNAME : Final = ("lz4" , "lz4hc" , "blosclz" , "zstd" , "snappy" , "zlib" )
43-
44-
45- class BloscConfigV2 (TypedDict ):
46- cname : BloscCname
47- clevel : int
48- shuffle : int
49- blocksize : int
50- typesize : NotRequired [int ]
51-
5224
53- class BloscConfigV3 (TypedDict ):
54- cname : BloscCname
55- clevel : int
56- shuffle : BloscShuffle
57- blocksize : int
58- typesize : int
59-
60-
61- class BloscJSON_V2 (BloscConfigV2 ):
class BloscShuffle(Enum):
    """
    Enum for shuffle filter used by blosc.

    Each member's value is its own name as a lowercase string.
    """

    noshuffle = "noshuffle"
    shuffle = "shuffle"
    bitshuffle = "bitshuffle"

    @classmethod
    def from_int(cls, num: int) -> BloscShuffle:
        """
        Return the member that corresponds to a numeric shuffle code.

        Parameters
        ----------
        num : int
            ``0`` for ``noshuffle``, ``1`` for ``shuffle``, ``2`` for ``bitshuffle``.

        Returns
        -------
        BloscShuffle

        Raises
        ------
        ValueError
            If ``num`` is not one of 0, 1 or 2.
        """
        # Map codes straight to members instead of round-tripping through
        # the member names as strings.
        members_by_code = {
            0: cls.noshuffle,
            1: cls.shuffle,
            2: cls.bitshuffle,
        }
        if num not in members_by_code:
            raise ValueError(f"Value must be between 0 and 2. Got {num}.")
        return members_by_code[num]
6844
69- class BloscJSON_V3 (NamedRequiredConfig [Literal ["blosc" ], BloscConfigV3 ]):
45+
class BloscCname(Enum):
    """
    Enum for compression library used by blosc.

    Each member's value is its own name as a lowercase string.
    """

    lz4 = "lz4"
    lz4hc = "lz4hc"
    blosclz = "blosclz"
    zstd = "zstd"
    snappy = "snappy"
    zlib = "zlib"
9857
9958
10059# See https://zarr.readthedocs.io/en/stable/user-guide/performance.html#configuring-blosc
@@ -125,35 +84,31 @@ def parse_blocksize(data: JSON) -> int:
12584 raise TypeError (f"Value should be an int. Got { type (data )} instead." )
12685
12786
128- def parse_shuffle (data : object ) -> BloscShuffle :
129- if data in BLOSC_SHUFFLE :
130- return data # type: ignore[return-value]
131- raise TypeError (f"Value must be one of { BLOSC_SHUFFLE } . Got { data } instead." )
132-
133-
13487@dataclass (frozen = True )
13588class BloscCodec (BytesBytesCodec ):
89+ """blosc codec"""
90+
13691 is_fixed_size = False
13792
13893 typesize : int | None
139- cname : BloscCname
140- clevel : int
141- shuffle : BloscShuffle | None
142- blocksize : int
94+ cname : BloscCname = BloscCname . zstd
95+ clevel : int = 5
96+ shuffle : BloscShuffle | None = BloscShuffle . noshuffle
97+ blocksize : int = 0
14398
14499 def __init__ (
145100 self ,
146101 * ,
147102 typesize : int | None = None ,
148- cname : BloscCname = " zstd" ,
103+ cname : BloscCname | str = BloscCname . zstd ,
149104 clevel : int = 5 ,
150- shuffle : BloscShuffle | None = None ,
105+ shuffle : BloscShuffle | str | None = None ,
151106 blocksize : int = 0 ,
152107 ) -> None :
153108 typesize_parsed = parse_typesize (typesize ) if typesize is not None else None
154- cname_parsed = parse_cname (cname )
109+ cname_parsed = parse_enum (cname , BloscCname )
155110 clevel_parsed = parse_clevel (clevel )
156- shuffle_parsed = parse_shuffle (shuffle ) if shuffle is not None else None
111+ shuffle_parsed = parse_enum (shuffle , BloscShuffle ) if shuffle is not None else None
157112 blocksize_parsed = parse_blocksize (blocksize )
158113
159114 object .__setattr__ (self , "typesize" , typesize_parsed )
@@ -164,74 +119,24 @@ def __init__(
164119
@classmethod
def from_dict(cls, data: dict[str, JSON]) -> Self:
    """
    Build a codec from its dict form.

    Parameters
    ----------
    data : dict[str, JSON]
        Passed to ``parse_named_configuration`` together with the expected
        name ``"blosc"``.
        NOTE(review): presumably shaped like
        ``{"name": "blosc", "configuration": {...}}`` -- confirm against
        ``parse_named_configuration``.

    Returns
    -------
    Self
        A codec constructed from the parsed configuration.
    """
    _, configuration_parsed = parse_named_configuration(data, "blosc")
    # The configuration entries are forwarded as keyword arguments, so their
    # keys must match the constructor's parameter names.
    return cls(**configuration_parsed)  # type: ignore[arg-type]
168124
def to_dict(self) -> dict[str, JSON]:
    """
    Serialize this codec to its named-configuration dict form.

    Returns
    -------
    dict[str, JSON]
        ``{"name": "blosc", "configuration": {...}}`` where the configuration
        holds ``typesize``, ``cname``, ``clevel``, ``shuffle`` and
        ``blocksize``. ``cname`` and ``shuffle`` are written as the enum
        members' values.

    Raises
    ------
    ValueError
        If ``typesize`` or ``shuffle`` is still ``None``.
    """
    # Both fields may be left as None at construction time; they must be
    # resolved before the codec can be serialized.
    if self.typesize is None:
        raise ValueError("`typesize` needs to be set for serialization.")
    if self.shuffle is None:
        raise ValueError("`shuffle` needs to be set for serialization.")
    return {
        "name": "blosc",
        "configuration": {
            "typesize": self.typesize,
            "cname": self.cname.value,
            "clevel": self.clevel,
            "shuffle": self.shuffle.value,
            "blocksize": self.blocksize,
        },
    }
235140
236141 def evolve_from_array_spec (self , array_spec : ArraySpec ) -> Self :
237142 item_size = 1
@@ -241,18 +146,26 @@ def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self:
241146 if new_codec .typesize is None :
242147 new_codec = replace (new_codec , typesize = item_size )
243148 if new_codec .shuffle is None :
244- new_codec = replace (new_codec , shuffle = "bitshuffle" if item_size == 1 else "shuffle" )
149+ new_codec = replace (
150+ new_codec ,
151+ shuffle = (BloscShuffle .bitshuffle if item_size == 1 else BloscShuffle .shuffle ),
152+ )
245153
246154 return new_codec
247155
248156 @cached_property
249157 def _blosc_codec (self ) -> Blosc :
250158 if self .shuffle is None :
251159 raise ValueError ("`shuffle` needs to be set for decoding and encoding." )
160+ map_shuffle_str_to_int = {
161+ BloscShuffle .noshuffle : 0 ,
162+ BloscShuffle .shuffle : 1 ,
163+ BloscShuffle .bitshuffle : 2 ,
164+ }
252165 config_dict = {
253- "cname" : self .cname ,
166+ "cname" : self .cname . name ,
254167 "clevel" : self .clevel ,
255- "shuffle" : BLOSC_SHUFFLE . index ( self .shuffle ) ,
168+ "shuffle" : map_shuffle_str_to_int [ self .shuffle ] ,
256169 "blocksize" : self .blocksize ,
257170 }
258171 # See https://github.com/zarr-developers/numcodecs/pull/713
@@ -265,8 +178,6 @@ async def _decode_single(
265178 chunk_bytes : Buffer ,
266179 chunk_spec : ArraySpec ,
267180 ) -> Buffer :
268- from zarr .core .buffer .cpu import as_numpy_array_wrapper
269-
270181 return await asyncio .to_thread (
271182 as_numpy_array_wrapper , self ._blosc_codec .decode , chunk_bytes , chunk_spec .prototype
272183 )
@@ -287,6 +198,3 @@ async def _encode_single(
287198
def compute_encoded_size(self, _input_byte_length: int, _chunk_spec: ArraySpec) -> int:
    """
    Not implemented for this codec.

    Both arguments are ignored (hence the leading underscores).

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError
290-
291-
292- register_codec ("blosc" , BloscCodec )
0 commit comments