11from __future__ import annotations
22
33import asyncio
4+ from collections .abc import Mapping
45from dataclasses import dataclass , replace
5- from enum import Enum
66from functools import cached_property
7- from typing import TYPE_CHECKING
7+ from typing import (
8+ TYPE_CHECKING ,
9+ Final ,
10+ Literal ,
11+ NotRequired ,
12+ TypedDict ,
13+ TypeGuard ,
14+ overload ,
15+ )
816
917import numcodecs
1018from numcodecs .blosc import Blosc
1119from packaging .version import Version
12-
13- from zarr .abc .codec import BytesBytesCodec
14- from zarr .core .buffer .cpu import as_numpy_array_wrapper
15- from zarr .core .common import JSON , parse_enum , parse_named_configuration
20+ from typing_extensions import ReadOnly
21+
22+ from zarr .abc .codec import BytesBytesCodec , CodecJSON
23+ from zarr .core .common import (
24+ JSON ,
25+ NamedRequiredConfig ,
26+ ZarrFormat ,
27+ )
1628from zarr .core .dtype .common import HasItemSize
29+ from zarr .errors import CodecValidationError
1730
1831if TYPE_CHECKING :
1932 from typing import Self
2033
2134 from zarr .core .array_spec import ArraySpec
2235 from zarr .core .buffer import Buffer
2336
# Shuffle filters accepted by the Blosc codec, as a static type and as a runtime
# tuple. The position of each name in the tuple is used elsewhere in this file
# as the integer shuffle code (see the ``BLOSC_SHUFFLE.index(...)`` calls).
BloscShuffle = Literal["noshuffle", "shuffle", "bitshuffle"]
BLOSC_SHUFFLE: Final = ("noshuffle", "shuffle", "bitshuffle")

# Compressor names accepted by the Blosc codec, as a static type and as a
# runtime tuple used for membership checks.
BloscCname = Literal["lz4", "lz4hc", "blosclz", "zstd", "snappy", "zlib"]
BLOSC_CNAME: Final = ("lz4", "lz4hc", "blosclz", "zstd", "snappy", "zlib")
42+
43+
44+ class BloscConfigV2 (TypedDict ):
45+ cname : BloscCname
46+ clevel : int
47+ shuffle : int
48+ blocksize : int
49+ typesize : NotRequired [int ]
2450
25- class BloscShuffle (Enum ):
51+
class BloscConfigV3(TypedDict):
    """
    Blosc codec parameters in their Zarr V3 form.

    All five fields are required, and ``shuffle`` is one of the
    ``BloscShuffle`` string names.
    """

    cname: BloscCname
    clevel: int
    shuffle: BloscShuffle
    blocksize: int
    typesize: int
58+
59+
class BloscJSON_V2(BloscConfigV2):
    """
    The JSON document that describes the Blosc codec in Zarr V2 metadata.

    It carries every ``BloscConfigV2`` field plus an ``id`` field whose value
    is always ``"blosc"``.
    """

    id: ReadOnly[Literal["blosc"]]
4466
4567
46- class BloscCname ( Enum ):
class BloscJSON_V3(NamedRequiredConfig[Literal["blosc"], BloscConfigV3]):
    """
    The JSON document that describes the Blosc codec in Zarr V3 metadata.

    Parameterised with the name ``"blosc"`` and a ``BloscConfigV3``
    configuration.
    """
5072
51- lz4 = "lz4"
52- lz4hc = "lz4hc"
53- blosclz = "blosclz"
54- zstd = "zstd"
55- snappy = "snappy"
56- zlib = "zlib"
73+
def check_json_v2(data: CodecJSON) -> TypeGuard[BloscJSON_V2]:
    """
    Check whether ``data`` has the shape of the Zarr V2 JSON form of the Blosc codec.

    Parameters
    ----------
    data
        The candidate codec JSON.

    Returns
    -------
    bool
        True when ``data`` is a Mapping whose ``id`` is ``"blosc"``, that has
        all of the keys ``id``, ``clevel``, ``cname``, ``shuffle`` and
        ``blocksize``, and no other key except the optional ``typesize``.
        Only key names and the ``id`` value are checked, not the other values.
    """
    if not isinstance(data, Mapping):
        return False
    required = {"id", "clevel", "cname", "shuffle", "blocksize"}
    # ``typesize`` is declared ``NotRequired`` on ``BloscConfigV2`` and is read
    # with ``data.get("typesize")`` by ``_from_json_v2``, so a document that
    # carries it must not be rejected.
    allowed = required | {"typesize"}
    keys = set(data.keys())
    return required <= keys <= allowed and data["id"] == "blosc"
80+
81+
def check_json_v3(data: CodecJSON) -> TypeGuard[BloscJSON_V3]:
    """
    Check whether ``data`` has the shape of the Zarr V3 JSON form of the Blosc codec.

    ``data`` must be a Mapping with exactly the keys ``name`` and
    ``configuration``, ``name`` must equal ``"blosc"``, and ``configuration``
    must be a Mapping with exactly the keys ``cname``, ``clevel``, ``shuffle``,
    ``blocksize`` and ``typesize``. Configuration values are not inspected.
    """
    if not isinstance(data, Mapping):
        return False
    if set(data.keys()) != {"name", "configuration"}:
        return False
    if data["name"] != "blosc":
        return False
    config = data["configuration"]
    if not isinstance(config, Mapping):
        return False
    return set(config.keys()) == {"cname", "clevel", "shuffle", "blocksize", "typesize"}
91+
92+
def parse_cname(value: object) -> BloscCname:
    """
    Validate a Blosc compressor name.

    Returns ``value`` unchanged when it is one of ``BLOSC_CNAME``; otherwise
    raises ``ValueError``.
    """
    if value in BLOSC_CNAME:
        return value  # type: ignore[return-value]
    raise ValueError(f"Value must be one of {BLOSC_CNAME}. Got {value} instead.")
5797
5898
5999# See https://zarr.readthedocs.io/en/stable/user-guide/performance.html#configuring-blosc
@@ -84,31 +124,35 @@ def parse_blocksize(data: JSON) -> int:
84124 raise TypeError (f"Value should be an int. Got { type (data )} instead." )
85125
86126
def parse_shuffle(data: object) -> BloscShuffle:
    """
    Validate a Blosc shuffle name.

    Returns ``data`` unchanged when it is one of ``BLOSC_SHUFFLE``; otherwise
    raises ``TypeError``.
    """
    # NOTE(review): an unrecognised value raises TypeError here, while
    # ``parse_cname`` raises ValueError for the same situation -- confirm which
    # one callers are expected to catch before unifying them.
    if data not in BLOSC_SHUFFLE:
        raise TypeError(f"Value must be one of {BLOSC_SHUFFLE}. Got {data} instead.")
    return data  # type: ignore[return-value]
131+
132+
87133@dataclass (frozen = True )
88134class BloscCodec (BytesBytesCodec ):
89- """blosc codec"""
90-
91135 is_fixed_size = False
92136
93137 typesize : int | None
94- cname : BloscCname = BloscCname . zstd
95- clevel : int = 5
96- shuffle : BloscShuffle | None = BloscShuffle . noshuffle
97- blocksize : int = 0
138+ cname : BloscCname
139+ clevel : int
140+ shuffle : BloscShuffle | None
141+ blocksize : int
98142
99143 def __init__ (
100144 self ,
101145 * ,
102146 typesize : int | None = None ,
103- cname : BloscCname | str = BloscCname . zstd ,
147+ cname : BloscCname = " zstd" ,
104148 clevel : int = 5 ,
105- shuffle : BloscShuffle | str | None = None ,
149+ shuffle : BloscShuffle | None = None ,
106150 blocksize : int = 0 ,
107151 ) -> None :
108152 typesize_parsed = parse_typesize (typesize ) if typesize is not None else None
109- cname_parsed = parse_enum (cname , BloscCname )
153+ cname_parsed = parse_cname (cname )
110154 clevel_parsed = parse_clevel (clevel )
111- shuffle_parsed = parse_enum (shuffle , BloscShuffle ) if shuffle is not None else None
155+ shuffle_parsed = parse_shuffle (shuffle ) if shuffle is not None else None
112156 blocksize_parsed = parse_blocksize (blocksize )
113157
114158 object .__setattr__ (self , "typesize" , typesize_parsed )
@@ -119,24 +163,74 @@ def __init__(
119163
120164 @classmethod
121165 def from_dict (cls , data : dict [str , JSON ]) -> Self :
122- _ , configuration_parsed = parse_named_configuration (data , "blosc" )
123- return cls (** configuration_parsed ) # type: ignore[arg-type]
166+ return cls .from_json (data , zarr_format = 3 )
124167
125168 def to_dict (self ) -> dict [str , JSON ]:
126- if self .typesize is None :
127- raise ValueError ("`typesize` needs to be set for serialization." )
128- if self .shuffle is None :
129- raise ValueError ("`shuffle` needs to be set for serialization." )
130- return {
131- "name" : "blosc" ,
132- "configuration" : {
133- "typesize" : self .typesize ,
134- "cname" : self .cname .value ,
169+ return self .to_json (zarr_format = 3 )
170+
171+ @classmethod
172+ def _from_json_v2 (cls , data : CodecJSON ) -> Self :
173+ if check_json_v2 (data ):
174+ return cls (
175+ cname = data ["cname" ],
176+ clevel = data ["clevel" ],
177+ shuffle = BLOSC_SHUFFLE [data ["shuffle" ]],
178+ blocksize = data ["blocksize" ],
179+ typesize = data .get ("typesize" , None ),
180+ )
181+ msg = (
182+ "Invalid Zarr V2 JSON representation of the blosc codec. "
183+ f"Got { data !r} , expected a Mapping with keys ('id', 'cname', 'clevel', 'shuffle', 'blocksize', 'typesize')"
184+ )
185+ raise CodecValidationError (msg )
186+
187+ @classmethod
188+ def _from_json_v3 (cls , data : CodecJSON ) -> Self :
189+ if check_json_v3 (data ):
190+ return cls (
191+ typesize = data ["configuration" ]["typesize" ],
192+ cname = data ["configuration" ]["cname" ],
193+ clevel = data ["configuration" ]["clevel" ],
194+ shuffle = data ["configuration" ]["shuffle" ],
195+ blocksize = data ["configuration" ]["blocksize" ],
196+ )
197+ msg = (
198+ "Invalid Zarr V3 JSON representation of the blosc codec. "
199+ f"Got { data !r} , expected a Mapping with keys ('name', 'configuration')"
200+ "Where the 'configuration' key is a Mapping with keys ('cname', 'clevel', 'shuffle', 'blocksize', 'typesize')"
201+ )
202+ raise CodecValidationError (msg )
203+
204+ @overload
205+ def to_json (self , zarr_format : Literal [2 ]) -> BloscJSON_V2 : ...
206+ @overload
207+ def to_json (self , zarr_format : Literal [3 ]) -> BloscJSON_V3 : ...
208+
209+ def to_json (self , zarr_format : ZarrFormat ) -> BloscJSON_V2 | BloscJSON_V3 :
210+ if self .typesize is None or self .shuffle is None :
211+ raise ValueError ("typesize and blocksize need to be set for encoding." )
212+ if zarr_format == 2 :
213+ return {
214+ "id" : "blosc" ,
135215 "clevel" : self .clevel ,
136- "shuffle" : self .shuffle .value ,
216+ "cname" : self .cname ,
217+ "shuffle" : BLOSC_SHUFFLE .index (self .shuffle ),
137218 "blocksize" : self .blocksize ,
138- },
139- }
219+ }
220+ elif zarr_format == 3 :
221+ return {
222+ "name" : "blosc" ,
223+ "configuration" : {
224+ "clevel" : self .clevel ,
225+ "cname" : self .cname ,
226+ "shuffle" : self .shuffle ,
227+ "typesize" : self .typesize ,
228+ "blocksize" : self .blocksize ,
229+ },
230+ }
231+ raise ValueError (
232+ f"Unsupported Zarr format { zarr_format } . Expected 2 or 3."
233+ ) # pragma: no cover
140234
141235 def evolve_from_array_spec (self , array_spec : ArraySpec ) -> Self :
142236 item_size = 1
@@ -146,26 +240,18 @@ def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self:
146240 if new_codec .typesize is None :
147241 new_codec = replace (new_codec , typesize = item_size )
148242 if new_codec .shuffle is None :
149- new_codec = replace (
150- new_codec ,
151- shuffle = (BloscShuffle .bitshuffle if item_size == 1 else BloscShuffle .shuffle ),
152- )
243+ new_codec = replace (new_codec , shuffle = "bitshuffle" if item_size == 1 else "shuffle" )
153244
154245 return new_codec
155246
156247 @cached_property
157248 def _blosc_codec (self ) -> Blosc :
158249 if self .shuffle is None :
159250 raise ValueError ("`shuffle` needs to be set for decoding and encoding." )
160- map_shuffle_str_to_int = {
161- BloscShuffle .noshuffle : 0 ,
162- BloscShuffle .shuffle : 1 ,
163- BloscShuffle .bitshuffle : 2 ,
164- }
165251 config_dict = {
166- "cname" : self .cname . name ,
252+ "cname" : self .cname ,
167253 "clevel" : self .clevel ,
168- "shuffle" : map_shuffle_str_to_int [ self .shuffle ] ,
254+ "shuffle" : BLOSC_SHUFFLE . index ( self .shuffle ) ,
169255 "blocksize" : self .blocksize ,
170256 }
171257 # See https://github.com/zarr-developers/numcodecs/pull/713
@@ -178,6 +264,8 @@ async def _decode_single(
178264 chunk_bytes : Buffer ,
179265 chunk_spec : ArraySpec ,
180266 ) -> Buffer :
267+ from zarr .core .buffer .cpu import as_numpy_array_wrapper
268+
181269 return await asyncio .to_thread (
182270 as_numpy_array_wrapper , self ._blosc_codec .decode , chunk_bytes , chunk_spec .prototype
183271 )
0 commit comments