11from __future__ import annotations
22
33import asyncio
4+ from collections .abc import Mapping
45from dataclasses import dataclass , replace
5- from enum import Enum
66from functools import cached_property
7- from typing import TYPE_CHECKING
7+ from typing import TYPE_CHECKING , Final , Literal , NotRequired , TypedDict , TypeGuard , overload
88
99import numcodecs
1010from numcodecs .blosc import Blosc
1111from packaging .version import Version
1212
13- from zarr .abc .codec import BytesBytesCodec
13+ from zarr .abc .codec import BytesBytesCodec , CodecJSON , CodecJSON_V2 , CodecValidationError
1414from zarr .core .buffer .cpu import as_numpy_array_wrapper
15- from zarr .core .common import JSON , parse_enum , parse_named_configuration
15+ from zarr .core .common import (
16+ JSON ,
17+ NamedRequiredConfig ,
18+ ZarrFormat ,
19+ )
1620from zarr .core .dtype .common import HasItemSize
1721from zarr .registry import register_codec
1822
2226 from zarr .core .array_spec import ArraySpec
2327 from zarr .core .buffer import Buffer
2428
# Names of the shuffle filters the blosc codec accepts.
# The position of a name in BLOSC_SHUFFLE is the integer code used for the
# "shuffle" field of the Zarr V2 JSON form, so the order must not change.
BloscShuffle = Literal["noshuffle", "shuffle", "bitshuffle"]
BLOSC_SHUFFLE: Final = ("noshuffle", "shuffle", "bitshuffle")
2531
26- class BloscShuffle (Enum ):
27- """
28- Enum for shuffle filter used by blosc.
29- """
# Names of the compressors the blosc codec accepts; BLOSC_CNAME is the runtime
# counterpart of the BloscCname literal type and is used for validation.
BloscCname = Literal["lz4", "lz4hc", "blosclz", "zstd", "snappy", "zlib"]
BLOSC_CNAME: Final = ("lz4", "lz4hc", "blosclz", "zstd", "snappy", "zlib")
3034
31- noshuffle = "noshuffle"
32- shuffle = "shuffle"
33- bitshuffle = "bitshuffle"
3435
35- @classmethod
36- def from_int (cls , num : int ) -> BloscShuffle :
37- blosc_shuffle_int_to_str = {
38- 0 : "noshuffle" ,
39- 1 : "shuffle" ,
40- 2 : "bitshuffle" ,
41- }
42- if num not in blosc_shuffle_int_to_str :
43- raise ValueError (f"Value must be between 0 and 2. Got { num } ." )
44- return BloscShuffle [blosc_shuffle_int_to_str [num ]]
36+ class BloscConfigV2 (TypedDict ):
37+ cname : BloscCname
38+ clevel : int
39+ shuffle : int
40+ blocksize : int
41+ typesize : NotRequired [int ]
4542
4643
47- class BloscCname (Enum ):
class BloscConfigV3(TypedDict):
    """
    The ``configuration`` mapping of the blosc codec in Zarr V3 metadata.

    Unlike the V2 form, ``shuffle`` is stored by name and ``typesize`` is required.
    """

    cname: BloscCname
    clevel: int
    shuffle: BloscShuffle
    blocksize: int
    typesize: int
50+
51+
class BloscJSON_V2(CodecJSON_V2[Literal["blosc"]], BloscConfigV2):
    """
    The JSON form of the Blosc codec in Zarr V2.

    A flat mapping: the codec identifier (``"id": "blosc"``) sits next to the
    configuration fields declared by ``BloscConfigV2``.
    """
5156
52- lz4 = "lz4"
53- lz4hc = "lz4hc"
54- blosclz = "blosclz"
55- zstd = "zstd"
56- snappy = "snappy"
57- zlib = "zlib"
57+
class BloscJSON_V3(NamedRequiredConfig[Literal["blosc"], BloscConfigV3]):
    """
    The JSON form of the Blosc codec in Zarr V3.

    A mapping with the keys ``name`` (always ``"blosc"``) and ``configuration``
    (a ``BloscConfigV3`` mapping).
    """
62+
63+
def check_json_v2(data: CodecJSON) -> TypeGuard[BloscJSON_V2]:
    """
    Check whether ``data`` has the shape of a Zarr V2 blosc codec document.

    The keys ``id``, ``cname``, ``clevel``, ``shuffle`` and ``blocksize`` must all
    be present and ``id`` must equal ``"blosc"``. ``typesize`` may additionally be
    present (it is ``NotRequired`` in ``BloscConfigV2``); any other key makes the
    check fail. The values of the configuration fields are not inspected here.

    Parameters
    ----------
    data
        The candidate JSON document.

    Returns
    -------
    bool
        True if ``data`` is a mapping with the expected keys and identifier.
    """
    if not isinstance(data, Mapping):
        return False
    required = {"id", "clevel", "cname", "shuffle", "blocksize"}
    keys = set(data.keys())
    # All required keys present, and nothing beyond them except the optional typesize.
    has_expected_keys = required <= keys <= (required | {"typesize"})
    return has_expected_keys and data["id"] == "blosc"
70+
71+
def check_json_v3(data: CodecJSON) -> TypeGuard[BloscJSON_V3]:
    """
    Check whether ``data`` has the shape of a Zarr V3 blosc codec document.

    ``data`` must be a mapping with exactly the keys ``name`` and
    ``configuration``, ``name`` must equal ``"blosc"``, and ``configuration`` must
    be a mapping with exactly the keys ``cname``, ``clevel``, ``shuffle``,
    ``blocksize`` and ``typesize``. The configuration values are not inspected.

    Parameters
    ----------
    data
        The candidate JSON document.

    Returns
    -------
    bool
        True if ``data`` matches the structure described above.
    """
    if not isinstance(data, Mapping) or set(data.keys()) != {"name", "configuration"}:
        return False
    config = data["configuration"]
    return (
        data["name"] == "blosc"
        and isinstance(config, Mapping)
        and set(config.keys()) == {"cname", "clevel", "shuffle", "blocksize", "typesize"}
    )
81+
82+
def parse_cname(value: object) -> BloscCname:
    """
    Validate a blosc compressor name.

    Parameters
    ----------
    value
        The value to check.

    Returns
    -------
    BloscCname
        ``value`` unchanged, if it is one of the names in ``BLOSC_CNAME``.

    Raises
    ------
    ValueError
        If ``value`` is not one of the names in ``BLOSC_CNAME``.
    """
    if value not in BLOSC_CNAME:
        raise ValueError(f"Value must be one of {BLOSC_CNAME}. Got {value} instead.")
    # Membership in BLOSC_CNAME was checked above; the type checker cannot see that.
    return value  # type: ignore[return-value]
5887
5988
6089# See https://zarr.readthedocs.io/en/stable/user-guide/performance.html#configuring-blosc
@@ -85,31 +114,35 @@ def parse_blocksize(data: JSON) -> int:
85114 raise TypeError (f"Value should be an int. Got { type (data )} instead." )
86115
87116
def parse_shuffle(data: object) -> BloscShuffle:
    """
    Validate a blosc shuffle filter name.

    Parameters
    ----------
    data
        The value to check.

    Returns
    -------
    BloscShuffle
        ``data`` unchanged, if it is one of the names in ``BLOSC_SHUFFLE``.

    Raises
    ------
    ValueError
        If ``data`` is a string that is not one of the names in ``BLOSC_SHUFFLE``.
    TypeError
        If ``data`` is not a string.
    """
    if data in BLOSC_SHUFFLE:
        return data  # type: ignore[return-value]
    msg = f"Value must be one of {BLOSC_SHUFFLE}. Got {data} instead."
    if isinstance(data, str):
        # A string with an unknown value is a value error, matching parse_cname.
        raise ValueError(msg)
    raise TypeError(msg)
121+
122+
88123@dataclass (frozen = True )
89124class BloscCodec (BytesBytesCodec ):
90- """blosc codec"""
91-
92125 is_fixed_size = False
93126
94127 typesize : int | None
95- cname : BloscCname = BloscCname . zstd
96- clevel : int = 5
97- shuffle : BloscShuffle | None = BloscShuffle . noshuffle
98- blocksize : int = 0
128+ cname : BloscCname
129+ clevel : int
130+ shuffle : BloscShuffle | None
131+ blocksize : int
99132
100133 def __init__ (
101134 self ,
102135 * ,
103136 typesize : int | None = None ,
104- cname : BloscCname | str = BloscCname . zstd ,
137+ cname : BloscCname = " zstd" ,
105138 clevel : int = 5 ,
106- shuffle : BloscShuffle | str | None = None ,
139+ shuffle : BloscShuffle | None = None ,
107140 blocksize : int = 0 ,
108141 ) -> None :
109142 typesize_parsed = parse_typesize (typesize ) if typesize is not None else None
110- cname_parsed = parse_enum (cname , BloscCname )
143+ cname_parsed = parse_cname (cname )
111144 clevel_parsed = parse_clevel (clevel )
112- shuffle_parsed = parse_enum (shuffle , BloscShuffle ) if shuffle is not None else None
145+ shuffle_parsed = parse_shuffle (shuffle ) if shuffle is not None else None
113146 blocksize_parsed = parse_blocksize (blocksize )
114147
115148 object .__setattr__ (self , "typesize" , typesize_parsed )
@@ -120,24 +153,74 @@ def __init__(
120153
@classmethod
def from_dict(cls, data: dict[str, JSON]) -> Self:
    """
    Create a codec from its Zarr V3 JSON form.

    Delegates to ``cls.from_json`` with ``zarr_format=3``.
    """
    return cls.from_json(data, zarr_format=3)
125157
def to_dict(self) -> dict[str, JSON]:
    """
    Serialize the codec to its Zarr V3 JSON form.

    Delegates to ``self.to_json`` with ``zarr_format=3``.
    """
    return self.to_json(zarr_format=3)
160+
@classmethod
def _from_json_v2(cls, data: CodecJSON) -> Self:
    """
    Create a codec from its Zarr V2 JSON form.

    The integer ``shuffle`` code is translated to the matching name in
    ``BLOSC_SHUFFLE``. A code of -1 is translated to ``shuffle=None``, which
    leaves the choice of filter to ``evolve_from_array_spec``.

    Raises
    ------
    CodecValidationError
        If ``data`` does not pass ``check_json_v2`` or its ``shuffle`` code is
        not an integer between -1 and 2.
    """
    if check_json_v2(data):
        shuffle_code = data["shuffle"]
        # Validate explicitly: plain tuple indexing would accept negative codes
        # by wrapping around and raise IndexError / TypeError for anything else.
        if not isinstance(shuffle_code, int) or not -1 <= shuffle_code < len(BLOSC_SHUFFLE):
            raise CodecValidationError(
                "Invalid Zarr V2 JSON representation of the blosc codec. "
                f"Got shuffle={shuffle_code!r}, expected an integer between -1 and "
                f"{len(BLOSC_SHUFFLE) - 1}."
            )
        # NOTE(review): -1 is numcodecs' AUTOSHUFFLE code — confirm that deferring
        # the decision to evolve_from_array_spec is the intended handling.
        shuffle = None if shuffle_code == -1 else BLOSC_SHUFFLE[shuffle_code]
        return cls(
            cname=data["cname"],
            clevel=data["clevel"],
            shuffle=shuffle,
            blocksize=data["blocksize"],
            typesize=data.get("typesize", None),
        )
    msg = (
        "Invalid Zarr V2 JSON representation of the blosc codec. "
        f"Got {data!r}, expected a Mapping with keys ('id', 'cname', 'clevel', 'shuffle', 'blocksize', 'typesize')"
    )
    raise CodecValidationError(msg)
176+
@classmethod
def _from_json_v3(cls, data: CodecJSON) -> Self:
    """
    Create a codec from its Zarr V3 JSON form.

    Raises
    ------
    CodecValidationError
        If ``data`` does not pass ``check_json_v3``.
    """
    if check_json_v3(data):
        config = data["configuration"]
        return cls(
            typesize=config["typesize"],
            cname=config["cname"],
            clevel=config["clevel"],
            shuffle=config["shuffle"],
            blocksize=config["blocksize"],
        )
    # The pieces below are joined by implicit concatenation, so each one must
    # carry its own separator.
    msg = (
        "Invalid Zarr V3 JSON representation of the blosc codec. "
        f"Got {data!r}, expected a Mapping with keys ('name', 'configuration'), "
        "where the 'configuration' key is a Mapping with keys ('cname', 'clevel', 'shuffle', 'blocksize', 'typesize')"
    )
    raise CodecValidationError(msg)
193+
@overload
def to_json(self, zarr_format: Literal[2]) -> BloscJSON_V2: ...
@overload
def to_json(self, zarr_format: Literal[3]) -> BloscJSON_V3: ...

def to_json(self, zarr_format: ZarrFormat) -> BloscJSON_V2 | BloscJSON_V3:
    """
    Serialize the codec to its JSON form for the given Zarr format.

    Parameters
    ----------
    zarr_format
        2 for the flat V2 form (``id`` plus configuration fields, ``shuffle`` as
        an integer index into ``BLOSC_SHUFFLE``), 3 for the V3 form (``name`` plus
        a ``configuration`` mapping, ``shuffle`` by name).

    Raises
    ------
    ValueError
        If ``typesize`` or ``shuffle`` is None, or if ``zarr_format`` is neither
        2 nor 3.
    """
    if self.typesize is None or self.shuffle is None:
        raise ValueError("typesize and shuffle need to be set for encoding.")
    if zarr_format == 2:
        return {
            "id": "blosc",
            "clevel": self.clevel,
            "cname": self.cname,
            "shuffle": BLOSC_SHUFFLE.index(self.shuffle),
            "blocksize": self.blocksize,
        }
    elif zarr_format == 3:
        return {
            "name": "blosc",
            "configuration": {
                "clevel": self.clevel,
                "cname": self.cname,
                "shuffle": self.shuffle,
                "typesize": self.typesize,
                "blocksize": self.blocksize,
            },
        }
    raise ValueError(
        f"Unsupported Zarr format {zarr_format}. Expected 2 or 3."
    )  # pragma: no cover
141224
142225 def evolve_from_array_spec (self , array_spec : ArraySpec ) -> Self :
143226 item_size = 1
@@ -147,26 +230,18 @@ def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self:
147230 if new_codec .typesize is None :
148231 new_codec = replace (new_codec , typesize = item_size )
149232 if new_codec .shuffle is None :
150- new_codec = replace (
151- new_codec ,
152- shuffle = (BloscShuffle .bitshuffle if item_size == 1 else BloscShuffle .shuffle ),
153- )
233+ new_codec = replace (new_codec , shuffle = "bitshuffle" if item_size == 1 else "shuffle" )
154234
155235 return new_codec
156236
157237 @cached_property
158238 def _blosc_codec (self ) -> Blosc :
159239 if self .shuffle is None :
160240 raise ValueError ("`shuffle` needs to be set for decoding and encoding." )
161- map_shuffle_str_to_int = {
162- BloscShuffle .noshuffle : 0 ,
163- BloscShuffle .shuffle : 1 ,
164- BloscShuffle .bitshuffle : 2 ,
165- }
166241 config_dict = {
167- "cname" : self .cname . name ,
242+ "cname" : self .cname ,
168243 "clevel" : self .clevel ,
169- "shuffle" : map_shuffle_str_to_int [ self .shuffle ] ,
244+ "shuffle" : BLOSC_SHUFFLE . index ( self .shuffle ) ,
170245 "blocksize" : self .blocksize ,
171246 }
172247 # See https://github.com/zarr-developers/numcodecs/pull/713
0 commit comments