Skip to content

Commit 0252ac3

Browse files
committed
revert enum removal, fix doctests
1 parent fe21d7c commit 0252ac3

File tree

5 files changed

+107
-36
lines changed

5 files changed

+107
-36
lines changed

docs/user-guide/config.rst

Lines changed: 22 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -47,22 +47,33 @@ This is the current default configuration::
4747
'buffer': 'zarr.buffer.cpu.Buffer',
4848
'codec_pipeline': {'batch_size': 1,
4949
'path': 'zarr.core.codec_pipeline.BatchedCodecPipeline'},
50-
'codecs': {'blosc': 'zarr.codecs.blosc.BloscCodec',
50+
'codecs': {'adler32': 'zarr.codecs.numcodecs.Adler32',
51+
'astype': 'zarr.codecs.numcodecs.AsType',
52+
'bitround': 'zarr.codecs.numcodecs.BitRound',
53+
'blosc': 'zarr.codecs.blosc.BloscCodec',
5154
'bytes': 'zarr.codecs.bytes.BytesCodec',
55+
'bz2': 'zarr.codecs.numcodecs.BZ2',
56+
'crc32': 'zarr.codecs.numcodecs.CRC32',
5257
'crc32c': 'zarr.codecs.crc32c_.Crc32cCodec',
58+
'delta': 'zarr.codecs.numcodecs.Delta',
5359
'endian': 'zarr.codecs.bytes.BytesCodec',
60+
'fixedscaleoffset': 'zarr.codecs.numcodecs.FixedScaleOffset',
61+
'fletcher32': 'zarr.codecs.numcodecs.Fletcher32',
5462
'gzip': 'zarr.codecs.gzip.GzipCodec',
63+
'jenkins_lookup3': 'zarr.codecs.numcodecs.JenkinsLookup3',
64+
'lz4': 'zarr.codecs.numcodecs.LZ4',
65+
'lzma': 'zarr.codecs.numcodecs.LZMA',
5566
'numcodecs.adler32': 'zarr.codecs.numcodecs.Adler32',
5667
'numcodecs.astype': 'zarr.codecs.numcodecs.AsType',
5768
'numcodecs.bitround': 'zarr.codecs.numcodecs.BitRound',
58-
'numcodecs.blosc': 'zarr.codecs.numcodecs.Blosc',
69+
'numcodecs.blosc': 'zarr.codecs.blosc.BloscCodec',
5970
'numcodecs.bz2': 'zarr.codecs.numcodecs.BZ2',
6071
'numcodecs.crc32': 'zarr.codecs.numcodecs.CRC32',
61-
'numcodecs.crc32c': 'zarr.codecs.numcodecs.CRC32C',
72+
'numcodecs.crc32c': 'zarr.codecs.crc32c_.Crc32cCodec',
6273
'numcodecs.delta': 'zarr.codecs.numcodecs.Delta',
6374
'numcodecs.fixedscaleoffset': 'zarr.codecs.numcodecs.FixedScaleOffset',
6475
'numcodecs.fletcher32': 'zarr.codecs.numcodecs.Fletcher32',
65-
'numcodecs.gZip': 'zarr.codecs.numcodecs.GZip',
76+
'numcodecs.gzip': 'zarr.codecs.gzip.GzipCodec',
6677
'numcodecs.jenkins_lookup3': 'zarr.codecs.numcodecs.JenkinsLookup3',
6778
'numcodecs.lz4': 'zarr.codecs.numcodecs.LZ4',
6879
'numcodecs.lzma': 'zarr.codecs.numcodecs.LZMA',
@@ -72,11 +83,17 @@ This is the current default configuration::
7283
'numcodecs.shuffle': 'zarr.codecs.numcodecs.Shuffle',
7384
'numcodecs.zfpy': 'zarr.codecs.numcodecs.ZFPY',
7485
'numcodecs.zlib': 'zarr.codecs.numcodecs.Zlib',
75-
'numcodecs.zstd': 'zarr.codecs.numcodecs.Zstd',
86+
'numcodecs.zstd': 'zarr.codecs.zstd.ZstdCodec',
87+
'packbits': 'zarr.codecs.numcodecs.PackBits',
88+
'pcodec': 'zarr.codecs.numcodecs.PCodec',
89+
'quantize': 'zarr.codecs.numcodecs.Quantize',
7690
'sharding_indexed': 'zarr.codecs.sharding.ShardingCodec',
91+
'shuffle': 'zarr.codecs.numcodecs.Shuffle',
7792
'transpose': 'zarr.codecs.transpose.TransposeCodec',
7893
'vlen-bytes': 'zarr.codecs.vlen_utf8.VLenBytesCodec',
7994
'vlen-utf8': 'zarr.codecs.vlen_utf8.VLenUTF8Codec',
95+
'zfpy': 'zarr.codecs.numcodecs.ZFPY',
96+
'zlib': 'zarr.codecs.numcodecs.Zlib',
8097
'zstd': 'zarr.codecs.zstd.ZstdCodec'},
8198
'default_zarr_format': 3,
8299
'json_indent': 2,

src/zarr/codecs/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
from __future__ import annotations
22

3-
from zarr.codecs.blosc import BloscCname, BloscCodec, BloscShuffle
3+
from zarr.codecs.blosc import BloscCname, BloscCname_lit, BloscCodec, BloscShuffle, BloscShuffle_lit
44
from zarr.codecs.bytes import BytesCodec, Endian
55
from zarr.codecs.crc32c_ import Crc32cCodec
66
from zarr.codecs.gzip import GzipCodec
@@ -31,8 +31,10 @@
3131

3232
__all__ = [
3333
"BloscCname",
34+
"BloscCname_lit",
3435
"BloscCodec",
3536
"BloscShuffle",
37+
"BloscShuffle_lit",
3638
"BytesCodec",
3739
"Crc32cCodec",
3840
"Endian",

src/zarr/codecs/blosc.py

Lines changed: 68 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
import asyncio
44
from collections.abc import Mapping
55
from dataclasses import dataclass, replace
6+
from enum import Enum
67
from functools import cached_property
78
from typing import (
89
TYPE_CHECKING,
@@ -34,25 +35,64 @@
3435
from zarr.core.array_spec import ArraySpec
3536
from zarr.core.buffer import Buffer
3637

37-
BloscShuffle = Literal["noshuffle", "shuffle", "bitshuffle"]
38+
39+
class BloscShuffle(Enum):
40+
"""
41+
Enum for shuffle filter used by blosc.
42+
"""
43+
44+
noshuffle = "noshuffle"
45+
shuffle = "shuffle"
46+
bitshuffle = "bitshuffle"
47+
48+
@classmethod
49+
def from_int(cls, num: int) -> BloscShuffle:
50+
blosc_shuffle_int_to_str = {
51+
0: "noshuffle",
52+
1: "shuffle",
53+
2: "bitshuffle",
54+
}
55+
if num not in blosc_shuffle_int_to_str:
56+
raise ValueError(f"Value must be between 0 and 2. Got {num}.")
57+
return BloscShuffle[blosc_shuffle_int_to_str[num]]
58+
59+
60+
class BloscCname(Enum):
61+
"""
62+
Enum for compression library used by blosc.
63+
"""
64+
65+
lz4 = "lz4"
66+
lz4hc = "lz4hc"
67+
blosclz = "blosclz"
68+
zstd = "zstd"
69+
snappy = "snappy"
70+
zlib = "zlib"
71+
72+
73+
# TODO: Rename this when we retire the enums
74+
BloscShuffle_lit = Literal["noshuffle", "shuffle", "bitshuffle"]
75+
"""The names of the shuffle options used by the blosc codec."""
3876
BLOSC_SHUFFLE: Final = ("noshuffle", "shuffle", "bitshuffle")
3977

40-
BloscCname = Literal["lz4", "lz4hc", "blosclz", "zstd", "snappy", "zlib"]
78+
# TODO: Rename this when we retire the enums
79+
BloscCname_lit = Literal["lz4", "lz4hc", "blosclz", "zstd", "snappy", "zlib"]
80+
"""The names of the compression libraries used by the blosc codec."""
4181
BLOSC_CNAME: Final = ("lz4", "lz4hc", "blosclz", "zstd", "snappy", "zlib")
4282

4383

4484
class BloscConfigV2(TypedDict):
45-
cname: BloscCname
85+
cname: BloscCname_lit
4686
clevel: int
4787
shuffle: int
4888
blocksize: int
4989
typesize: NotRequired[int]
5090

5191

5292
class BloscConfigV3(TypedDict):
53-
cname: BloscCname
93+
cname: BloscCname_lit
5494
clevel: int
55-
shuffle: BloscShuffle
95+
shuffle: BloscShuffle_lit
5696
blocksize: int
5797
typesize: int
5898

@@ -91,9 +131,14 @@ def check_json_v3(data: object) -> TypeGuard[BloscJSON_V3]:
91131

92132

93133
def parse_cname(value: object) -> BloscCname:
94-
if value not in BLOSC_CNAME:
95-
raise ValueError(f"Value must be one of {BLOSC_CNAME}. Got {value} instead.")
96-
return value # type: ignore[return-value]
134+
if isinstance(value, BloscCname):
135+
return value
136+
if isinstance(value, str):
137+
if value not in BLOSC_CNAME:
138+
raise ValueError(f"Value must be one of {BLOSC_CNAME}. Got {value} instead.")
139+
return BloscCname[value]
140+
msg = f"Value must be an instance of `BloscCname` or a string in {BLOSC_CNAME}. Got {value} instead."
141+
raise TypeError(msg)
97142

98143

99144
# See https://zarr.readthedocs.io/en/stable/user-guide/performance.html#configuring-blosc
@@ -125,8 +170,10 @@ def parse_blocksize(data: JSON) -> int:
125170

126171

127172
def parse_shuffle(data: object) -> BloscShuffle:
173+
if isinstance(data, BloscShuffle):
174+
return data
128175
if data in BLOSC_SHUFFLE:
129-
return data # type: ignore[return-value]
176+
return BloscShuffle[data] # type: ignore[misc]
130177
raise TypeError(f"Value must be one of {BLOSC_SHUFFLE}. Got {data} instead.")
131178

132179

@@ -144,9 +191,9 @@ def __init__(
144191
self,
145192
*,
146193
typesize: int | None = None,
147-
cname: BloscCname = "zstd",
194+
cname: BloscCname_lit | BloscCname = "zstd",
148195
clevel: int = 5,
149-
shuffle: BloscShuffle | None = None,
196+
shuffle: BloscShuffle_lit | BloscShuffle | None = None,
150197
blocksize: int = 0,
151198
) -> None:
152199
typesize_parsed = parse_typesize(typesize) if typesize is not None else None
@@ -174,8 +221,8 @@ def to_dict(self) -> dict[str, JSON]:
174221
"name": "blosc",
175222
"configuration": {
176223
"clevel": self.clevel,
177-
"cname": self.cname,
178-
"shuffle": self.shuffle,
224+
"cname": self.cname.value,
225+
"shuffle": self.shuffle.value,
179226
"typesize": self.typesize,
180227
"blocksize": self.blocksize,
181228
},
@@ -226,8 +273,8 @@ def to_json(self, zarr_format: ZarrFormat) -> BloscJSON_V2 | BloscJSON_V3:
226273
return {
227274
"id": "blosc",
228275
"clevel": self.clevel,
229-
"cname": self.cname,
230-
"shuffle": BLOSC_SHUFFLE.index(self.shuffle),
276+
"cname": self.cname.value,
277+
"shuffle": BLOSC_SHUFFLE.index(self.shuffle.value),
231278
"blocksize": self.blocksize,
232279
}
233280
elif zarr_format == 3:
@@ -244,7 +291,10 @@ def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self:
244291
if new_codec.typesize is None:
245292
new_codec = replace(new_codec, typesize=item_size)
246293
if new_codec.shuffle is None:
247-
new_codec = replace(new_codec, shuffle="bitshuffle" if item_size == 1 else "shuffle")
294+
new_codec = replace(
295+
new_codec,
296+
shuffle=BloscShuffle.bitshuffle if item_size == 1 else BloscShuffle.shuffle,
297+
)
248298

249299
return new_codec
250300

@@ -255,9 +305,9 @@ def _blosc_codec(self) -> Blosc:
255305
if self.typesize is None:
256306
raise ValueError("`typesize` needs to be set for decoding and encoding.")
257307
config_dict = {
258-
"cname": self.cname,
308+
"cname": self.cname.value,
259309
"clevel": self.clevel,
260-
"shuffle": BLOSC_SHUFFLE.index(self.shuffle),
310+
"shuffle": BLOSC_SHUFFLE.index(self.shuffle.value),
261311
"blocksize": self.blocksize,
262312
}
263313
# See https://github.com/zarr-developers/numcodecs/pull/713

src/zarr/codecs/bytes.py

Lines changed: 11 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -51,9 +51,11 @@ class BytesJSON_V2(BytesConfig):
5151
BytesJSON_V3 = NamedConfig[Literal["bytes"], BytesConfig] | Literal["bytes"]
5252

5353

54-
def parse_endianness(data: object) -> EndiannessStr:
54+
def parse_endianness(data: object) -> Endian:
5555
if data in ENDIANNESS_STR:
56-
return data # type: ignore [return-value]
56+
return Endian[data] # type: ignore [misc]
57+
if isinstance(data, Endian):
58+
return data
5759
raise ValueError(f"Invalid endianness: {data!r}. Expected one of {ENDIANNESS_STR}")
5860

5961

@@ -81,9 +83,9 @@ def check_json_v3(data: object) -> TypeGuard[BytesJSON_V3]:
8183
class BytesCodec(ArrayBytesCodec):
8284
is_fixed_size = True
8385

84-
endian: EndiannessStr | None
86+
endian: Endian | None
8587

86-
def __init__(self, *, endian: EndiannessStr | str | None = default_system_endian) -> None:
88+
def __init__(self, *, endian: EndiannessStr | Endian | None = default_system_endian) -> None:
8789
endian_parsed = None if endian is None else parse_endianness(endian)
8890

8991
object.__setattr__(self, "endian", endian_parsed)
@@ -121,14 +123,14 @@ def to_json(self, zarr_format: ZarrFormat) -> BytesJSON_V2 | BytesJSON_V3:
121123
if self.endian is not None:
122124
return {
123125
"id": "bytes",
124-
"endian": self.endian,
126+
"endian": self.endian.value,
125127
}
126128
return {"id": "bytes"}
127129
elif zarr_format == 3:
128130
if self.endian is not None:
129131
return {
130132
"name": "bytes",
131-
"configuration": {"endian": self.endian},
133+
"configuration": {"endian": self.endian.value},
132134
}
133135
return {"name": "bytes"}
134136
raise ValueError(
@@ -152,7 +154,7 @@ async def _decode_single(
152154
) -> NDBuffer:
153155
assert isinstance(chunk_bytes, Buffer)
154156
# TODO: remove endianness enum in favor of literal union
155-
endian = self.endian if self.endian is not None else None
157+
endian = self.endian.value if self.endian is not None else None
156158
if isinstance(chunk_spec.dtype, HasEndianness) and endian is not None:
157159
dtype = replace(chunk_spec.dtype, endianness=endian).to_native_dtype() # type: ignore[call-arg]
158160
else:
@@ -182,11 +184,11 @@ async def _encode_single(
182184
if (
183185
chunk_array.dtype.itemsize > 1
184186
and self.endian is not None
185-
and self.endian != chunk_array.byteorder.value
187+
and self.endian.value != chunk_array.byteorder.value
186188
):
187189
# type-ignore is a numpy bug
188190
# see https://github.com/numpy/numpy/issues/26473
189-
new_dtype = chunk_array.dtype.newbyteorder(self.endian)
191+
new_dtype = chunk_array.dtype.newbyteorder(self.endian.value)
190192
chunk_array = chunk_array.astype(new_dtype)
191193

192194
nd_array = chunk_array.as_ndarray_like()

tests/test_codecs/test_blosc.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -11,10 +11,10 @@
1111
from zarr.codecs.blosc import (
1212
BLOSC_CNAME,
1313
BLOSC_SHUFFLE,
14-
BloscCname,
14+
BloscCname_lit,
1515
BloscJSON_V2,
1616
BloscJSON_V3,
17-
BloscShuffle,
17+
BloscShuffle_lit,
1818
)
1919
from zarr.core.buffer import default_buffer_prototype
2020
from zarr.storage import StorePath
@@ -26,7 +26,7 @@
2626
@pytest.mark.parametrize("blocksize", [1, 2])
2727
@pytest.mark.parametrize("typesize", [1, 2])
2828
def test_to_json_v2(
29-
cname: BloscCname, shuffle: BloscShuffle, clevel: int, blocksize: int, typesize: int
29+
cname: BloscCname_lit, shuffle: BloscShuffle_lit, clevel: int, blocksize: int, typesize: int
3030
) -> None:
3131
codec = BloscCodec(
3232
shuffle=shuffle, cname=cname, clevel=clevel, blocksize=blocksize, typesize=typesize

0 commit comments

Comments
 (0)