revert enum removal, fix doctests

d-v-b · d-v-b · commit 0252ac3949a9 · 2025-09-15T11:36:52.000+02:00
diff --git a/docs/user-guide/config.rst b/docs/user-guide/config.rst
@@ -47,22 +47,33 @@ This is the current default configuration::
     'buffer': 'zarr.buffer.cpu.Buffer',
     'codec_pipeline': {'batch_size': 1,
                        'path': 'zarr.core.codec_pipeline.BatchedCodecPipeline'},
-    'codecs': {'blosc': 'zarr.codecs.blosc.BloscCodec',
+    'codecs': {'adler32': 'zarr.codecs.numcodecs.Adler32',
+               'astype': 'zarr.codecs.numcodecs.AsType',
+               'bitround': 'zarr.codecs.numcodecs.BitRound',
+               'blosc': 'zarr.codecs.blosc.BloscCodec',
                'bytes': 'zarr.codecs.bytes.BytesCodec',
+               'bz2': 'zarr.codecs.numcodecs.BZ2',
+               'crc32': 'zarr.codecs.numcodecs.CRC32',
                'crc32c': 'zarr.codecs.crc32c_.Crc32cCodec',
+               'delta': 'zarr.codecs.numcodecs.Delta',
                'endian': 'zarr.codecs.bytes.BytesCodec',
+               'fixedscaleoffset': 'zarr.codecs.numcodecs.FixedScaleOffset',
+               'fletcher32': 'zarr.codecs.numcodecs.Fletcher32',
                'gzip': 'zarr.codecs.gzip.GzipCodec',
+               'jenkins_lookup3': 'zarr.codecs.numcodecs.JenkinsLookup3',
+               'lz4': 'zarr.codecs.numcodecs.LZ4',
+               'lzma': 'zarr.codecs.numcodecs.LZMA',
                'numcodecs.adler32': 'zarr.codecs.numcodecs.Adler32',
                'numcodecs.astype': 'zarr.codecs.numcodecs.AsType',
                'numcodecs.bitround': 'zarr.codecs.numcodecs.BitRound',
-               'numcodecs.blosc': 'zarr.codecs.numcodecs.Blosc',
+               'numcodecs.blosc': 'zarr.codecs.blosc.BloscCodec',
                'numcodecs.bz2': 'zarr.codecs.numcodecs.BZ2',
                'numcodecs.crc32': 'zarr.codecs.numcodecs.CRC32',
-               'numcodecs.crc32c': 'zarr.codecs.numcodecs.CRC32C',
+               'numcodecs.crc32c': 'zarr.codecs.crc32c_.Crc32cCodec',
                'numcodecs.delta': 'zarr.codecs.numcodecs.Delta',
                'numcodecs.fixedscaleoffset': 'zarr.codecs.numcodecs.FixedScaleOffset',
                'numcodecs.fletcher32': 'zarr.codecs.numcodecs.Fletcher32',
-               'numcodecs.gZip': 'zarr.codecs.numcodecs.GZip',
+               'numcodecs.gzip': 'zarr.codecs.gzip.GzipCodec',
                'numcodecs.jenkins_lookup3': 'zarr.codecs.numcodecs.JenkinsLookup3',
                'numcodecs.lz4': 'zarr.codecs.numcodecs.LZ4',
                'numcodecs.lzma': 'zarr.codecs.numcodecs.LZMA',
@@ -72,11 +83,17 @@ This is the current default configuration::
                'numcodecs.shuffle': 'zarr.codecs.numcodecs.Shuffle',
                'numcodecs.zfpy': 'zarr.codecs.numcodecs.ZFPY',
                'numcodecs.zlib': 'zarr.codecs.numcodecs.Zlib',
-               'numcodecs.zstd': 'zarr.codecs.numcodecs.Zstd',
+               'numcodecs.zstd': 'zarr.codecs.zstd.ZstdCodec',
+               'packbits': 'zarr.codecs.numcodecs.PackBits',
+               'pcodec': 'zarr.codecs.numcodecs.PCodec',
+               'quantize': 'zarr.codecs.numcodecs.Quantize',
                'sharding_indexed': 'zarr.codecs.sharding.ShardingCodec',
+               'shuffle': 'zarr.codecs.numcodecs.Shuffle',
                'transpose': 'zarr.codecs.transpose.TransposeCodec',
                'vlen-bytes': 'zarr.codecs.vlen_utf8.VLenBytesCodec',
                'vlen-utf8': 'zarr.codecs.vlen_utf8.VLenUTF8Codec',
+               'zfpy': 'zarr.codecs.numcodecs.ZFPY',
+               'zlib': 'zarr.codecs.numcodecs.Zlib',
                'zstd': 'zarr.codecs.zstd.ZstdCodec'},
     'default_zarr_format': 3,
     'json_indent': 2,
diff --git a/src/zarr/codecs/__init__.py b/src/zarr/codecs/__init__.py
@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-from zarr.codecs.blosc import BloscCname, BloscCodec, BloscShuffle
+from zarr.codecs.blosc import BloscCname, BloscCname_lit, BloscCodec, BloscShuffle, BloscShuffle_lit
 from zarr.codecs.bytes import BytesCodec, Endian
 from zarr.codecs.crc32c_ import Crc32cCodec
 from zarr.codecs.gzip import GzipCodec
@@ -31,8 +31,10 @@
 
 __all__ = [
     "BloscCname",
+    "BloscCname_lit",
     "BloscCodec",
     "BloscShuffle",
+    "BloscShuffle_lit",
     "BytesCodec",
     "Crc32cCodec",
     "Endian",
diff --git a/src/zarr/codecs/blosc.py b/src/zarr/codecs/blosc.py
@@ -3,6 +3,7 @@
 import asyncio
 from collections.abc import Mapping
 from dataclasses import dataclass, replace
+from enum import Enum
 from functools import cached_property
 from typing import (
     TYPE_CHECKING,
@@ -34,25 +35,64 @@
     from zarr.core.array_spec import ArraySpec
     from zarr.core.buffer import Buffer
 
-BloscShuffle = Literal["noshuffle", "shuffle", "bitshuffle"]
+
+class BloscShuffle(Enum):
+    """
+    Enum for the shuffle filter used by blosc; each member's value equals its name.
+    """
+
+    noshuffle = "noshuffle"
+    shuffle = "shuffle"
+    bitshuffle = "bitshuffle"
+
+    @classmethod
+    def from_int(cls, num: int) -> BloscShuffle:  # alternate constructor from an integer code
+        blosc_shuffle_int_to_str = {
+            0: "noshuffle",
+            1: "shuffle",
+            2: "bitshuffle",
+        }
+        if num not in blosc_shuffle_int_to_str:  # only 0, 1 and 2 are valid codes
+            raise ValueError(f"Value must be between 0 and 2. Got {num}.")
+        return BloscShuffle[blosc_shuffle_int_to_str[num]]  # look the member up by name
+
+
+class BloscCname(Enum):
+    """
+    Enum for the compression library used by blosc; each member's value equals its name.
+    """
+
+    lz4 = "lz4"
+    lz4hc = "lz4hc"
+    blosclz = "blosclz"
+    zstd = "zstd"
+    snappy = "snappy"
+    zlib = "zlib"
+
+
+# TODO: Rename this when we retire the enums (the `BloscShuffle` name is taken by the enum class)
+BloscShuffle_lit = Literal["noshuffle", "shuffle", "bitshuffle"]
+"""The names of the shuffle options used by the blosc codec."""
 BLOSC_SHUFFLE: Final = ("noshuffle", "shuffle", "bitshuffle")
 
-BloscCname = Literal["lz4", "lz4hc", "blosclz", "zstd", "snappy", "zlib"]
+# TODO: Rename this when we retire the enums (the `BloscCname` name is taken by the enum class)
+BloscCname_lit = Literal["lz4", "lz4hc", "blosclz", "zstd", "snappy", "zlib"]
+"""The names of the compression libraries used by the blosc codec"""
 BLOSC_CNAME: Final = ("lz4", "lz4hc", "blosclz", "zstd", "snappy", "zlib")
 
 
 class BloscConfigV2(TypedDict):
-    cname: BloscCname
+    cname: BloscCname_lit  # plain string name, not a `BloscCname` enum member
     clevel: int
     shuffle: int
     blocksize: int
     typesize: NotRequired[int]
 
 
 class BloscConfigV3(TypedDict):
-    cname: BloscCname
+    cname: BloscCname_lit  # plain string name, not a `BloscCname` enum member
     clevel: int
-    shuffle: BloscShuffle
+    shuffle: BloscShuffle_lit  # plain string name, not a `BloscShuffle` enum member
     blocksize: int
     typesize: int
 
@@ -91,9 +131,14 @@ def check_json_v3(data: object) -> TypeGuard[BloscJSON_V3]:
 
 
 def parse_cname(value: object) -> BloscCname:
-    if value not in BLOSC_CNAME:
-        raise ValueError(f"Value must be one of {BLOSC_CNAME}. Got {value} instead.")
-    return value  # type: ignore[return-value]
+    if isinstance(value, BloscCname):  # already an enum member: pass it through unchanged
+        return value
+    if isinstance(value, str):  # strings are checked against BLOSC_CNAME, then looked up by name
+        if value not in BLOSC_CNAME:
+            raise ValueError(f"Value must be one of {BLOSC_CNAME}. Got {value} instead.")
+        return BloscCname[value]
+    msg = f"Value must be an instance of `BloscCname` or a string in {BLOSC_CNAME}. Got {value} instead."
+    raise TypeError(msg)  # neither a `BloscCname` member nor a string
 
 
 # See https://zarr.readthedocs.io/en/stable/user-guide/performance.html#configuring-blosc
@@ -125,8 +170,10 @@ def parse_blocksize(data: JSON) -> int:
 
 
 def parse_shuffle(data: object) -> BloscShuffle:
+    if isinstance(data, BloscShuffle):  # already an enum member: pass it through unchanged
+        return data
     if data in BLOSC_SHUFFLE:
-        return data  # type: ignore[return-value]
+        return BloscShuffle[data]  # type: ignore[misc]
     raise TypeError(f"Value must be one of {BLOSC_SHUFFLE}. Got {data} instead.")
 
 
@@ -144,9 +191,9 @@ def __init__(
         self,
         *,
         typesize: int | None = None,
-        cname: BloscCname = "zstd",
+        cname: BloscCname_lit | BloscCname = "zstd",
         clevel: int = 5,
-        shuffle: BloscShuffle | None = None,
+        shuffle: BloscShuffle_lit | BloscShuffle | None = None,
         blocksize: int = 0,
     ) -> None:
         typesize_parsed = parse_typesize(typesize) if typesize is not None else None
@@ -174,8 +221,8 @@ def to_dict(self) -> dict[str, JSON]:
             "name": "blosc",
             "configuration": {
                 "clevel": self.clevel,
-                "cname": self.cname,
-                "shuffle": self.shuffle,
+                "cname": self.cname.value,
+                "shuffle": self.shuffle.value,
                 "typesize": self.typesize,
                 "blocksize": self.blocksize,
             },
@@ -226,8 +273,8 @@ def to_json(self, zarr_format: ZarrFormat) -> BloscJSON_V2 | BloscJSON_V3:
             return {
                 "id": "blosc",
                 "clevel": self.clevel,
-                "cname": self.cname,
-                "shuffle": BLOSC_SHUFFLE.index(self.shuffle),
+                "cname": self.cname.value,
+                "shuffle": BLOSC_SHUFFLE.index(self.shuffle.value),
                 "blocksize": self.blocksize,
             }
         elif zarr_format == 3:
@@ -244,7 +291,10 @@ def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self:
         if new_codec.typesize is None:
             new_codec = replace(new_codec, typesize=item_size)
         if new_codec.shuffle is None:
-            new_codec = replace(new_codec, shuffle="bitshuffle" if item_size == 1 else "shuffle")
+            new_codec = replace(
+                new_codec,
+                shuffle=BloscShuffle.bitshuffle if item_size == 1 else BloscShuffle.shuffle,
+            )
 
         return new_codec
 
@@ -255,9 +305,9 @@ def _blosc_codec(self) -> Blosc:
         if self.typesize is None:
             raise ValueError("`typesize` needs to be set for decoding and encoding.")
         config_dict = {
-            "cname": self.cname,
+            "cname": self.cname.value,
             "clevel": self.clevel,
-            "shuffle": BLOSC_SHUFFLE.index(self.shuffle),
+            "shuffle": BLOSC_SHUFFLE.index(self.shuffle.value),
             "blocksize": self.blocksize,
         }
         # See https://github.com/zarr-developers/numcodecs/pull/713
diff --git a/src/zarr/codecs/bytes.py b/src/zarr/codecs/bytes.py
@@ -51,9 +51,11 @@ class BytesJSON_V2(BytesConfig):
 BytesJSON_V3 = NamedConfig[Literal["bytes"], BytesConfig] | Literal["bytes"]
 
 
-def parse_endianness(data: object) -> EndiannessStr:
+def parse_endianness(data: object) -> Endian:  # accepts a value in ENDIANNESS_STR or an `Endian`
     if data in ENDIANNESS_STR:
-        return data  # type: ignore [return-value]
+        return Endian[data]  # type: ignore [misc]
+    if isinstance(data, Endian):  # already an enum member: pass it through unchanged
+        return data
     raise ValueError(f"Invalid endianness: {data!r}. Expected one of {ENDIANNESS_STR}")
 
 
@@ -81,9 +83,9 @@ def check_json_v3(data: object) -> TypeGuard[BytesJSON_V3]:
 class BytesCodec(ArrayBytesCodec):
     is_fixed_size = True
 
-    endian: EndiannessStr | None
+    endian: Endian | None
 
-    def __init__(self, *, endian: EndiannessStr | str | None = default_system_endian) -> None:
+    def __init__(self, *, endian: EndiannessStr | Endian | None = default_system_endian) -> None:
         endian_parsed = None if endian is None else parse_endianness(endian)
 
         object.__setattr__(self, "endian", endian_parsed)
@@ -121,14 +123,14 @@ def to_json(self, zarr_format: ZarrFormat) -> BytesJSON_V2 | BytesJSON_V3:
             if self.endian is not None:
                 return {
                     "id": "bytes",
-                    "endian": self.endian,
+                    "endian": self.endian.value,
                 }
             return {"id": "bytes"}
         elif zarr_format == 3:
             if self.endian is not None:
                 return {
                     "name": "bytes",
-                    "configuration": {"endian": self.endian},
+                    "configuration": {"endian": self.endian.value},
                 }
             return {"name": "bytes"}
         raise ValueError(
@@ -152,7 +154,7 @@ async def _decode_single(
     ) -> NDBuffer:
         assert isinstance(chunk_bytes, Buffer)
         # TODO: remove endianness enum in favor of literal union
-        endian = self.endian if self.endian is not None else None
+        endian = self.endian.value if self.endian is not None else None
         if isinstance(chunk_spec.dtype, HasEndianness) and endian is not None:
             dtype = replace(chunk_spec.dtype, endianness=endian).to_native_dtype()  # type: ignore[call-arg]
         else:
@@ -182,11 +184,11 @@ async def _encode_single(
         if (
             chunk_array.dtype.itemsize > 1
             and self.endian is not None
-            and self.endian != chunk_array.byteorder.value
+            and self.endian.value != chunk_array.byteorder.value
         ):
             # type-ignore is a numpy bug
             # see https://github.com/numpy/numpy/issues/26473
-            new_dtype = chunk_array.dtype.newbyteorder(self.endian)
+            new_dtype = chunk_array.dtype.newbyteorder(self.endian.value)
             chunk_array = chunk_array.astype(new_dtype)
 
         nd_array = chunk_array.as_ndarray_like()
diff --git a/tests/test_codecs/test_blosc.py b/tests/test_codecs/test_blosc.py
@@ -11,10 +11,10 @@
 from zarr.codecs.blosc import (
     BLOSC_CNAME,
     BLOSC_SHUFFLE,
-    BloscCname,
+    BloscCname_lit,
     BloscJSON_V2,
     BloscJSON_V3,
-    BloscShuffle,
+    BloscShuffle_lit,
 )
 from zarr.core.buffer import default_buffer_prototype
 from zarr.storage import StorePath
@@ -26,7 +26,7 @@
 @pytest.mark.parametrize("blocksize", [1, 2])
 @pytest.mark.parametrize("typesize", [1, 2])
 def test_to_json_v2(
-    cname: BloscCname, shuffle: BloscShuffle, clevel: int, blocksize: int, typesize: int
+    cname: BloscCname_lit, shuffle: BloscShuffle_lit, clevel: int, blocksize: int, typesize: int
 ) -> None:
     codec = BloscCodec(
         shuffle=shuffle, cname=cname, clevel=clevel, blocksize=blocksize, typesize=typesize