Skip to content

Commit 2939fe9

Browse files
committed
route requests for v2 codecs to use the v3-adapted codecs
1 parent d26eeae commit 2939fe9

File tree

13 files changed

+499
-190
lines changed

13 files changed

+499
-190
lines changed

src/zarr/codecs/__init__.py

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,25 +7,21 @@
77
from zarr.codecs.numcodecs import (
88
BZ2,
99
CRC32,
10-
CRC32C,
1110
LZ4,
1211
LZMA,
1312
ZFPY,
1413
Adler32,
1514
AsType,
1615
BitRound,
17-
Blosc,
1816
Delta,
1917
FixedScaleOffset,
2018
Fletcher32,
21-
GZip,
2219
JenkinsLookup3,
2320
PackBits,
2421
PCodec,
2522
Quantize,
2623
Shuffle,
2724
Zlib,
28-
Zstd,
2925
)
3026
from zarr.codecs.sharding import ShardingCodec, ShardingCodecIndexLocation
3127
from zarr.codecs.transpose import TransposeCodec
@@ -65,29 +61,46 @@
6561
# Register all the codecs formerly contained in numcodecs.zarr3
6662

6763
register_codec("numcodecs.bz2", BZ2, qualname="zarr.codecs.numcodecs.BZ2")
64+
register_codec("bz2", BZ2, qualname="zarr.codecs.numcodecs.BZ2")
6865
register_codec("numcodecs.crc32", CRC32, qualname="zarr.codecs.numcodecs.CRC32")
69-
register_codec("numcodecs.crc32c", CRC32C, qualname="zarr.codecs.numcodecs.CRC32C")
66+
register_codec("crc32", CRC32, qualname="zarr.codecs.numcodecs.CRC32")
7067
register_codec("numcodecs.lz4", LZ4, qualname="zarr.codecs.numcodecs.LZ4")
68+
register_codec("lz4", LZ4, qualname="zarr.codecs.numcodecs.LZ4")
7169
register_codec("numcodecs.lzma", LZMA, qualname="zarr.codecs.numcodecs.LZMA")
70+
register_codec("lzma", LZMA, qualname="zarr.codecs.numcodecs.LZMA")
7271
register_codec("numcodecs.zfpy", ZFPY, qualname="zarr.codecs.numcodecs.ZFPY")
72+
register_codec("zfpy", ZFPY, qualname="zarr.codecs.numcodecs.ZFPY")
7373
register_codec("numcodecs.adler32", Adler32, qualname="zarr.codecs.numcodecs.Adler32")
74+
register_codec("adler32", Adler32, qualname="zarr.codecs.numcodecs.Adler32")
7475
register_codec("numcodecs.astype", AsType, qualname="zarr.codecs.numcodecs.AsType")
76+
register_codec("astype", AsType, qualname="zarr.codecs.numcodecs.AsType")
7577
register_codec("numcodecs.bitround", BitRound, qualname="zarr.codecs.numcodecs.BitRound")
76-
register_codec("numcodecs.blosc", Blosc, qualname="zarr.codecs.numcodecs.Blosc")
78+
register_codec("bitround", BitRound, qualname="zarr.codecs.numcodecs.BitRound")
7779
register_codec("numcodecs.delta", Delta, qualname="zarr.codecs.numcodecs.Delta")
80+
register_codec("delta", Delta, qualname="zarr.codecs.numcodecs.Delta")
7881
register_codec(
7982
"numcodecs.fixedscaleoffset",
8083
FixedScaleOffset,
8184
qualname="zarr.codecs.numcodecs.FixedScaleOffset",
8285
)
86+
register_codec(
87+
"fixedscaleoffset",
88+
FixedScaleOffset,
89+
qualname="zarr.codecs.numcodecs.FixedScaleOffset",
90+
)
8391
register_codec("numcodecs.fletcher32", Fletcher32, qualname="zarr.codecs.numcodecs.Fletcher32")
84-
register_codec("numcodecs.gzip", GZip, qualname="zarr.codecs.numcodecs.GZip")
92+
register_codec("fletcher32", Fletcher32, qualname="zarr.codecs.numcodecs.Fletcher32")
8593
register_codec(
8694
"numcodecs.jenkins_lookup3", JenkinsLookup3, qualname="zarr.codecs.numcodecs.JenkinsLookup3"
8795
)
96+
register_codec("jenkins_lookup3", JenkinsLookup3, qualname="zarr.codecs.numcodecs.JenkinsLookup3")
8897
register_codec("numcodecs.pcodec", PCodec, qualname="zarr.codecs.numcodecs.pcodec")
98+
register_codec("pcodec", PCodec, qualname="zarr.codecs.numcodecs.pcodec")
8999
register_codec("numcodecs.packbits", PackBits, qualname="zarr.codecs.numcodecs.PackBits")
100+
register_codec("packbits", PackBits, qualname="zarr.codecs.numcodecs.PackBits")
90101
register_codec("numcodecs.quantize", Quantize, qualname="zarr.codecs.numcodecs.Quantize")
102+
register_codec("quantize", Quantize, qualname="zarr.codecs.numcodecs.Quantize")
91103
register_codec("numcodecs.shuffle", Shuffle, qualname="zarr.codecs.numcodecs.Shuffle")
104+
register_codec("shuffle", Shuffle, qualname="zarr.codecs.numcodecs.Shuffle")
92105
register_codec("numcodecs.zlib", Zlib, qualname="zarr.codecs.numcodecs.Zlib")
93-
register_codec("numcodecs.zstd", Zstd, qualname="zarr.codecs.numcodecs.Zstd")
106+
register_codec("zlib", Zlib, qualname="zarr.codecs.numcodecs.Zlib")

src/zarr/codecs/_v2.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
from __future__ import annotations
22

33
import asyncio
4-
from collections.abc import Mapping
54
from dataclasses import dataclass
65
from functools import cached_property
7-
from typing import TYPE_CHECKING, ClassVar, Literal, Self, overload
6+
from typing import TYPE_CHECKING, Literal, Self, overload
87

98
import numpy as np
109
from numcodecs.compat import ensure_bytes, ensure_ndarray_like
@@ -16,16 +15,18 @@
1615
CodecJSON,
1716
CodecJSON_V2,
1817
)
19-
from zarr.core.chunk_grids import ChunkGrid
20-
from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType
2118
from zarr.registry import get_ndbuffer_class
2219

2320
if TYPE_CHECKING:
21+
from collections.abc import Mapping
22+
2423
from zarr.abc.numcodec import Numcodec
2524
from zarr.core.array_spec import ArraySpec
2625
from zarr.core.buffer import Buffer, NDBuffer
2726
from zarr.core.buffer.core import BufferPrototype
27+
from zarr.core.chunk_grids import ChunkGrid
2828
from zarr.core.common import BaseConfig, NamedConfig, ZarrFormat
29+
from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType
2930

3031

3132
@dataclass(frozen=True)
@@ -114,7 +115,7 @@ def compute_encoded_size(self, _input_byte_length: int, _chunk_spec: ArraySpec)
114115

115116
@dataclass(frozen=True, kw_only=True)
116117
class NumcodecsWrapper:
117-
codec_cls: ClassVar[type[Numcodec]]
118+
codec_cls: type[Numcodec]
118119
config: Mapping[str, object]
119120

120121
@cached_property
@@ -128,7 +129,7 @@ def to_json(self, zarr_format: Literal[3]) -> NamedConfig[str, BaseConfig]: ...
128129

129130
def to_json(self, zarr_format: ZarrFormat) -> CodecJSON_V2[str] | NamedConfig[str, BaseConfig]:
130131
if zarr_format == 2:
131-
return {"id": self.codec_cls.codec_id, **self.config}
132+
return self.config
132133
elif zarr_format == 3:
133134
config = self.codec.get_config()
134135
config_no_id = {k: v for k, v in config.items() if k != "id"}
@@ -184,19 +185,19 @@ def to_array_array(self) -> NumcodecsArrayArrayCodec:
184185
"""
185186
Use the ``_codec`` attribute to create a NumcodecsArrayArrayCodec.
186187
"""
187-
return NumcodecsArrayArrayCodec(config=self.config)
188+
return NumcodecsArrayArrayCodec(cls=self.codec_cls, config=self.config)
188189

189190
def to_bytes_bytes(self) -> NumcodecsBytesBytesCodec:
190191
"""
191192
Use the ``_codec`` attribute to create a NumcodecsBytesBytesCodec.
192193
"""
193-
return NumcodecsBytesBytesCodec(config=self.config)
194+
return NumcodecsBytesBytesCodec(cls=self.codec_cls, config=self.config)
194195

195196
def to_array_bytes(self) -> NumcodecsArrayBytesCodec:
196197
"""
197198
Create a NumcodecsArrayBytesCodec from this wrapper's ``codec_cls`` and ``config``.
198199
"""
199-
return NumcodecsArrayBytesCodec(config=self.config)
200+
return NumcodecsArrayBytesCodec(codec_cls=self.codec_cls, config=self.config)
200201

201202

202203
class NumcodecsBytesBytesCodec(NumcodecsWrapper, BytesBytesCodec):

src/zarr/codecs/blosc.py

Lines changed: 24 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -134,10 +134,10 @@ def parse_shuffle(data: object) -> BloscShuffle:
134134
class BloscCodec(BytesBytesCodec):
135135
is_fixed_size = False
136136

137-
typesize: int
137+
typesize: int | None
138138
cname: BloscCname
139139
clevel: int
140-
shuffle: BloscShuffle
140+
shuffle: BloscShuffle | None
141141
blocksize: int
142142

143143
def __init__(
@@ -149,10 +149,10 @@ def __init__(
149149
shuffle: BloscShuffle | None = None,
150150
blocksize: int = 0,
151151
) -> None:
152-
typesize_parsed = parse_typesize(typesize) if typesize is not None else 1
152+
typesize_parsed = parse_typesize(typesize) if typesize is not None else None
153153
cname_parsed = parse_cname(cname)
154154
clevel_parsed = parse_clevel(clevel)
155-
shuffle_parsed = parse_shuffle(shuffle) if shuffle is not None else "noshuffle"
155+
shuffle_parsed = parse_shuffle(shuffle) if shuffle is not None else None
156156
blocksize_parsed = parse_blocksize(blocksize)
157157

158158
object.__setattr__(self, "typesize", typesize_parsed)
@@ -166,7 +166,20 @@ def from_dict(cls, data: dict[str, JSON]) -> Self:
166166
return cls.from_json(data, zarr_format=3)
167167

168168
def to_dict(self) -> dict[str, JSON]:
# Serialize this codec as a ``{"name": "blosc", "configuration": {...}}`` dict.
# Raises ValueError when ``shuffle`` or ``typesize`` is None, because both are
# written into the configuration below.
169-
return self.to_json(zarr_format=3)
169+
if self.shuffle is None:
170+
raise ValueError("`shuffle` must be set to serialize to zarr format 3")
171+
if self.typesize is None:
172+
raise ValueError("`typesize` must be set to serialize to zarr format 3")
173+
# The dict is built directly here rather than by calling ``to_json``.
return {
174+
"name": "blosc",
175+
"configuration": {
176+
"clevel": self.clevel,
177+
"cname": self.cname,
178+
"shuffle": self.shuffle,
179+
"typesize": self.typesize,
180+
"blocksize": self.blocksize,
181+
},
182+
}
170183

171184
@classmethod
172185
def _from_json_v2(cls, data: CodecJSON) -> Self:
@@ -176,7 +189,7 @@ def _from_json_v2(cls, data: CodecJSON) -> Self:
176189
clevel=data["clevel"],
177190
shuffle=BLOSC_SHUFFLE[data["shuffle"]],
178191
blocksize=data["blocksize"],
179-
typesize=data.get("typesize", None),
192+
typesize=data.get("typesize", 1),
180193
)
181194
msg = (
182195
"Invalid Zarr V2 JSON representation of the blosc codec. "
@@ -208,6 +221,8 @@ def to_json(self, zarr_format: Literal[3]) -> BloscJSON_V3: ...
208221

209222
def to_json(self, zarr_format: ZarrFormat) -> BloscJSON_V2 | BloscJSON_V3:
210223
if zarr_format == 2:
224+
if self.shuffle is None:
225+
raise ValueError("`shuffle` must be set to serialize to zarr format 2")
211226
return {
212227
"id": "blosc",
213228
"clevel": self.clevel,
@@ -216,16 +231,7 @@ def to_json(self, zarr_format: ZarrFormat) -> BloscJSON_V2 | BloscJSON_V3:
216231
"blocksize": self.blocksize,
217232
}
218233
elif zarr_format == 3:
219-
return {
220-
"name": "blosc",
221-
"configuration": {
222-
"clevel": self.clevel,
223-
"cname": self.cname,
224-
"shuffle": self.shuffle,
225-
"typesize": self.typesize,
226-
"blocksize": self.blocksize,
227-
},
228-
}
234+
return self.to_dict() # type: ignore[return-value]
229235
raise ValueError(
230236
f"Unsupported Zarr format {zarr_format}. Expected 2 or 3."
231237
) # pragma: no cover
@@ -246,6 +252,8 @@ def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self:
246252
def _blosc_codec(self) -> Blosc:
247253
if self.shuffle is None:
248254
raise ValueError("`shuffle` needs to be set for decoding and encoding.")
255+
if self.typesize is None:
256+
raise ValueError("`typesize` needs to be set for decoding and encoding.")
249257
config_dict = {
250258
"cname": self.cname,
251259
"clevel": self.clevel,

0 commit comments

Comments
 (0)