Skip to content

Commit 53c6b46

Browse files
committed
wip
1 parent 3d910a5 commit 53c6b46

File tree

6 files changed

+143
-134
lines changed

6 files changed

+143
-134
lines changed

src/zarr/codecs/_v2.py

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
from __future__ import annotations
22

33
import asyncio
4+
from collections.abc import Mapping
45
from dataclasses import dataclass
5-
from typing import TYPE_CHECKING, Literal, Self, overload
6+
from functools import cached_property
7+
from typing import TYPE_CHECKING, ClassVar, Literal, Self, overload
68

79
import numpy as np
810
from numcodecs.compat import ensure_bytes, ensure_ndarray_like
@@ -112,7 +114,12 @@ def compute_encoded_size(self, _input_byte_length: int, _chunk_spec: ArraySpec)
112114

113115
@dataclass(frozen=True, kw_only=True)
114116
class NumcodecsWrapper:
115-
codec: Numcodec
117+
codec_cls: ClassVar[type[Numcodec]]
118+
config: Mapping[str, object]
119+
120+
@cached_property
121+
def codec(self) -> Numcodec:
122+
return self.codec_cls(**self.config)
116123

117124
@overload
118125
def to_json(self, zarr_format: Literal[2]) -> CodecJSON_V2[str]: ...
@@ -121,7 +128,7 @@ def to_json(self, zarr_format: Literal[3]) -> NamedConfig[str, BaseConfig]: ...
121128

122129
def to_json(self, zarr_format: ZarrFormat) -> CodecJSON_V2[str] | NamedConfig[str, BaseConfig]:
123130
if zarr_format == 2:
124-
return self.codec.get_config()
131+
return {"id": self.codec_cls.codec_id, **self.config}
125132
elif zarr_format == 3:
126133
config = self.codec.get_config()
127134
config_no_id = {k: v for k, v in config.items() if k != "id"}
@@ -130,15 +137,11 @@ def to_json(self, zarr_format: ZarrFormat) -> CodecJSON_V2[str] | NamedConfig[st
130137

131138
@classmethod
132139
def _from_json_v2(cls, data: CodecJSON) -> Self:
133-
raise NotADirectoryError(
134-
"This class does not support creating instances from JSON data for Zarr format 2."
135-
)
140+
return cls(config=data)
136141

137142
@classmethod
138143
def _from_json_v3(cls, data: CodecJSON) -> Self:
139-
raise NotImplementedError(
140-
"This class does not support creating instances from JSON data for Zarr format 3."
141-
)
144+
return cls(config=data.get("configuration", {}))
142145

143146
def compute_encoded_size(self, input_byte_length: int, chunk_spec: ArraySpec) -> int:
144147
raise NotImplementedError
@@ -181,19 +184,19 @@ def to_array_array(self) -> NumcodecsArrayArrayCodec:
181184
"""
182185
Use the ``_codec`` attribute to create a NumcodecsArrayArrayCodec.
183186
"""
184-
return NumcodecsArrayArrayCodec(codec=self.codec)
187+
return NumcodecsArrayArrayCodec(config=self.config)
185188

186189
def to_bytes_bytes(self) -> NumcodecsBytesBytesCodec:
187190
"""
188191
Use the ``_codec`` attribute to create a NumcodecsBytesBytesCodec.
189192
"""
190-
return NumcodecsBytesBytesCodec(codec=self.codec)
193+
return NumcodecsBytesBytesCodec(config=self.config)
191194

192195
def to_array_bytes(self) -> NumcodecsArrayBytesCodec:
193196
"""
194197
Use the ``_codec`` attribute to create a NumcodecsArrayBytesCodec.
195198
"""
196-
return NumcodecsArrayBytesCodec(codec=self.codec)
199+
return NumcodecsArrayBytesCodec(config=self.config)
197200

198201

199202
class NumcodecsBytesBytesCodec(NumcodecsWrapper, BytesBytesCodec):

src/zarr/codecs/blosc.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -134,10 +134,10 @@ def parse_shuffle(data: object) -> BloscShuffle:
134134
class BloscCodec(BytesBytesCodec):
135135
is_fixed_size = False
136136

137-
typesize: int | None
137+
typesize: int
138138
cname: BloscCname
139139
clevel: int
140-
shuffle: BloscShuffle | None
140+
shuffle: BloscShuffle
141141
blocksize: int
142142

143143
def __init__(
@@ -149,10 +149,10 @@ def __init__(
149149
shuffle: BloscShuffle | None = None,
150150
blocksize: int = 0,
151151
) -> None:
152-
typesize_parsed = parse_typesize(typesize) if typesize is not None else None
152+
typesize_parsed = parse_typesize(typesize) if typesize is not None else 1
153153
cname_parsed = parse_cname(cname)
154154
clevel_parsed = parse_clevel(clevel)
155-
shuffle_parsed = parse_shuffle(shuffle) if shuffle is not None else None
155+
shuffle_parsed = parse_shuffle(shuffle) if shuffle is not None else "noshuffle"
156156
blocksize_parsed = parse_blocksize(blocksize)
157157

158158
object.__setattr__(self, "typesize", typesize_parsed)
@@ -207,8 +207,6 @@ def to_json(self, zarr_format: Literal[2]) -> BloscJSON_V2: ...
207207
def to_json(self, zarr_format: Literal[3]) -> BloscJSON_V3: ...
208208

209209
def to_json(self, zarr_format: ZarrFormat) -> BloscJSON_V2 | BloscJSON_V3:
210-
if self.typesize is None or self.shuffle is None:
211-
raise ValueError("typesize and blocksize need to be set for encoding.")
212210
if zarr_format == 2:
213211
return {
214212
"id": "blosc",

src/zarr/codecs/numcodecs/_codecs.py

Lines changed: 51 additions & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -30,16 +30,13 @@
3030
from dataclasses import dataclass, replace
3131
from functools import cached_property
3232
from typing import TYPE_CHECKING, Any, Final, Literal, Self, overload
33-
from warnings import warn
3433

3534
import numpy as np
3635

3736
from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec, CodecJSON_V2
38-
from zarr.abc.metadata import Metadata
3937
from zarr.core.buffer.cpu import as_numpy_array_wrapper
4038
from zarr.core.common import JSON, NamedConfig, ZarrFormat, parse_named_configuration, product
4139
from zarr.dtype import UInt8, ZDType, parse_dtype
42-
from zarr.errors import ZarrUserWarning
4340
from zarr.registry import get_numcodec
4441

4542
if TYPE_CHECKING:
@@ -69,55 +66,12 @@ def _parse_codec_configuration(data: dict[str, JSON]) -> dict[str, JSON]:
6966

7067

7168
@dataclass(frozen=True)
72-
class _NumcodecsCodec(Metadata):
73-
codec_name: str
74-
codec_config: dict[str, JSON]
75-
76-
def __init_subclass__(cls, *, codec_name: str | None = None, **kwargs: Any) -> None:
77-
"""To be used only when creating the actual public-facing codec class."""
78-
super().__init_subclass__(**kwargs)
79-
if codec_name is not None:
80-
namespace = codec_name
81-
82-
cls_name = f"{CODEC_PREFIX}{namespace}.{cls.__name__}"
83-
cls.codec_name = f"{CODEC_PREFIX}{namespace}"
84-
cls.__doc__ = f"""
85-
See :class:`{cls_name}` for more details and parameters.
86-
"""
87-
88-
def __init__(self, **codec_config: JSON) -> None:
89-
if not self.codec_name:
90-
raise ValueError(
91-
"The codec name needs to be supplied through the `codec_name` attribute."
92-
) # pragma: no cover
93-
unprefixed_codec_name = _expect_name_prefix(self.codec_name)
94-
95-
if "id" not in codec_config:
96-
codec_config = {"id": unprefixed_codec_name, **codec_config}
97-
elif codec_config["id"] != unprefixed_codec_name:
98-
raise ValueError(
99-
f"Codec id does not match {unprefixed_codec_name}. Got: {codec_config['id']}."
100-
) # pragma: no cover
101-
102-
object.__setattr__(self, "codec_config", codec_config)
103-
warn(
104-
"Numcodecs codecs are not in the Zarr version 3 specification and "
105-
"may not be supported by other zarr implementations.",
106-
category=ZarrUserWarning,
107-
stacklevel=2,
108-
)
69+
class _NumcodecsCodec:
70+
codec_cls: type[Numcodec]
10971

11072
@cached_property
11173
def _codec(self) -> Numcodec:
112-
return get_numcodec(self.codec_config) # type: ignore[arg-type]
113-
114-
@classmethod
115-
def from_dict(cls, data: dict[str, JSON]) -> Self:
116-
codec_config = _parse_codec_configuration(data)
117-
return cls(**codec_config)
118-
119-
def to_dict(self) -> dict[str, JSON]:
120-
return self.to_json(zarr_format=3)
74+
return get_numcodec(self.to_json(zarr_format=2)) # type: ignore[arg-type]
12175

12276
@overload
12377
def to_json(self, zarr_format: Literal[2]) -> CodecJSON_V2[str]: ...
@@ -134,20 +88,8 @@ def to_json(
13488
else:
13589
return {"name": codec_id, "configuration": codec_config}
13690

137-
def compute_encoded_size(self, input_byte_length: int, chunk_spec: ArraySpec) -> int:
138-
raise NotImplementedError # pragma: no cover
139-
140-
# Override __repr__ because dynamically constructed classes don't seem to work otherwise
141-
def __repr__(self) -> str:
142-
codec_config = self.codec_config.copy()
143-
codec_config.pop("id", None)
144-
return f"{self.__class__.__name__}(codec_name={self.codec_name!r}, codec_config={codec_config!r})"
145-
14691

14792
class _NumcodecsBytesBytesCodec(_NumcodecsCodec, BytesBytesCodec):
148-
def __init__(self, **codec_config: JSON) -> None:
149-
super().__init__(**codec_config)
150-
15193
async def _decode_single(self, chunk_data: Buffer, chunk_spec: ArraySpec) -> Buffer:
15294
return await asyncio.to_thread(
15395
as_numpy_array_wrapper,
@@ -167,9 +109,6 @@ async def _encode_single(self, chunk_data: Buffer, chunk_spec: ArraySpec) -> Buf
167109

168110

169111
class _NumcodecsArrayArrayCodec(_NumcodecsCodec, ArrayArrayCodec):
170-
def __init__(self, **codec_config: JSON) -> None:
171-
super().__init__(**codec_config)
172-
173112
async def _decode_single(self, chunk_data: NDBuffer, chunk_spec: ArraySpec) -> NDBuffer:
174113
chunk_ndarray = chunk_data.as_ndarray_like()
175114
out = await asyncio.to_thread(self._codec.decode, chunk_ndarray)
@@ -197,35 +136,37 @@ async def _encode_single(self, chunk_data: NDBuffer, chunk_spec: ArraySpec) -> B
197136

198137

199138
# bytes-to-bytes codecs
200-
class Blosc(_NumcodecsBytesBytesCodec, codec_name="blosc"):
201-
pass
139+
class Blosc(_NumcodecsBytesBytesCodec):
140+
codec_name = "blosc"
202141

203142

204-
class LZ4(_NumcodecsBytesBytesCodec, codec_name="lz4"):
205-
pass
143+
class LZ4(_NumcodecsBytesBytesCodec):
144+
codec_name = "lz4"
206145

207146

208-
class Zstd(_NumcodecsBytesBytesCodec, codec_name="zstd"):
209-
pass
147+
class Zstd(_NumcodecsBytesBytesCodec):
148+
codec_name = "zstd"
210149

211150

212-
class Zlib(_NumcodecsBytesBytesCodec, codec_name="zlib"):
213-
pass
151+
class Zlib(_NumcodecsBytesBytesCodec):
152+
codec_name = "zlib"
214153

215154

216-
class GZip(_NumcodecsBytesBytesCodec, codec_name="gzip"):
217-
pass
155+
class GZip(_NumcodecsBytesBytesCodec):
156+
codec_name = "gzip"
218157

219158

220-
class BZ2(_NumcodecsBytesBytesCodec, codec_name="bz2"):
221-
pass
159+
class BZ2(_NumcodecsBytesBytesCodec):
160+
codec_name = "bz2"
222161

223162

224-
class LZMA(_NumcodecsBytesBytesCodec, codec_name="lzma"):
225-
pass
163+
class LZMA(_NumcodecsBytesBytesCodec):
164+
codec_name = "lzma"
226165

227166

228-
class Shuffle(_NumcodecsBytesBytesCodec, codec_name="shuffle"):
167+
class Shuffle(_NumcodecsBytesBytesCodec):
168+
codec_name = "shuffle"
169+
229170
def evolve_from_array_spec(self, array_spec: ArraySpec) -> Shuffle:
230171
if self.codec_config.get("elementsize") is None:
231172
dtype = array_spec.dtype.to_native_dtype()
@@ -234,19 +175,23 @@ def evolve_from_array_spec(self, array_spec: ArraySpec) -> Shuffle:
234175

235176

236177
# array-to-array codecs ("filters")
237-
class Delta(_NumcodecsArrayArrayCodec, codec_name="delta"):
178+
class Delta(_NumcodecsArrayArrayCodec):
179+
codec_name = "delta"
180+
238181
def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec:
239182
if astype := self.codec_config.get("astype"):
240183
dtype = parse_dtype(np.dtype(astype), zarr_format=3) # type: ignore[call-overload]
241184
return replace(chunk_spec, dtype=dtype)
242185
return chunk_spec
243186

244187

245-
class BitRound(_NumcodecsArrayArrayCodec, codec_name="bitround"):
246-
pass
188+
class BitRound(_NumcodecsArrayArrayCodec):
189+
codec_name = "bitround"
247190

248191

249-
class FixedScaleOffset(_NumcodecsArrayArrayCodec, codec_name="fixedscaleoffset"):
192+
class FixedScaleOffset(_NumcodecsArrayArrayCodec):
193+
codec_name = "fixedscaleoffset"
194+
250195
def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec:
251196
if astype := self.codec_config.get("astype"):
252197
dtype = parse_dtype(np.dtype(astype), zarr_format=3) # type: ignore[call-overload]
@@ -260,18 +205,19 @@ def evolve_from_array_spec(self, array_spec: ArraySpec) -> FixedScaleOffset:
260205
return self
261206

262207

263-
class Quantize(_NumcodecsArrayArrayCodec, codec_name="quantize"):
264-
def __init__(self, **codec_config: JSON) -> None:
265-
super().__init__(**codec_config)
208+
class Quantize(_NumcodecsArrayArrayCodec):
209+
codec_name = "quantize"
266210

267-
def evolve_from_array_spec(self, array_spec: ArraySpec) -> Quantize:
211+
def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self:
268212
if self.codec_config.get("dtype") is None:
269213
dtype = array_spec.dtype.to_native_dtype()
270214
return Quantize(**{**self.codec_config, "dtype": str(dtype)})
271215
return self
272216

273217

274-
class PackBits(_NumcodecsArrayArrayCodec, codec_name="packbits"):
218+
class PackBits(_NumcodecsArrayArrayCodec):
219+
codec_name = "packbits"
220+
275221
def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec:
276222
return replace(
277223
chunk_spec,
@@ -288,7 +234,9 @@ def validate(self, *, dtype: ZDType[Any, Any], **_kwargs: Any) -> None:
288234
raise ValueError(f"Packbits filter requires bool dtype. Got {dtype}.")
289235

290236

291-
class AsType(_NumcodecsArrayArrayCodec, codec_name="astype"):
237+
class AsType(_NumcodecsArrayArrayCodec):
238+
codec_name = "astype"
239+
292240
def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec:
293241
dtype = parse_dtype(np.dtype(self.codec_config["encode_dtype"]), zarr_format=3) # type: ignore[arg-type]
294242
return replace(chunk_spec, dtype=dtype)
@@ -307,30 +255,30 @@ def compute_encoded_size(self, input_byte_length: int, chunk_spec: ArraySpec) ->
307255
return input_byte_length + 4 # pragma: no cover
308256

309257

310-
class CRC32(_NumcodecsChecksumCodec, codec_name="crc32"):
311-
pass
258+
class CRC32(_NumcodecsChecksumCodec):
259+
codec_name = "crc32"
312260

313261

314-
class CRC32C(_NumcodecsChecksumCodec, codec_name="crc32c"):
315-
pass
262+
class CRC32C(_NumcodecsChecksumCodec):
263+
codec_name = "crc32c"
316264

317265

318-
class Adler32(_NumcodecsChecksumCodec, codec_name="adler32"):
319-
pass
266+
class Adler32(_NumcodecsChecksumCodec):
267+
codec_name = "adler32"
320268

321269

322-
class Fletcher32(_NumcodecsChecksumCodec, codec_name="fletcher32"):
323-
pass
270+
class Fletcher32(_NumcodecsChecksumCodec):
271+
codec_name = "fletcher32"
324272

325273

326-
class JenkinsLookup3(_NumcodecsChecksumCodec, codec_name="jenkins_lookup3"):
327-
pass
274+
class JenkinsLookup3(_NumcodecsChecksumCodec):
275+
codec_name = "jenkins_lookup3"
328276

329277

330278
# array-to-bytes codecs
331-
class PCodec(_NumcodecsArrayBytesCodec, codec_name="pcodec"):
332-
pass
279+
class PCodec(_NumcodecsArrayBytesCodec):
280+
codec_name = "pcodec"
333281

334282

335-
class ZFPY(_NumcodecsArrayBytesCodec, codec_name="zfpy"):
336-
pass
283+
class ZFPY(_NumcodecsArrayBytesCodec):
284+
codec_name = "zfpy"

0 commit comments

Comments
 (0)