Skip to content

Commit ce28794

Browse files
committed
pull array-array codecs out into their own modules, and design codec JSON properly
1 parent e769f31 commit ce28794

File tree

6 files changed

+266
-99
lines changed

6 files changed

+266
-99
lines changed

src/zarr/codecs/numcodecs/__init__.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,8 @@
1010
LZMA,
1111
ZFPY,
1212
Adler32,
13-
AsType,
1413
BitRound,
1514
Blosc,
16-
FixedScaleOffset,
1715
Fletcher32,
1816
GZip,
1917
JenkinsLookup3,
@@ -28,10 +26,12 @@
2826
_NumcodecsBytesBytesCodec,
2927
_NumcodecsCodec,
3028
)
29+
from zarr.codecs.numcodecs.astype import AsType
3130
from zarr.codecs.numcodecs.delta import Delta
31+
from zarr.codecs.numcodecs.fixed_scale_offset import FixedScaleOffset
3232

3333
# This is a fixed dictionary of numcodecs codecs for which we have pre-made Zarr V3 wrappers
34-
numcodecs_wrappers: Final[dict[str, type[_NumcodecsCodec]]] = {
34+
NUMCODECS_WRAPPERS: Final[dict[str, type[_NumcodecsCodec]]] = {
3535
"bz2": BZ2,
3636
"crc32": CRC32,
3737
"crc32c": CRC32C,

src/zarr/codecs/numcodecs/_codecs.py

Lines changed: 1 addition & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@
6161
_check_codecjson_v2,
6262
product,
6363
)
64-
from zarr.dtype import UInt8, ZDType, parse_dtype
64+
from zarr.dtype import UInt8, ZDType
6565
from zarr.errors import ZarrUserWarning
6666
from zarr.registry import get_numcodec
6767

@@ -75,7 +75,6 @@
7575
from zarr.codecs.zstd import ZstdConfig_V3, ZstdJSON_V2, ZstdJSON_V3
7676
from zarr.core.array_spec import ArraySpec
7777
from zarr.core.buffer import Buffer, BufferPrototype, NDBuffer
78-
from zarr.core.dtype.common import DTypeSpec_V2, DTypeSpec_V3
7978

8079

8180
# TypedDict definitions for V2 and V3 JSON representations
@@ -159,20 +158,6 @@ class BitRoundConfig(TypedDict):
159158
keepbits: int
160159

161160

162-
class FixedScaleOffsetConfig_V2(TypedDict):
163-
dtype: NotRequired[DTypeSpec_V2]
164-
astype: NotRequired[DTypeSpec_V2]
165-
scale: NotRequired[float]
166-
offset: NotRequired[float]
167-
168-
169-
class FixedScaleOffsetConfig_V3(TypedDict):
170-
dtype: NotRequired[DTypeSpec_V3]
171-
astype: NotRequired[DTypeSpec_V2]
172-
scale: NotRequired[float]
173-
offset: NotRequired[float]
174-
175-
176161
class QuantizeConfig(TypedDict):
177162
digits: int
178163
dtype: NotRequired[str]
@@ -182,11 +167,6 @@ class PackBitsConfig(TypedDict):
182167
pass # PackBits has no configuration parameters
183168

184169

185-
class AsTypeConfig(TypedDict):
186-
encode_dtype: str
187-
decode_dtype: str
188-
189-
190170
class BitRoundJSON_V2(BitRoundConfig):
191171
"""JSON representation of BitRound codec for Zarr V2."""
192172

@@ -197,18 +177,6 @@ class BitRoundJSON_V3(NamedRequiredConfig[Literal["bitround"], BitRoundConfig]):
197177
"""JSON representation of BitRound codec for Zarr V3."""
198178

199179

200-
class FixedScaleOffsetJSON_V2(FixedScaleOffsetConfig_V2):
201-
"""JSON representation of FixedScaleOffset codec for Zarr V2."""
202-
203-
id: ReadOnly[Literal["fixedscaleoffset"]]
204-
205-
206-
class FixedScaleOffsetJSON_V3(
207-
NamedRequiredConfig[Literal["fixedscaleoffset"], FixedScaleOffsetConfig_V3]
208-
):
209-
"""JSON representation of FixedScaleOffset codec for Zarr V3."""
210-
211-
212180
class QuantizeJSON_V2(QuantizeConfig):
213181
"""JSON representation of Quantize codec for Zarr V2."""
214182

@@ -229,16 +197,6 @@ class PackBitsJSON_V3(NamedRequiredConfig[Literal["packbits"], PackBitsConfig]):
229197
"""JSON representation of PackBits codec for Zarr V3."""
230198

231199

232-
class AsTypeJSON_V2(AsTypeConfig):
233-
"""JSON representation of AsType codec for Zarr V2."""
234-
235-
id: ReadOnly[Literal["astype"]]
236-
237-
238-
class AsTypeJSON_V3(NamedRequiredConfig[Literal["astype"], AsTypeConfig]):
239-
"""JSON representation of AsType codec for Zarr V3."""
240-
241-
242200
# Checksum codec JSON representations
243201
class Crc32Config(TypedDict):
244202
"""Configuration parameters for CRC32 codec."""
@@ -601,32 +559,6 @@ def to_json(self, zarr_format: ZarrFormat) -> BitRoundJSON_V2 | BitRoundJSON_V3:
601559
return super().to_json(zarr_format) # type: ignore[return-value]
602560

603561

604-
class FixedScaleOffset(_NumcodecsArrayArrayCodec):
605-
codec_name = "numcodecs.fixedscaleoffset"
606-
_codec_id = "fixedscaleoffset"
607-
codec_config: FixedScaleOffsetConfig_V2
608-
609-
@overload
610-
def to_json(self, zarr_format: Literal[2]) -> FixedScaleOffsetJSON_V2: ...
611-
@overload
612-
def to_json(self, zarr_format: Literal[3]) -> FixedScaleOffsetJSON_V3: ...
613-
def to_json(self, zarr_format: ZarrFormat) -> FixedScaleOffsetJSON_V2 | FixedScaleOffsetJSON_V3:
614-
_warn_unstable_specification(self)
615-
return super().to_json(zarr_format) # type: ignore[return-value]
616-
617-
def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec:
618-
if astype := self.codec_config.get("astype"):
619-
dtype = parse_dtype(np.dtype(astype), zarr_format=3) # type: ignore[arg-type]
620-
return replace(chunk_spec, dtype=dtype)
621-
return chunk_spec
622-
623-
def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self:
624-
if self.codec_config.get("dtype") is None:
625-
dtype = array_spec.dtype.to_native_dtype()
626-
return type(self)(**{**self.codec_config, "dtype": str(dtype)})
627-
return self
628-
629-
630562
class Quantize(_NumcodecsArrayArrayCodec):
631563
codec_name = "numcodecs.quantize"
632564
_codec_id = "quantize"
@@ -676,31 +608,6 @@ def validate(self, *, dtype: ZDType[Any, Any], **_kwargs: Any) -> None:
676608
raise ValueError(f"Packbits filter requires bool dtype. Got {dtype}.")
677609

678610

679-
class AsType(_NumcodecsArrayArrayCodec):
680-
codec_name = "numcodecs.astype"
681-
_codec_id = "astype"
682-
codec_config: AsTypeConfig
683-
684-
@overload
685-
def to_json(self, zarr_format: Literal[2]) -> AsTypeJSON_V2: ...
686-
@overload
687-
def to_json(self, zarr_format: Literal[3]) -> AsTypeJSON_V3: ...
688-
def to_json(self, zarr_format: ZarrFormat) -> AsTypeJSON_V2 | AsTypeJSON_V3:
689-
_warn_unstable_specification(self)
690-
return super().to_json(zarr_format) # type: ignore[return-value]
691-
692-
def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec:
693-
dtype = parse_dtype(np.dtype(self.codec_config["encode_dtype"]), zarr_format=3)
694-
return replace(chunk_spec, dtype=dtype)
695-
696-
def evolve_from_array_spec(self, array_spec: ArraySpec) -> AsType:
697-
if self.codec_config.get("decode_dtype") is None:
698-
# TODO: remove these coverage exemptions the correct way, i.e. with tests
699-
dtype = array_spec.dtype.to_native_dtype() # pragma: no cover
700-
return AsType(**{**self.codec_config, "decode_dtype": str(dtype)}) # pragma: no cover
701-
return self
702-
703-
704611
# bytes-to-bytes checksum codecs
705612
class _NumcodecsChecksumCodec(_NumcodecsBytesBytesCodec):
706613
def compute_encoded_size(self, input_byte_length: int, chunk_spec: ArraySpec) -> int:
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
from collections.abc import Mapping
2+
from dataclasses import replace
3+
from typing import Literal, Self, TypedDict, TypeGuard, overload
4+
5+
from typing_extensions import ReadOnly
6+
7+
from zarr.codecs.numcodecs._codecs import _NumcodecsArrayArrayCodec, _warn_unstable_specification
8+
from zarr.core.array_spec import ArraySpec
9+
from zarr.core.common import (
10+
CodecJSON,
11+
CodecJSON_V2,
12+
CodecJSON_V3,
13+
NamedRequiredConfig,
14+
ZarrFormat,
15+
_check_codecjson_v2,
16+
_check_codecjson_v3,
17+
)
18+
from zarr.core.dtype import parse_dtype
19+
from zarr.core.dtype.common import (
20+
DTypeName_V2,
21+
DTypeSpec_V3,
22+
check_dtype_name_v2,
23+
check_dtype_spec_v3,
24+
)
25+
26+
27+
class AsTypeConfig_V2(TypedDict):
    """Configuration of the AsType codec, with both dtypes given as Zarr V2 dtype names."""

    # dtype name used for the encoded form of the data
    encode_dtype: DTypeName_V2
    # dtype name used for the decoded form of the data
    decode_dtype: DTypeName_V2
30+
31+
32+
class AsTypeConfig_V3(TypedDict):
    """Configuration of the AsType codec, with both dtypes given as Zarr V3 dtype specs."""

    # dtype spec used for the encoded form of the data
    encode_dtype: DTypeSpec_V3
    # dtype spec used for the decoded form of the data
    decode_dtype: DTypeSpec_V3
35+
36+
37+
class AsTypeJSON_V2(AsTypeConfig_V2):
    """
    Zarr V2 JSON form of the AsType codec.

    This is the V2 configuration (``encode_dtype`` / ``decode_dtype``) flattened alongside
    the codec identifier.
    """

    # Codec identifier; always the literal string "astype".
    id: ReadOnly[Literal["astype"]]
41+
42+
43+
class AsTypeJSON_V3(NamedRequiredConfig[Literal["astype"], AsTypeConfig_V3]):
    """
    Zarr V3 JSON form of the AsType codec.

    The codec is named ``"astype"`` and its configuration follows ``AsTypeConfig_V3``.
    """
45+
46+
47+
def check_json_v2(data: object) -> TypeGuard[AsTypeJSON_V2]:
    """
    Type guard for the Zarr V2 JSON form of the AsType codec.

    ``data`` is accepted when it passes the generic V2 codec-JSON check, its ``id`` is
    ``"astype"``, and it contains ``encode_dtype`` and ``decode_dtype`` entries that both
    pass ``check_dtype_name_v2``.
    """
    if not _check_codecjson_v2(data):
        return False
    if data["id"] != "astype":
        return False
    if "encode_dtype" not in data or "decode_dtype" not in data:
        return False
    # Both dtype fields must be valid; encode_dtype is examined first.
    if not check_dtype_name_v2(data["encode_dtype"]):  # type: ignore[typeddict-item]
        return False
    return check_dtype_name_v2(data["decode_dtype"])  # type: ignore[typeddict-item]
59+
60+
61+
def check_json_v3(data: object) -> TypeGuard[AsTypeJSON_V3]:
    """
    Type guard for the Zarr V3 JSON form of the AsType codec.

    ``data`` is accepted when it passes the generic V3 codec-JSON check, is a mapping named
    ``"astype"``, and has a ``configuration`` whose ``encode_dtype`` and ``decode_dtype``
    entries both pass ``check_dtype_spec_v3``.
    """
    if not (_check_codecjson_v3(data) and isinstance(data, Mapping)):
        return False
    if data["name"] != "astype" or "configuration" not in data:
        return False
    configuration = data["configuration"]
    if "encode_dtype" not in configuration or "decode_dtype" not in configuration:
        return False
    # decode_dtype is validated before encode_dtype.
    if not check_dtype_spec_v3(configuration["decode_dtype"]):
        return False
    return check_dtype_spec_v3(configuration["encode_dtype"])
75+
76+
77+
class AsType(_NumcodecsArrayArrayCodec):
    """
    Zarr V3-compatible wrapper around the numcodecs AsType codec.

    The configuration is stored in its Zarr V2 form (``AsTypeConfig_V2``). Zarr V3 JSON is
    translated to V2 dtype names when a codec is built from it.

    This class does not have a stable API.
    """

    codec_name = "numcodecs.astype"
    _codec_id = "astype"
    codec_config: AsTypeConfig_V2

    @overload
    def to_json(self, zarr_format: Literal[2]) -> AsTypeJSON_V2: ...
    @overload
    def to_json(self, zarr_format: Literal[3]) -> AsTypeJSON_V3: ...
    def to_json(self, zarr_format: ZarrFormat) -> AsTypeJSON_V2 | AsTypeJSON_V3:
        """
        Serialize this codec to JSON for the requested Zarr format.

        ``_warn_unstable_specification`` is called first; serialization itself is delegated
        to the parent class.
        """
        _warn_unstable_specification(self)
        return super().to_json(zarr_format)  # type: ignore[return-value]

    def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec:
        """Return a copy of ``chunk_spec`` whose dtype is this codec's ``encode_dtype``."""
        encoded_dtype = parse_dtype(self.codec_config["encode_dtype"], zarr_format=3)
        return replace(chunk_spec, dtype=encoded_dtype)

    def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self:
        """
        Return a new codec whose ``decode_dtype`` is the native dtype of ``array_spec``.

        NOTE(review): an existing ``decode_dtype`` in the configuration is always replaced.
        Confirm that overriding a user-supplied value is intended.
        """
        native_dtype = array_spec.dtype.to_native_dtype()  # pragma: no cover
        evolved = {**self.codec_config, "decode_dtype": str(native_dtype)}  # pragma: no cover
        return type(self)(**evolved)  # pragma: no cover

    @classmethod
    def _from_json_v2(cls, data: CodecJSON_V2) -> Self:
        """
        Build a codec from Zarr V2 JSON by passing every key to the constructor.

        NOTE(review): unlike the V3 path, ``data`` is not checked with ``check_json_v2``
        here. Confirm the constructor performs the necessary validation.
        """
        return cls(**data)

    @classmethod
    def _from_json_v3(cls, data: CodecJSON_V3) -> Self:
        """
        Build a codec from Zarr V3 JSON.

        Each V3 dtype spec in the configuration is parsed and converted to its Zarr V2
        dtype name before being handed to the constructor.

        Raises
        ------
        TypeError
            If ``data`` does not pass ``check_json_v3``.
        """
        if not check_json_v3(data):
            raise TypeError(f"Invalid JSON: {data}")

        configuration = data["configuration"]
        # Convert encode_dtype fully before touching decode_dtype.
        encode_v2 = parse_dtype(configuration["encode_dtype"], zarr_format=3).to_json(
            zarr_format=2
        )
        encode_name = encode_v2["name"]
        decode_v2 = parse_dtype(configuration["decode_dtype"], zarr_format=3).to_json(
            zarr_format=2
        )
        decode_name = decode_v2["name"]

        return cls(encode_dtype=encode_name, decode_dtype=decode_name)

    @classmethod
    def from_json(cls, data: CodecJSON) -> Self:
        """
        Build a codec from either Zarr V2 or Zarr V3 codec JSON.

        Input that passes the generic V2 codec-JSON check goes down the V2 path; anything
        else is handed to the V3 path, which raises ``TypeError`` on invalid input.
        """
        return cls._from_json_v2(data) if _check_codecjson_v2(data) else cls._from_json_v3(data)

0 commit comments

Comments
 (0)