Skip to content

Commit feb2047

Browse files
committed
type-safety and more improvements using delta codec as a template
1 parent 9510a5e commit feb2047

File tree

9 files changed

+157
-56
lines changed

9 files changed

+157
-56
lines changed

src/zarr/codecs/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
Adler32,
1414
AsType,
1515
BitRound,
16-
Delta,
1716
FixedScaleOffset,
1817
Fletcher32,
1918
JenkinsLookup3,
@@ -23,6 +22,7 @@
2322
Shuffle,
2423
Zlib,
2524
)
25+
from zarr.codecs.numcodecs.delta import Delta
2626
from zarr.codecs.sharding import ShardingCodec, ShardingCodecIndexLocation
2727
from zarr.codecs.transpose import TransposeCodec
2828
from zarr.codecs.vlen_utf8 import VLenBytesCodec, VLenUTF8Codec

src/zarr/codecs/numcodecs/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
AsType,
1414
BitRound,
1515
Blosc,
16-
Delta,
1716
FixedScaleOffset,
1817
Fletcher32,
1918
GZip,
@@ -29,6 +28,7 @@
2928
_NumcodecsBytesBytesCodec,
3029
_NumcodecsCodec,
3130
)
31+
from zarr.codecs.numcodecs.delta import Delta
3232

3333
# This is a fixed dictionary of numcodecs codecs for which we have pre-made Zarr V3 wrappers
3434
numcodecs_wrappers: Final[dict[str, type[_NumcodecsCodec]]] = {

src/zarr/codecs/numcodecs/_codecs.py

Lines changed: 0 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -155,16 +155,6 @@ class ShuffleJSON_V3(NamedRequiredConfig[Literal["shuffle"], ShuffleConfig]):
155155
"""JSON representation of Shuffle codec for Zarr V3."""
156156

157157

158-
class DeltaConfig_V2(TypedDict):
159-
dtype: DTypeSpec_V2
160-
astype: DTypeSpec_V2
161-
162-
163-
class DeltaConfig_V3(TypedDict):
164-
dtype: DTypeSpec_V3
165-
astype: DTypeSpec_V3
166-
167-
168158
class BitRoundConfig(TypedDict):
169159
keepbits: int
170160

@@ -197,17 +187,6 @@ class AsTypeConfig(TypedDict):
197187
decode_dtype: str
198188

199189

200-
# Array-to-array codec JSON representations
201-
class DeltaJSON_V2(DeltaConfig_V2):
202-
"""JSON representation of Delta codec for Zarr V2."""
203-
204-
id: ReadOnly[Literal["delta"]]
205-
206-
207-
class DeltaJSON_V3(NamedRequiredConfig[Literal["delta"], DeltaConfig_V3]):
208-
"""JSON representation of Delta codec for Zarr V3."""
209-
210-
211190
class BitRoundJSON_V2(BitRoundConfig):
212191
"""JSON representation of BitRound codec for Zarr V2."""
213192

@@ -608,31 +587,6 @@ def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self:
608587

609588

610589
# array-to-array codecs ("filters")
611-
class Delta(_NumcodecsArrayArrayCodec):
612-
codec_name = "numcodecs.delta"
613-
_codec_id = "delta"
614-
codec_config: DeltaConfig_V2 | DeltaConfig_V3
615-
616-
def __init__(self, **codec_config: Any) -> None:
617-
if "codec_config" in codec_config:
618-
raise ValueError("The argument 'codec_config' is not supported.")
619-
super().__init__(**codec_config)
620-
621-
@overload
622-
def to_json(self, zarr_format: Literal[2]) -> DeltaJSON_V2: ...
623-
@overload
624-
def to_json(self, zarr_format: Literal[3]) -> DeltaJSON_V3: ...
625-
def to_json(self, zarr_format: ZarrFormat) -> DeltaJSON_V2 | DeltaJSON_V3:
626-
_warn_unstable_specification(self)
627-
return super().to_json(zarr_format) # type: ignore[return-value]
628-
629-
def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec:
630-
if astype := self.codec_config.get("astype"):
631-
dtype = parse_dtype(np.dtype(astype), zarr_format=3) # type: ignore[arg-type]
632-
return replace(chunk_spec, dtype=dtype)
633-
return chunk_spec
634-
635-
636590
class BitRound(_NumcodecsArrayArrayCodec):
637591
codec_name = "numcodecs.bitround"
638592
_codec_id = "bitround"

src/zarr/codecs/numcodecs/delta.py

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
from __future__ import annotations
2+
3+
from collections.abc import Mapping
4+
from dataclasses import replace
5+
from typing import TYPE_CHECKING, Any, Literal, Self, TypedDict, TypeGuard, overload
6+
7+
from typing_extensions import ReadOnly
8+
9+
from zarr.codecs.numcodecs._codecs import (
10+
_NumcodecsArrayArrayCodec,
11+
_warn_unstable_specification,
12+
)
13+
from zarr.core.common import (
14+
CodecJSON,
15+
CodecJSON_V2,
16+
CodecJSON_V3,
17+
NamedRequiredConfig,
18+
ZarrFormat,
19+
_check_codecjson_v2,
20+
_check_codecjson_v3,
21+
)
22+
from zarr.core.dtype.common import check_dtype_name_v2, check_dtype_spec_v3
23+
from zarr.dtype import parse_dtype
24+
25+
if TYPE_CHECKING:
26+
from zarr.core.array_spec import ArraySpec
27+
from zarr.core.dtype.common import DTypeName_V2, DTypeSpec_V3
28+
29+
30+
class DeltaConfig_V2(TypedDict):
    """
    Configuration fields of the Delta codec, expressed with Zarr V2 data type names.

    Both keys are required.
    """

    # Data type name in Zarr V2 form.
    dtype: DTypeName_V2
    # Data type name in Zarr V2 form; when set, ``Delta.resolve_metadata`` uses it
    # as the data type of the chunk spec it returns.
    astype: DTypeName_V2
33+
34+
35+
class DeltaConfig_V3(TypedDict):
    """
    Configuration fields of the Delta codec, expressed with Zarr V3 data type specs.

    Both keys are required.
    """

    # Data type spec in Zarr V3 form.
    dtype: DTypeSpec_V3
    # Data type spec in Zarr V3 form; counterpart of the V2 ``astype`` field.
    astype: DTypeSpec_V3
38+
39+
40+
class DeltaJSON_V2(DeltaConfig_V2):
    """
    Zarr V2 JSON document for the Delta codec.

    It carries the ``dtype`` / ``astype`` fields inherited from ``DeltaConfig_V2``
    alongside the codec identifier.
    """

    # Codec identifier; always the literal string "delta" and read-only.
    id: ReadOnly[Literal["delta"]]
44+
45+
46+
class DeltaJSON_V3(NamedRequiredConfig[Literal["delta"], DeltaConfig_V3]):
    """
    Zarr V3 JSON document for the Delta codec.

    The codec name is the literal "delta" and the configuration has the shape of
    ``DeltaConfig_V3``.
    """
48+
49+
50+
def check_json_v2(data: object) -> TypeGuard[DeltaJSON_V2]:
    """
    Type guard for the Zarr V2 JSON form of the Delta codec.

    ``data`` is accepted when it passes ``_check_codecjson_v2``, its ``"id"`` is
    ``"delta"``, it contains both ``"astype"`` and ``"dtype"``, and both of those
    values pass ``check_dtype_name_v2``. The checks run in that order and stop at
    the first failure.
    """
    if not _check_codecjson_v2(data):
        return False
    if data["id"] != "delta":
        return False
    if "astype" not in data or "dtype" not in data:
        return False
    if not check_dtype_name_v2(data["dtype"]):  # type: ignore[typeddict-item]
        return False
    return check_dtype_name_v2(data["astype"])  # type: ignore[typeddict-item]
62+
63+
64+
def check_json_v3(data: object) -> TypeGuard[DeltaJSON_V3]:
    """
    Type guard for the Zarr V3 JSON form of the Delta codec.

    ``data`` is accepted when it passes ``_check_codecjson_v3``, is a ``Mapping``
    whose ``"name"`` is ``"delta"``, has a ``"configuration"`` entry containing both
    ``"astype"`` and ``"dtype"``, and both of those values pass
    ``check_dtype_spec_v3``. The checks run in that order and stop at the first
    failure.
    """
    if not (_check_codecjson_v3(data) and isinstance(data, Mapping)):
        return False
    if data["name"] != "delta" or "configuration" not in data:
        return False
    configuration = data["configuration"]
    if "astype" not in configuration or "dtype" not in configuration:
        return False
    if not check_dtype_spec_v3(configuration["dtype"]):
        return False
    return check_dtype_spec_v3(configuration["astype"])
78+
79+
80+
class Delta(_NumcodecsArrayArrayCodec):
    """
    A wrapper around the numcodecs.Delta codec that provides Zarr V3 compatibility.

    This class does not have a stable API.

    ``codec_config`` is annotated with the Zarr V2 JSON shape. Zarr V3 JSON is
    converted to V2 data type names in ``_from_json_v3`` and converted back to V3
    data type specs in ``to_json``.
    """

    codec_name = "numcodecs.delta"
    _codec_id = "delta"
    codec_config: DeltaJSON_V2

    def __init__(self, **codec_config: Any) -> None:
        """
        Create the codec from keyword configuration.

        Parameters
        ----------
        **codec_config
            Codec configuration, forwarded unchanged to the base class initialiser.

        Raises
        ------
        ValueError
            If a keyword literally named ``codec_config`` is supplied.
        """
        if "codec_config" in codec_config:
            raise ValueError("The argument 'codec_config' is not supported.")
        super().__init__(**codec_config)

    @classmethod
    def _from_json_v2(cls, data: CodecJSON_V2) -> Self:
        """Build the codec by passing every key of the Zarr V2 JSON as a keyword."""
        return cls(**data)

    @classmethod
    def _from_json_v3(cls, data: CodecJSON_V3) -> Self:
        """
        Build the codec from Zarr V3 JSON.

        The ``astype`` and ``dtype`` entries of the configuration are parsed as V3
        data types and re-serialised to their V2 names before being handed to the
        constructor.

        Raises
        ------
        TypeError
            If ``data`` does not pass ``check_json_v3``.
        """
        if check_json_v3(data):
            config = data["configuration"]
            astype = parse_dtype(config["astype"], zarr_format=3).to_json(zarr_format=2)["name"]
            dtype = parse_dtype(config["dtype"], zarr_format=3).to_json(zarr_format=2)["name"]

            return cls(astype=astype, dtype=dtype)
        raise TypeError(f"Invalid JSON: {data}")

    @classmethod
    def from_json(cls, data: CodecJSON) -> Self:
        """
        Build the codec from either Zarr V2 or Zarr V3 JSON.

        Input that passes ``_check_codecjson_v2`` takes the V2 path; everything else
        is handed to the V3 path, which raises ``TypeError`` on invalid input.
        """
        if _check_codecjson_v2(data):
            return cls._from_json_v2(data)
        return cls._from_json_v3(data)

    @overload
    def to_json(self, zarr_format: Literal[2]) -> DeltaJSON_V2: ...
    @overload
    def to_json(self, zarr_format: Literal[3]) -> DeltaJSON_V3: ...
    def to_json(self, zarr_format: ZarrFormat) -> DeltaJSON_V2 | DeltaJSON_V3:
        """
        Serialise the codec to JSON for the requested Zarr format.

        ``_warn_unstable_specification`` is called first in both cases. For format 2
        the stored ``codec_config`` is returned as-is. For format 3 the stored data
        type names are parsed as V2 data types and re-serialised as V3 specs under
        the codec name ``"delta"``.
        """
        _warn_unstable_specification(self)
        if zarr_format == 2:
            return self.codec_config
        conf = self.codec_config
        dtype_name = conf["dtype"]
        # ``astype`` may be absent: ``resolve_metadata`` reads it with ``.get`` and the
        # codec can be built with only ``dtype``. Indexing it unconditionally would
        # raise KeyError here, so fall back to ``dtype``, the same default
        # numcodecs.Delta applies when ``astype`` is omitted.
        astype_name = conf.get("astype") or dtype_name
        astype_v3 = parse_dtype(astype_name, zarr_format=2).to_json(zarr_format=3)
        dtype_v3 = parse_dtype(dtype_name, zarr_format=2).to_json(zarr_format=3)
        return {
            "name": "delta",
            "configuration": {"astype": astype_v3, "dtype": dtype_v3},
        }

    def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec:
        """
        Return the chunk spec that describes this codec's output.

        When ``codec_config`` has a truthy ``astype``, a copy of ``chunk_spec`` with
        its ``dtype`` replaced by the parsed ``astype`` is returned; otherwise
        ``chunk_spec`` is returned unchanged.
        """
        if astype := self.codec_config.get("astype"):
            dtype = parse_dtype(astype, zarr_format=3)
            return replace(chunk_spec, dtype=dtype)
        return chunk_spec

src/zarr/core/dtype/__init__.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
from zarr.core.dtype.common import (
77
DataTypeValidationError,
88
DTypeJSON,
9+
DTypeName_V2,
10+
DTypeSpec_V2,
911
)
1012
from zarr.core.dtype.npy.bool import Bool
1113
from zarr.core.dtype.npy.bytes import (
@@ -150,7 +152,14 @@
150152
VLEN_UTF8_ALIAS: Final = ("str", str, "string")
151153

152154
# This type models inputs that can be coerced to a ZDType
153-
ZDTypeLike: TypeAlias = npt.DTypeLike | ZDType[TBaseDType, TBaseScalar] | Mapping[str, JSON] | str
155+
ZDTypeLike: TypeAlias = (
156+
npt.DTypeLike
157+
| ZDType[TBaseDType, TBaseScalar]
158+
| Mapping[str, JSON]
159+
| Mapping[str, object]
160+
| DTypeSpec_V2
161+
| DTypeName_V2
162+
)
154163

155164
for dtype in ANY_DTYPE:
156165
# mypy does not know that all the elements of ANY_DTYPE are subclasses of ZDType

tests/test_cli/test_migrate_v3.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@
1313
from zarr.codecs.blosc import BloscCodec
1414
from zarr.codecs.bytes import BytesCodec
1515
from zarr.codecs.gzip import GzipCodec
16-
from zarr.codecs.numcodecs import LZMA, Delta
16+
from zarr.codecs.numcodecs import LZMA
17+
from zarr.codecs.numcodecs.delta import Delta
1718
from zarr.codecs.transpose import TransposeCodec
1819
from zarr.codecs.zstd import ZstdCodec
1920
from zarr.core.array import Array

tests/test_codecs/test_adler32.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@ class TestAdler32Codec(BaseTestCodec):
99
test_cls = _numcodecs.Adler32
1010
valid_json_v2 = (
1111
{"id": "adler32"},
12-
{"id": "adler32", "location": "start"},
13-
{"id": "adler32", "location": "end"},
12+
{"id": "adler32", "location": "start"}, # type: ignore[typeddict-unknown-key]
13+
{"id": "adler32", "location": "end"}, # type: ignore[typeddict-unknown-key]
1414
)
1515
valid_json_v3 = (
1616
{"name": "adler32", "configuration": {}},

tests/test_codecs/test_delta.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import pytest
22

33
from tests.test_codecs.conftest import BaseTestCodec
4-
from zarr.codecs import numcodecs as _numcodecs
4+
from zarr.codecs.numcodecs import delta as _numcodecs
55

66

77
@pytest.mark.filterwarnings("ignore::zarr.errors.ZarrUserWarning")
@@ -11,6 +11,6 @@ class TestDeltaCodec(BaseTestCodec):
1111
valid_json_v3 = (
1212
{
1313
"name": "delta",
14-
"configuration": {"dtype": "|u1", "astype": "|u1"},
14+
"configuration": {"dtype": "uint16", "astype": "uint8"},
1515
},
1616
)

tests/test_codecs/test_numcodecs.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import pytest
99
from numcodecs import GZip
1010

11+
import zarr.codecs.numcodecs.delta
1112
from zarr import config, create_array, open_array
1213
from zarr.abc.numcodec import Numcodec, _is_numcodec_cls
1314
from zarr.codecs import numcodecs as _numcodecs
@@ -148,7 +149,7 @@ def test_generic_compressor(codec_class: type[_numcodecs._NumcodecsBytesBytesCod
148149
@pytest.mark.parametrize(
149150
("codec_class", "codec_config"),
150151
[
151-
(_numcodecs.Delta, {"dtype": "float32"}),
152+
(zarr.codecs.numcodecs.delta.Delta, {"dtype": "float32"}),
152153
(_numcodecs.FixedScaleOffset, {"offset": 0, "scale": 25.5, "dtype": "float32"}),
153154
(_numcodecs.FixedScaleOffset, {"offset": 0, "scale": 51, "dtype": "float32"}),
154155
(_numcodecs.AsType, {"encode_dtype": "float32", "decode_dtype": "float32"}),
@@ -288,7 +289,7 @@ def test_delta_astype() -> None:
288289
dtype=data.dtype,
289290
fill_value=0,
290291
filters=[
291-
_numcodecs.Delta(dtype="i8", astype="i2"),
292+
zarr.codecs.numcodecs.delta.Delta(dtype="i8", astype="i2"),
292293
],
293294
)
294295

0 commit comments

Comments
 (0)