Skip to content

Commit 0c65e56

Browse files
committed
Return TypedDict instances from Metadata.to_dict()
Define TypedDict classes for metadata models that have a well-typed dict representation, and set the return type of the model's to_dict() method to that TypedDict.
1 parent 046c174 commit 0c65e56

File tree

14 files changed

+240
-46
lines changed

14 files changed

+240
-46
lines changed

src/zarr/abc/codec.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from __future__ import annotations
22

33
from abc import abstractmethod
4-
from typing import TYPE_CHECKING, Any, Generic, TypeVar
4+
from typing import TYPE_CHECKING, Any, Generic, NotRequired, TypedDict, TypeVar
55

66
from zarr.abc.metadata import Metadata
77
from zarr.core.buffer import Buffer, NDBuffer
@@ -174,6 +174,22 @@ class BytesBytesCodec(BaseCodec[Buffer, Buffer]):
174174
Codec = ArrayArrayCodec | ArrayBytesCodec | BytesBytesCodec
175175

176176

177+
class CodecConfigDict(TypedDict):
178+
"""A dictionary representing a codec configuration."""
179+
180+
...
181+
182+
183+
T = TypeVar("T", bound=CodecConfigDict)
184+
185+
186+
class CodecDict(TypedDict, Generic[T]):
187+
"""A generic dictionary representing a codec."""
188+
189+
name: str
190+
configuration: NotRequired[T]
191+
192+
177193
class ArrayBytesCodecPartialDecodeMixin:
178194
"""Mixin for array-to-bytes codecs that implement partial decoding."""
179195

src/zarr/codecs/blosc.py

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,12 @@
33
from dataclasses import dataclass, replace
44
from enum import Enum
55
from functools import cached_property
6-
from typing import TYPE_CHECKING
6+
from typing import TYPE_CHECKING, cast
77

88
import numcodecs
99
from numcodecs.blosc import Blosc
1010

11-
from zarr.abc.codec import BytesBytesCodec
11+
from zarr.abc.codec import BytesBytesCodec, CodecConfigDict, CodecDict
1212
from zarr.core.buffer.cpu import as_numpy_array_wrapper
1313
from zarr.core.common import JSON, parse_enum, parse_named_configuration, to_thread
1414
from zarr.registry import register_codec
@@ -54,6 +54,22 @@ class BloscCname(Enum):
5454
zlib = "zlib"
5555

5656

57+
class BloscCodecConfigDict(CodecConfigDict):
58+
"""A dictionary representing a Blosc codec configuration."""
59+
60+
typesize: int
61+
cname: BloscCname
62+
clevel: int
63+
shuffle: BloscShuffle
64+
blocksize: int
65+
66+
67+
class BloscCodecDict(CodecDict[BloscCodecConfigDict]):
68+
"""A dictionary representing a Blosc codec."""
69+
70+
...
71+
72+
5773
# See https://zarr.readthedocs.io/en/stable/tutorial.html#configuring-blosc
5874
numcodecs.blosc.use_threads = False
5975

@@ -118,12 +134,12 @@ def from_dict(cls, data: dict[str, JSON]) -> Self:
118134
_, configuration_parsed = parse_named_configuration(data, "blosc")
119135
return cls(**configuration_parsed) # type: ignore[arg-type]
120136

121-
def to_dict(self) -> dict[str, JSON]:
137+
def to_dict(self) -> BloscCodecDict:
122138
if self.typesize is None:
123139
raise ValueError("`typesize` needs to be set for serialization.")
124140
if self.shuffle is None:
125141
raise ValueError("`shuffle` needs to be set for serialization.")
126-
return {
142+
out_dict = {
127143
"name": "blosc",
128144
"configuration": {
129145
"typesize": self.typesize,
@@ -134,6 +150,8 @@ def to_dict(self) -> dict[str, JSON]:
134150
},
135151
}
136152

153+
return cast(BloscCodecDict, out_dict)
154+
137155
def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self:
138156
dtype = array_spec.dtype
139157
new_codec = self

src/zarr/codecs/bytes.py

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
import numpy as np
99

10-
from zarr.abc.codec import ArrayBytesCodec
10+
from zarr.abc.codec import ArrayBytesCodec, CodecConfigDict, CodecDict
1111
from zarr.core.buffer import Buffer, NDArrayLike, NDBuffer
1212
from zarr.core.common import JSON, parse_enum, parse_named_configuration
1313
from zarr.registry import register_codec
@@ -30,6 +30,18 @@ class Endian(Enum):
3030
default_system_endian = Endian(sys.byteorder)
3131

3232

33+
class BytesCodecConfigDict(CodecConfigDict):
34+
"""A dictionary representing a bytes codec configuration."""
35+
36+
endian: Endian
37+
38+
39+
class BytesCodecDict(CodecDict[BytesCodecConfigDict]):
40+
"""A dictionary representing a bytes codec."""
41+
42+
...
43+
44+
3345
@dataclass(frozen=True)
3446
class BytesCodec(ArrayBytesCodec):
3547
is_fixed_size = True
@@ -49,11 +61,12 @@ def from_dict(cls, data: dict[str, JSON]) -> Self:
4961
configuration_parsed = configuration_parsed or {}
5062
return cls(**configuration_parsed) # type: ignore[arg-type]
5163

52-
def to_dict(self) -> dict[str, JSON]:
53-
if self.endian is None:
54-
return {"name": "bytes"}
55-
else:
56-
return {"name": "bytes", "configuration": {"endian": self.endian.value}}
64+
def to_dict(self) -> BytesCodecDict:
65+
out_dict: BytesCodecDict = {"name": "bytes"}
66+
if self.endian is not None:
67+
out_dict["configuration"] = {"endian": self.endian.value}
68+
69+
return out_dict
5770

5871
def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self:
5972
if array_spec.dtype.itemsize == 0:

src/zarr/codecs/crc32c_.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import typing_extensions
88
from crc32c import crc32c
99

10-
from zarr.abc.codec import BytesBytesCodec
10+
from zarr.abc.codec import BytesBytesCodec, CodecConfigDict, CodecDict
1111
from zarr.core.common import JSON, parse_named_configuration
1212
from zarr.registry import register_codec
1313

@@ -18,6 +18,12 @@
1818
from zarr.core.buffer import Buffer
1919

2020

21+
class Crc32cCodecDict(CodecDict[CodecConfigDict]):
22+
"""A dictionary representing a CRC32C codec."""
23+
24+
...
25+
26+
2127
@dataclass(frozen=True)
2228
class Crc32cCodec(BytesBytesCodec):
2329
is_fixed_size = True
@@ -27,8 +33,9 @@ def from_dict(cls, data: dict[str, JSON]) -> Self:
2733
parse_named_configuration(data, "crc32c", require_configuration=False)
2834
return cls()
2935

30-
def to_dict(self) -> dict[str, JSON]:
31-
return {"name": "crc32c"}
36+
def to_dict(self) -> Crc32cCodecDict:
37+
out_dict = {"name": "crc32c"}
38+
return cast(Crc32cCodecDict, out_dict)
3239

3340
async def _decode_single(
3441
self,

src/zarr/codecs/gzip.py

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
from __future__ import annotations
22

33
from dataclasses import dataclass
4-
from typing import TYPE_CHECKING
4+
from typing import TYPE_CHECKING, cast
55

66
from numcodecs.gzip import GZip
77

8-
from zarr.abc.codec import BytesBytesCodec
8+
from zarr.abc.codec import BytesBytesCodec, CodecConfigDict, CodecDict
99
from zarr.core.buffer.cpu import as_numpy_array_wrapper
1010
from zarr.core.common import JSON, parse_named_configuration, to_thread
1111
from zarr.registry import register_codec
@@ -17,6 +17,18 @@
1717
from zarr.core.buffer import Buffer
1818

1919

20+
class GzipCodecConfigDict(CodecConfigDict):
21+
"""A dictionary representing a gzip codec configuration."""
22+
23+
level: int
24+
25+
26+
class GzipCodecDict(CodecDict[GzipCodecConfigDict]):
27+
"""A dictionary representing a gzip codec."""
28+
29+
...
30+
31+
2032
def parse_gzip_level(data: JSON) -> int:
2133
if not isinstance(data, (int)):
2234
raise TypeError(f"Expected int, got {type(data)}")
@@ -43,8 +55,9 @@ def from_dict(cls, data: dict[str, JSON]) -> Self:
4355
_, configuration_parsed = parse_named_configuration(data, "gzip")
4456
return cls(**configuration_parsed) # type: ignore[arg-type]
4557

46-
def to_dict(self) -> dict[str, JSON]:
47-
return {"name": "gzip", "configuration": {"level": self.level}}
58+
def to_dict(self) -> GzipCodecDict:
59+
out_dict = {"name": "gzip", "configuration": {"level": self.level}}
60+
return cast(GzipCodecDict, out_dict)
4861

4962
async def _decode_single(
5063
self,

src/zarr/codecs/sharding.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
ArrayBytesCodecPartialDecodeMixin,
1616
ArrayBytesCodecPartialEncodeMixin,
1717
Codec,
18+
CodecConfigDict,
19+
CodecDict,
1820
CodecPipeline,
1921
)
2022
from zarr.abc.store import ByteGetter, ByteRangeRequest, ByteSetter
@@ -320,6 +322,21 @@ async def finalize(
320322
return await shard_builder.finalize(index_location, index_encoder)
321323

322324

325+
class ShardingCodecConfigDict(CodecConfigDict):
326+
"""A dictionary representing a sharding codec configuration."""
327+
328+
chunk_shape: list[int] # TODO: Double check this
329+
codecs: list[CodecDict[Any]]
330+
index_codecs: list[CodecDict[Any]]
331+
index_location: ShardingCodecIndexLocation
332+
333+
334+
class ShardingCodecDict(CodecDict[ShardingCodecConfigDict]):
335+
"""A dictionary representing a sharding codec."""
336+
337+
...
338+
339+
323340
@dataclass(frozen=True)
324341
class ShardingCodec(
325342
ArrayBytesCodec, ArrayBytesCodecPartialDecodeMixin, ArrayBytesCodecPartialEncodeMixin
@@ -377,8 +394,8 @@ def from_dict(cls, data: dict[str, JSON]) -> Self:
377394
def codec_pipeline(self) -> CodecPipeline:
378395
return get_pipeline_class().from_codecs(self.codecs)
379396

380-
def to_dict(self) -> dict[str, JSON]:
381-
return {
397+
def to_dict(self) -> ShardingCodecDict:
398+
out_dict = {
382399
"name": "sharding_indexed",
383400
"configuration": {
384401
"chunk_shape": self.chunk_shape,
@@ -388,6 +405,8 @@ def to_dict(self) -> dict[str, JSON]:
388405
},
389406
}
390407

408+
return cast(ShardingCodecDict, out_dict)
409+
391410
def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self:
392411
shard_spec = self._get_chunk_spec(array_spec)
393412
evolved_codecs = tuple(c.evolve_from_array_spec(array_spec=shard_spec) for c in self.codecs)

src/zarr/codecs/transpose.py

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
import numpy as np
88

9-
from zarr.abc.codec import ArrayArrayCodec
9+
from zarr.abc.codec import ArrayArrayCodec, CodecConfigDict, CodecDict
1010
from zarr.core.array_spec import ArraySpec
1111
from zarr.core.common import JSON, ChunkCoordsLike, parse_named_configuration
1212
from zarr.registry import register_codec
@@ -26,6 +26,18 @@ def parse_transpose_order(data: JSON | Iterable[int]) -> tuple[int, ...]:
2626
return tuple(cast(Iterable[int], data))
2727

2828

29+
class TransposeCodecConfigDict(CodecConfigDict):
30+
"""A dictionary representing a transpose codec configuration."""
31+
32+
order: list[int]
33+
34+
35+
class TransposeCodecDict(CodecDict[TransposeCodecConfigDict]):
36+
"""A dictionary representing a transpose codec."""
37+
38+
...
39+
40+
2941
@dataclass(frozen=True)
3042
class TransposeCodec(ArrayArrayCodec):
3143
is_fixed_size = True
@@ -42,8 +54,9 @@ def from_dict(cls, data: dict[str, JSON]) -> Self:
4254
_, configuration_parsed = parse_named_configuration(data, "transpose")
4355
return cls(**configuration_parsed) # type: ignore[arg-type]
4456

45-
def to_dict(self) -> dict[str, JSON]:
46-
return {"name": "transpose", "configuration": {"order": tuple(self.order)}}
57+
def to_dict(self) -> TransposeCodecDict:
58+
out_dict = {"name": "transpose", "configuration": {"order": tuple(self.order)}}
59+
return cast(TransposeCodecDict, out_dict)
4760

4861
def validate(self, shape: tuple[int, ...], dtype: np.dtype[Any], chunk_grid: ChunkGrid) -> None:
4962
if len(self.order) != len(shape):

src/zarr/codecs/zstd.py

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,11 @@
33
from dataclasses import dataclass
44
from functools import cached_property
55
from importlib.metadata import version
6-
from typing import TYPE_CHECKING
6+
from typing import TYPE_CHECKING, cast
77

88
from numcodecs.zstd import Zstd
99

10-
from zarr.abc.codec import BytesBytesCodec
10+
from zarr.abc.codec import BytesBytesCodec, CodecConfigDict, CodecDict
1111
from zarr.core.buffer.cpu import as_numpy_array_wrapper
1212
from zarr.core.common import JSON, parse_named_configuration, to_thread
1313
from zarr.registry import register_codec
@@ -33,6 +33,19 @@ def parse_checksum(data: JSON) -> bool:
3333
raise TypeError(f"Expected bool. Got {type(data)}.")
3434

3535

36+
class ZstdCodecConfigDict(CodecConfigDict):
37+
"""A dictionary representing a zstd codec configuration."""
38+
39+
level: int
40+
checksum: bool
41+
42+
43+
class ZstdCodecDict(CodecDict[ZstdCodecConfigDict]):
44+
"""A dictionary representing a zstd codec."""
45+
46+
...
47+
48+
3649
@dataclass(frozen=True)
3750
class ZstdCodec(BytesBytesCodec):
3851
is_fixed_size = True
@@ -60,8 +73,12 @@ def from_dict(cls, data: dict[str, JSON]) -> Self:
6073
_, configuration_parsed = parse_named_configuration(data, "zstd")
6174
return cls(**configuration_parsed) # type: ignore[arg-type]
6275

63-
def to_dict(self) -> dict[str, JSON]:
64-
return {"name": "zstd", "configuration": {"level": self.level, "checksum": self.checksum}}
76+
def to_dict(self) -> ZstdCodecDict:
77+
out_dict = {
78+
"name": "zstd",
79+
"configuration": {"level": self.level, "checksum": self.checksum},
80+
}
81+
return cast(ZstdCodecDict, out_dict)
6582

6683
@cached_property
6784
def _zstd_codec(self) -> Zstd:

0 commit comments

Comments
 (0)