Skip to content

Commit fe21d7c

Browse files
committed
fixup
1 parent a134233 commit fe21d7c

File tree

17 files changed

+311
-250
lines changed

17 files changed

+311
-250
lines changed

src/zarr/abc/codec.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,12 +53,30 @@ class CodecJSON_V2(TypedDict):
5353

5454

5555
def _check_codecjson_v2(data: object) -> TypeIs[CodecJSON_V2]:
56+
"""
57+
Type-narrowing check for CodecJSON_V2: True if ``data`` is a Mapping with a string-valued "id" key.
58+
"""
5659
return isinstance(data, Mapping) and "id" in data and isinstance(data["id"], str)
5760

5861

5962
CodecJSON_V3 = str | NamedConfig[str, Mapping[str, object]]
6063
"""The JSON representation of a codec for Zarr V3."""
6164

65+
66+
def _check_codecjson_v3(data: object) -> TypeIs[CodecJSON_V3]:
67+
"""
68+
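Type-narrowing check for CodecJSON_V3: True if ``data`` is a string, or a Mapping with a string-valued "name" key whose optional "configuration" value is a Mapping.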
69+
"""
70+
if isinstance(data, str):
71+
return True
72+
return (
73+
isinstance(data, Mapping)
74+
and "name" in data
75+
and isinstance(data["name"], str)
76+
and isinstance(data.get("configuration", {}), Mapping)
77+
)
78+
79+
6280
# The widest type we will *accept* for a codec JSON
6381
# This covers v2 and v3
6482
CodecJSON = CodecJSON_V2 | CodecJSON_V3

src/zarr/codecs/_v2.py

Lines changed: 49 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,12 @@
1313
BytesBytesCodec,
1414
CodecJSON,
1515
CodecJSON_V2,
16+
CodecJSON_V3,
17+
_check_codecjson_v2,
18+
_check_codecjson_v3,
1619
)
17-
from zarr.registry import _get_codec_v2, _get_codec_v3, get_ndbuffer_class
20+
from zarr.errors import CodecValidationError
21+
from zarr.registry import get_ndbuffer_class, get_numcodec
1822

1923
if TYPE_CHECKING:
2024
from zarr.abc.numcodec import Numcodec
@@ -26,6 +30,26 @@
2630
from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType
2731

2832

33+
def codec_json_v2_to_v3(data: CodecJSON_V2) -> CodecJSON_V3:
34+
"""
35+
Convert V2 codec JSON to V3 codec JSON: "id" becomes "name", and every other key is moved into "configuration".
36+
"""
37+
name = data["id"]
38+
config = {k: v for k, v in data.items() if k != "id"}
39+
return {"name": name, "configuration": config}
40+
41+
42+
def codec_json_v3_to_v2(data: CodecJSON_V3) -> CodecJSON_V2:
43+
"""
44+
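Convert V3 codec JSON to V2 codec JSON: a bare string becomes {"id": <string>}; otherwise "name" becomes "id" and the "configuration" entries become top-level keys.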
45+
"""
46+
if isinstance(data, str):
47+
return {"id": data}
48+
name = data["name"]
49+
config = dict(data.get("configuration", {}))
50+
return {"id": name, **config} # type: ignore[typeddict-item]
51+
52+
2953
@dataclass(frozen=True)
3054
class V2Codec(ArrayBytesCodec):
3155
filters: tuple[Numcodec, ...] | None
@@ -111,7 +135,7 @@ def compute_encoded_size(self, _input_byte_length: int, _chunk_spec: ArraySpec)
111135

112136

113137
@dataclass(frozen=True, kw_only=True)
114-
class NumcodecsWrapper:
138+
class NumcodecWrapper:
115139
codec: Numcodec
116140

117141
@overload
@@ -130,13 +154,26 @@ def to_json(self, zarr_format: ZarrFormat) -> CodecJSON_V2 | NamedConfig[str, Ba
130154

131155
@classmethod
132156
def _from_json_v2(cls, data: CodecJSON) -> Self:
133-
codec = _get_codec_v2(data)
134-
return cls(codec=codec)
157+
if _check_codecjson_v2(data):
158+
codec = get_numcodec(data)
159+
return cls(codec=codec)
160+
msg = (
161+
"Invalid Zarr V2 JSON representation of a numcodecs codec. "
162+
f"Got {data!r}, expected a Mapping with an 'id' key"
163+
)
164+
raise CodecValidationError(msg)
135165

136166
@classmethod
137167
def _from_json_v3(cls, data: CodecJSON) -> Self:
138-
codec = _get_codec_v3(data)
139-
return cls(codec=codec)
168+
if _check_codecjson_v3(data):
169+
# convert to a v2 codec JSON
170+
codec = get_numcodec(codec_json_v3_to_v2(data))
171+
return cls(codec=codec)
172+
msg = (
173+
"Invalid Zarr V3 JSON representation of a codec. "
174+
f"Got {data!r}, expected a Mapping with an 'name' key"
175+
)
176+
raise CodecValidationError(msg)
140177

141178
def compute_encoded_size(self, input_byte_length: int, chunk_spec: ArraySpec) -> int:
142179
raise NotImplementedError
@@ -177,24 +214,24 @@ def validate(
177214

178215
def to_array_array(self) -> NumcodecsArrayArrayCodec:
179216
"""
180-
Use the ``_codec`` attribute to create a NumcodecsArrayArrayCodec.
217+
Use the ``codec`` attribute to create a NumcodecsArrayArrayCodec.
181218
"""
182219
return NumcodecsArrayArrayCodec(codec=self.codec)
183220

184221
def to_bytes_bytes(self) -> NumcodecsBytesBytesCodec:
185222
"""
186-
Use the ``_codec`` attribute to create a NumcodecsBytesBytesCodec.
223+
Use the ``codec`` attribute to create a NumcodecsBytesBytesCodec.
187224
"""
188225
return NumcodecsBytesBytesCodec(codec=self.codec)
189226

190227
def to_array_bytes(self) -> NumcodecsArrayBytesCodec:
191228
"""
192-
Use the ``_codec`` attribute to create a NumcodecsArrayBytesCodec.
229+
Use the ``codec`` attribute to create a NumcodecsArrayBytesCodec.
193230
"""
194231
return NumcodecsArrayBytesCodec(codec=self.codec)
195232

196233

197-
class NumcodecsBytesBytesCodec(NumcodecsWrapper, BytesBytesCodec):
234+
class NumcodecsBytesBytesCodec(NumcodecWrapper, BytesBytesCodec):
198235
async def _decode_single(self, chunk_data: Buffer, chunk_spec: ArraySpec) -> Buffer:
199236
from zarr.core.buffer.cpu import as_numpy_array_wrapper
200237

@@ -216,7 +253,7 @@ async def _encode_single(self, chunk_data: Buffer, chunk_spec: ArraySpec) -> Buf
216253

217254

218255
@dataclass(kw_only=True, frozen=True)
219-
class NumcodecsArrayArrayCodec(NumcodecsWrapper, ArrayArrayCodec):
256+
class NumcodecsArrayArrayCodec(NumcodecWrapper, ArrayArrayCodec):
220257
async def _decode_single(self, chunk_data: NDBuffer, chunk_spec: ArraySpec) -> NDBuffer:
221258
chunk_ndarray = chunk_data.as_ndarray_like()
222259
out = await asyncio.to_thread(self.codec.decode, chunk_ndarray)
@@ -229,7 +266,7 @@ async def _encode_single(self, chunk_data: NDBuffer, chunk_spec: ArraySpec) -> N
229266

230267

231268
@dataclass(kw_only=True, frozen=True)
232-
class NumcodecsArrayBytesCodec(NumcodecsWrapper, ArrayBytesCodec):
269+
class NumcodecsArrayBytesCodec(NumcodecWrapper, ArrayBytesCodec):
233270
async def _decode_single(self, chunk_data: Buffer, chunk_spec: ArraySpec) -> NDBuffer:
234271
chunk_bytes = chunk_data.to_bytes()
235272
out = await asyncio.to_thread(self.codec.decode, chunk_bytes)

src/zarr/codecs/blosc.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -71,29 +71,29 @@ class BloscJSON_V3(NamedRequiredConfig[Literal["blosc"], BloscConfigV3]):
7171
"""
7272

7373

74-
def check_json_v2(data: CodecJSON) -> TypeGuard[BloscJSON_V2]:
74+
def check_json_v2(data: object) -> TypeGuard[BloscJSON_V2]:
7575
return (
7676
isinstance(data, Mapping)
7777
and set(data.keys()) == {"id", "clevel", "cname", "shuffle", "blocksize"}
78-
and data["id"] == "blosc" # type: ignore[typeddict-item]
78+
and data["id"] == "blosc"
7979
)
8080

8181

82-
def check_json_v3(data: CodecJSON) -> TypeGuard[BloscJSON_V3]:
82+
def check_json_v3(data: object) -> TypeGuard[BloscJSON_V3]:
8383
return (
8484
isinstance(data, Mapping)
8585
and set(data.keys()) == {"name", "configuration"}
86-
and data["name"] == "blosc" # type: ignore[typeddict-item]
87-
and isinstance(data["configuration"], Mapping) # type: ignore[typeddict-item]
88-
and set(data["configuration"].keys()) # type: ignore[typeddict-item]
86+
and data["name"] == "blosc"
87+
and isinstance(data["configuration"], Mapping)
88+
and set(data["configuration"].keys())
8989
== {"cname", "clevel", "shuffle", "blocksize", "typesize"}
9090
)
9191

9292

9393
def parse_cname(value: object) -> BloscCname:
9494
if value not in BLOSC_CNAME:
9595
raise ValueError(f"Value must be one of {BLOSC_CNAME}. Got {value} instead.")
96-
return value
96+
return value # type: ignore[return-value]
9797

9898

9999
# See https://zarr.readthedocs.io/en/stable/user-guide/performance.html#configuring-blosc
@@ -163,7 +163,7 @@ def __init__(
163163

164164
@classmethod
165165
def from_dict(cls, data: dict[str, JSON]) -> Self:
166-
return cls.from_json(data, zarr_format=3)
166+
return cls.from_json(data) # type: ignore[arg-type]
167167

168168
def to_dict(self) -> dict[str, JSON]:
169169
if self.shuffle is None:

src/zarr/codecs/bytes.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -57,23 +57,23 @@ def parse_endianness(data: object) -> EndiannessStr:
5757
raise ValueError(f"Invalid endianness: {data!r}. Expected one of {ENDIANNESS_STR}")
5858

5959

60-
def check_json_v2(data: CodecJSON) -> TypeGuard[BytesJSON_V2]:
60+
def check_json_v2(data: object) -> TypeGuard[BytesJSON_V2]:
6161
return (
6262
isinstance(data, Mapping)
6363
and set(data.keys()) in ({"id", "endian"}, {"id"})
64-
and data["id"] == "bytes" # type: ignore[typeddict-item]
64+
and data["id"] == "bytes"
6565
)
6666

6767

68-
def check_json_v3(data: CodecJSON) -> TypeGuard[BytesJSON_V3]:
68+
def check_json_v3(data: object) -> TypeGuard[BytesJSON_V3]:
6969
return data == "bytes" or (
7070
(
7171
isinstance(data, Mapping)
7272
and set(data.keys()) in ({"name"}, {"name", "configuration"})
73-
and data["name"] == "bytes" # type: ignore[typeddict-item]
73+
and data["name"] == "bytes"
7474
)
7575
and isinstance(data.get("configuration", {}), Mapping)
76-
and set(data.get("configuration", {}).keys()) in ({"endian"}, set()) # type: ignore[attr-defined]
76+
and set(data.get("configuration", {}).keys()) in ({"endian"}, set())
7777
)
7878

7979

src/zarr/codecs/crc32c_.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,11 @@ class Crc32cJSON_V2(TypedDict):
3030
class Crc32cJSON_V3(NamedConfig[Literal["crc32c"], Crc32cConfig]): ...
3131

3232

33-
def check_json_v2(data: CodecJSON) -> TypeGuard[Crc32cJSON_V2]:
33+
def check_json_v2(data: object) -> TypeGuard[Crc32cJSON_V2]:
3434
return isinstance(data, Mapping) and set(data.keys()) == {"id"} and data["id"] == "crc32c"
3535

3636

37-
def check_json_v3(data: CodecJSON) -> TypeGuard[Crc32cJSON_V3]:
37+
def check_json_v3(data: object) -> TypeGuard[Crc32cJSON_V3]:
3838
return (
3939
isinstance(data, Mapping)
4040
and set(data.keys()) in ({"name", "configuration"}, {"name"})

src/zarr/codecs/sharding.py

Lines changed: 25 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -131,34 +131,34 @@ def parse_index_location(data: object) -> ShardingCodecIndexLocation:
131131
return parse_enum(data, ShardingCodecIndexLocation)
132132

133133

134-
def check_json_v2(data: CodecJSON) -> TypeGuard[ShardingJSON_V2]:
134+
def check_json_v2(data: object) -> TypeGuard[ShardingJSON_V2]:
135135
return (
136136
isinstance(data, Mapping)
137137
and set(data.keys()) == {"id", "codecs", "chunk_shape"}
138-
and data["id"] == "sharding_indexed" # type: ignore[typeddict-item]
139-
and isinstance(data["chunk_shape"], Sequence) # type: ignore[typeddict-item]
140-
and not isinstance(data["chunk_shape"], str) # type: ignore[typeddict-item]
141-
and isinstance(data["codecs"], Sequence) # type: ignore[typeddict-item]
142-
and not isinstance(data["codecs"], str) # type: ignore[typeddict-item]
138+
and data["id"] == "sharding_indexed"
139+
and isinstance(data["chunk_shape"], Sequence)
140+
and not isinstance(data["chunk_shape"], str)
141+
and isinstance(data["codecs"], Sequence)
142+
and not isinstance(data["codecs"], str)
143143
)
144144

145145

146-
def check_json_v3(data: CodecJSON) -> TypeGuard[ShardingJSON_V3]:
146+
def check_json_v3(data: object) -> TypeGuard[ShardingJSON_V3]:
147147
# TODO: Automate this with a function that does runtime type checking on typeddicts.
148148
return (
149149
isinstance(data, Mapping)
150150
and set(data.keys()) == {"name", "configuration"}
151-
and data["name"] == "sharding_indexed" # type: ignore[typeddict-item]
152-
and isinstance(data["configuration"], Mapping) # type: ignore[typeddict-item]
153-
and set(data["configuration"].keys()) # type: ignore[typeddict-item]
151+
and data["name"] == "sharding_indexed"
152+
and isinstance(data["configuration"], Mapping)
153+
and set(data["configuration"].keys())
154154
== {"codecs", "chunk_shape", "index_codecs", "index_location"}
155-
and isinstance(data["configuration"]["chunk_shape"], Sequence) # type: ignore[typeddict-item]
156-
and not isinstance(data["configuration"]["chunk_shape"], str) # type: ignore[typeddict-item]
157-
and isinstance(data["configuration"]["codecs"], Sequence) # type: ignore[typeddict-item]
158-
and not isinstance(data["configuration"]["codecs"], str) # type: ignore[typeddict-item]
159-
and isinstance(data["configuration"]["index_codecs"], Sequence) # type: ignore[typeddict-item]
160-
and not isinstance(data["configuration"]["index_codecs"], str) # type: ignore[typeddict-item]
161-
and data["configuration"]["index_location"] in ("start", "end") # type: ignore[typeddict-item, operator]
155+
and isinstance(data["configuration"]["chunk_shape"], Sequence)
156+
and not isinstance(data["configuration"]["chunk_shape"], str)
157+
and isinstance(data["configuration"]["codecs"], Sequence)
158+
and not isinstance(data["configuration"]["codecs"], str)
159+
and isinstance(data["configuration"]["index_codecs"], Sequence)
160+
and not isinstance(data["configuration"]["index_codecs"], str)
161+
and data["configuration"]["index_location"] in ("start", "end")
162162
)
163163

164164

@@ -468,14 +468,16 @@ def __setstate__(self, state: dict[str, Any]) -> None:
468468

469469
@classmethod
470470
def from_dict(cls, data: dict[str, JSON]) -> Self:
471-
return cls.from_json(data) # type: ignore[arg-type]
471+
return cls.from_json(data) # type: ignore[arg-type]
472472

473473
@classmethod
474474
def _from_json_v2(cls, data: CodecJSON) -> Self:
475475
if check_json_v2(data):
476+
# TODO: These type: ignore statements can go away when we propagate the refined
477+
# type information higher in the API by removing `dict[str, JSON]`
476478
return cls(
477-
codecs=data["codecs"],
478-
index_codecs=data["index_codecs"],
479+
codecs=data["codecs"], # type: ignore[arg-type]
480+
index_codecs=data["index_codecs"], # type: ignore[arg-type]
479481
index_location=data["index_location"],
480482
chunk_shape=data["chunk_shape"],
481483
)
@@ -489,9 +491,9 @@ def _from_json_v2(cls, data: CodecJSON) -> Self:
489491
def _from_json_v3(cls, data: CodecJSON) -> Self:
490492
if check_json_v3(data):
491493
return cls(
492-
codecs=data["configuration"]["codecs"], # type: ignore[arg-type]
493-
index_codecs=data["configuration"]["index_codecs"], # type: ignore[arg-type]
494-
index_location=data["configuration"]["index_location"], # type: ignore[arg-type]
494+
codecs=data["configuration"]["codecs"], # type: ignore[arg-type]
495+
index_codecs=data["configuration"]["index_codecs"], # type: ignore[arg-type]
496+
index_location=data["configuration"]["index_location"],
495497
chunk_shape=data["configuration"]["chunk_shape"],
496498
)
497499
msg = (

src/zarr/codecs/transpose.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -50,23 +50,23 @@ class TransposeJSON_V3(NamedRequiredConfig[Literal["transpose"], TransposeConfig
5050
"""
5151

5252

53-
def check_json_v2(data: CodecJSON) -> TypeGuard[TransposeJSON_V2]:
53+
def check_json_v2(data: object) -> TypeGuard[TransposeJSON_V2]:
5454
return (
5555
isinstance(data, Mapping)
5656
and set(data.keys()) == {"id", "configuration"}
57-
and data["id"] == "transpose" # type: ignore[typeddict-item]
58-
and isinstance(data["order"], Sequence) # type: ignore[typeddict-item]
59-
and not isinstance(data["order"], str) # type: ignore[typeddict-item]
57+
and data["id"] == "transpose"
58+
and isinstance(data["order"], Sequence)
59+
and not isinstance(data["order"], str)
6060
)
6161

6262

63-
def check_json_v3(data: CodecJSON) -> TypeGuard[TransposeJSON_V3]:
63+
def check_json_v3(data: object) -> TypeGuard[TransposeJSON_V3]:
6464
return (
6565
isinstance(data, Mapping)
6666
and set(data.keys()) == {"name", "configuration"}
67-
and data["name"] == "transpose" # type: ignore[typeddict-item]
68-
and isinstance(data["configuration"], Mapping) # type: ignore[typeddict-item]
69-
and set(data["configuration"].keys()) == {"order"} # type: ignore[typeddict-item]
67+
and data["name"] == "transpose"
68+
and isinstance(data["configuration"], Mapping)
69+
and set(data["configuration"].keys()) == {"order"}
7070
)
7171

7272

@@ -83,7 +83,7 @@ def __init__(self, *, order: Iterable[int]) -> None:
8383

8484
@classmethod
8585
def from_dict(cls, data: dict[str, JSON]) -> Self:
86-
return cls.from_json(data)
86+
return cls.from_json(data) # type: ignore[arg-type]
8787

8888
@classmethod
8989
def _from_json_v2(cls, data: CodecJSON) -> Self:

0 commit comments

Comments
 (0)