Skip to content

Commit 4fe9ae4

Browse files
committed
remove cache poisoning bug, and deploy type checker throughout the codebase
1 parent 85b48df commit 4fe9ae4

File tree

23 files changed

+499
-471
lines changed

23 files changed

+499
-471
lines changed

examples/custom_dtype.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
DTypeConfig_V2,
3030
DTypeJSON,
3131
)
32-
from zarr.core.type_check import check_type
32+
from zarr.core.type_check import guard_type
3333

3434
# This is the int2 array data type
3535
int2_dtype_cls = type(np.dtype("int2"))
@@ -84,7 +84,7 @@ def _check_json_v2(cls, data: DTypeJSON) -> TypeGuard[DTypeConfig_V2[Literal["in
8484
8585
See the Zarr docs for more information about the JSON encoding for data types.
8686
"""
87-
return check_type(data, DTypeConfig_V2[Literal["int2"], None]).success
87+
return guard_type(data, DTypeConfig_V2[Literal["int2"], None])
8888

8989
@classmethod
9090
def _check_json_v3(cls, data: DTypeJSON) -> TypeGuard[Literal["int2"]]:

src/zarr/abc/codec.py

Lines changed: 2 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,11 @@
11
from __future__ import annotations
22

33
from abc import abstractmethod
4-
from collections.abc import Mapping
5-
from typing import TYPE_CHECKING, Generic, TypeGuard, TypeVar
6-
7-
from typing_extensions import ReadOnly, TypedDict
4+
from typing import TYPE_CHECKING, Generic, TypeVar
85

96
from zarr.abc.metadata import Metadata
107
from zarr.core.buffer import Buffer, NDBuffer
11-
from zarr.core.common import NamedConfig, concurrent_map
8+
from zarr.core.common import concurrent_map
129
from zarr.core.config import config
1310

1411
if TYPE_CHECKING:
@@ -37,27 +34,6 @@
3734
CodecInput = TypeVar("CodecInput", bound=NDBuffer | Buffer)
3835
CodecOutput = TypeVar("CodecOutput", bound=NDBuffer | Buffer)
3936

40-
TName = TypeVar("TName", bound=str, covariant=True)
41-
42-
43-
class CodecJSON_V2(TypedDict, Generic[TName]):
44-
"""The JSON representation of a codec for Zarr V2"""
45-
46-
id: ReadOnly[TName]
47-
48-
49-
def _check_codecjson_v2(data: object) -> TypeGuard[CodecJSON_V2[str]]:
50-
return isinstance(data, Mapping) and "id" in data and isinstance(data["id"], str)
51-
52-
53-
CodecJSON_V3 = str | NamedConfig[str, Mapping[str, object]]
54-
"""The JSON representation of a codec for Zarr V3."""
55-
56-
# The widest type we will *accept* for a codec JSON
57-
# This covers v2 and v3
58-
CodecJSON = str | Mapping[str, object]
59-
"""The widest type of JSON-like input that could specify a codec."""
60-
6137

6238
class BaseCodec(Metadata, Generic[CodecInput, CodecOutput]):
6339
"""Generic base class for codecs.

src/zarr/api/asynchronous.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import asyncio
44
import dataclasses
55
import warnings
6-
from typing import TYPE_CHECKING, Any, Literal, cast
6+
from typing import TYPE_CHECKING, Any, Literal
77

88
import numpy as np
99
import numpy.typing as npt
@@ -37,7 +37,7 @@
3737
GroupMetadata,
3838
create_hierarchy,
3939
)
40-
from zarr.core.metadata import ArrayMetadataDict, ArrayV2Metadata, ArrayV3Metadata
40+
from zarr.core.metadata import ArrayV2Metadata, ArrayV3Metadata
4141
from zarr.errors import (
4242
GroupNotFoundError,
4343
NodeTypeValidationError,
@@ -352,13 +352,12 @@ async def open(
352352
try:
353353
metadata_dict = await get_array_metadata(store_path, zarr_format=zarr_format)
354354
# TODO: remove this cast when we fix typing for array metadata dicts
355-
_metadata_dict = cast("ArrayMetadataDict", metadata_dict)
356355
# for v2, the above would already have raised an exception if not an array
357-
zarr_format = _metadata_dict["zarr_format"]
358-
is_v3_array = zarr_format == 3 and _metadata_dict.get("node_type") == "array"
356+
zarr_format = metadata_dict["zarr_format"]
357+
is_v3_array = zarr_format == 3 and metadata_dict.get("node_type") == "array"
359358
if is_v3_array or zarr_format == 2:
360359
return AsyncArray(
361-
store_path=store_path, metadata=_metadata_dict, config=kwargs.get("config")
360+
store_path=store_path, metadata=metadata_dict, config=kwargs.get("config")
362361
)
363362
except (AssertionError, FileNotFoundError, NodeTypeValidationError):
364363
pass

src/zarr/core/array.py

Lines changed: 41 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,8 @@
5353
ZARR_JSON,
5454
ZARRAY_JSON,
5555
ZATTRS_JSON,
56+
ArrayMetadataJSON_V2,
57+
ArrayMetadataJSON_V3,
5658
DimensionNames,
5759
MemoryOrder,
5860
ShapeLike,
@@ -102,11 +104,8 @@
102104
)
103105
from zarr.core.metadata import (
104106
ArrayMetadata,
105-
ArrayMetadataDict,
106107
ArrayV2Metadata,
107-
ArrayV2MetadataDict,
108108
ArrayV3Metadata,
109-
ArrayV3MetadataDict,
110109
T_ArrayMetadata,
111110
)
112111
from zarr.core.metadata.v2 import (
@@ -115,9 +114,14 @@
115114
parse_compressor,
116115
parse_filters,
117116
)
118-
from zarr.core.metadata.v3 import parse_node_type_array
119117
from zarr.core.sync import sync
120-
from zarr.errors import MetadataValidationError, ZarrDeprecationWarning, ZarrUserWarning
118+
from zarr.core.type_check import check_type
119+
from zarr.errors import (
120+
MetadataValidationError,
121+
NodeTypeValidationError,
122+
ZarrDeprecationWarning,
123+
ZarrUserWarning,
124+
)
121125
from zarr.registry import (
122126
_parse_array_array_codec,
123127
_parse_array_bytes_codec,
@@ -176,12 +180,6 @@ def parse_array_metadata(data: Any) -> ArrayMetadata:
176180
zarr_format = data.get("zarr_format")
177181
if zarr_format == 3:
178182
meta_out = ArrayV3Metadata.from_dict(data)
179-
if len(meta_out.storage_transformers) > 0:
180-
msg = (
181-
f"Array metadata contains storage transformers: {meta_out.storage_transformers}."
182-
"Arrays with storage transformers are not supported in zarr-python at this time."
183-
)
184-
raise ValueError(msg)
185183
return meta_out
186184
elif zarr_format == 2:
187185
return ArrayV2Metadata.from_dict(data)
@@ -207,9 +205,27 @@ def create_codec_pipeline(metadata: ArrayMetadata, *, store: Store | None = None
207205
raise TypeError # pragma: no cover
208206

209207

208+
@overload
209+
async def get_array_metadata(
210+
store_path: StorePath, zarr_format: Literal[3]
211+
) -> ArrayMetadataJSON_V3: ...
212+
213+
214+
@overload
215+
async def get_array_metadata(
216+
store_path: StorePath, zarr_format: Literal[2]
217+
) -> ArrayMetadataJSON_V2: ...
218+
219+
220+
@overload
221+
async def get_array_metadata(
222+
store_path: StorePath, zarr_format: None
223+
) -> ArrayMetadataJSON_V3 | ArrayMetadataJSON_V2: ...
224+
225+
210226
async def get_array_metadata(
211227
store_path: StorePath, zarr_format: ZarrFormat | None = 3
212-
) -> dict[str, JSON]:
228+
) -> ArrayMetadataJSON_V3 | ArrayMetadataJSON_V2:
213229
if zarr_format == 2:
214230
zarray_bytes, zattrs_bytes = await gather(
215231
(store_path / ZARRAY_JSON).get(prototype=cpu_buffer_prototype),
@@ -241,19 +257,25 @@ async def get_array_metadata(
241257
else:
242258
raise MetadataValidationError("zarr_format", "2, 3, or None", zarr_format)
243259

244-
metadata_dict: dict[str, JSON]
260+
metadata_dict: ArrayMetadataJSON_V2 | ArrayMetadataJSON_V3
245261
if zarr_format == 2:
246262
# V2 arrays are comprised of a .zarray and .zattrs objects
247263
assert zarray_bytes is not None
248264
metadata_dict = json.loads(zarray_bytes.to_bytes())
249265
zattrs_dict = json.loads(zattrs_bytes.to_bytes()) if zattrs_bytes is not None else {}
250266
metadata_dict["attributes"] = zattrs_dict
267+
tycheck = check_type(metadata_dict, ArrayMetadataJSON_V2)
268+
if not tycheck.success:
269+
msg = "The .zarray object at {store_path} is not a valid Zarr array metadata object. "
270+
raise NodeTypeValidationError("zarray", "Zarr array metadata object", metadata_dict)
251271
else:
252272
# V3 arrays are comprised of a zarr.json object
253273
assert zarr_json_bytes is not None
254274
metadata_dict = json.loads(zarr_json_bytes.to_bytes())
255-
256-
parse_node_type_array(metadata_dict.get("node_type"))
275+
tycheck = check_type(metadata_dict, ArrayMetadataJSON_V3)
276+
if not tycheck.success:
277+
msg = "The zarr.json object at {store_path} is not a valid Zarr array metadata object. "
278+
raise NodeTypeValidationError("zarr.json", "Zarr array metadata object", metadata_dict)
257279

258280
return metadata_dict
259281

@@ -292,22 +314,22 @@ class AsyncArray(Generic[T_ArrayMetadata]):
292314
@overload
293315
def __init__(
294316
self: AsyncArray[ArrayV2Metadata],
295-
metadata: ArrayV2Metadata | ArrayV2MetadataDict,
317+
metadata: ArrayV2Metadata | ArrayMetadataJSON_V2,
296318
store_path: StorePath,
297319
config: ArrayConfigLike | None = None,
298320
) -> None: ...
299321

300322
@overload
301323
def __init__(
302324
self: AsyncArray[ArrayV3Metadata],
303-
metadata: ArrayV3Metadata | ArrayV3MetadataDict,
325+
metadata: ArrayV3Metadata | ArrayMetadataJSON_V3,
304326
store_path: StorePath,
305327
config: ArrayConfigLike | None = None,
306328
) -> None: ...
307329

308330
def __init__(
309331
self,
310-
metadata: ArrayMetadata | ArrayMetadataDict,
332+
metadata: ArrayMetadata | ArrayMetadataJSON_V2 | ArrayMetadataJSON_V3,
311333
store_path: StorePath,
312334
config: ArrayConfigLike | None = None,
313335
) -> None:
@@ -959,9 +981,7 @@ async def open(
959981
"""
960982
store_path = await make_store_path(store)
961983
metadata_dict = await get_array_metadata(store_path, zarr_format=zarr_format)
962-
# TODO: remove this cast when we have better type hints
963-
_metadata_dict = cast("ArrayV3MetadataDict", metadata_dict)
964-
return cls(store_path=store_path, metadata=_metadata_dict)
984+
return cls(store_path=store_path, metadata=metadata_dict)
965985

966986
@property
967987
def store(self) -> Store:

src/zarr/core/common.py

Lines changed: 101 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
Final,
1515
Generic,
1616
Literal,
17+
NotRequired,
1718
TypedDict,
1819
TypeVar,
1920
cast,
@@ -47,11 +48,11 @@
4748
ANY_ACCESS_MODE: Final = "r", "r+", "a", "w", "w-"
4849
DimensionNames = Iterable[str | None] | None
4950

50-
TName = TypeVar("TName", bound=str)
51+
TName = TypeVar("TName", bound=str, covariant=True)
5152
TConfig = TypeVar("TConfig", bound=Mapping[str, object])
5253

5354

54-
class NamedConfig(TypedDict, Generic[TName, TConfig]):
55+
class NamedRequiredConfig(TypedDict, Generic[TName, TConfig]):
5556
"""
5657
A typed dictionary representing an object with a name and configuration, where the configuration
5758
is a mapping of string keys to values, e.g. another typed dictionary or a JSON object.
@@ -67,6 +68,104 @@ class NamedConfig(TypedDict, Generic[TName, TConfig]):
6768
"""The configuration of the object."""
6869

6970

71+
class NamedConfig(TypedDict, Generic[TName, TConfig]):
72+
"""
73+
A typed dictionary representing an object with a name and configuration, where the configuration
74+
is a mapping of string keys to values, e.g. another typed dictionary or a JSON object.
75+
76+
The configuration key is not required.
77+
78+
This class is generic with two type parameters: the type of the name (``TName``) and the type of
79+
the configuration (``TConfig``).
80+
"""
81+
82+
name: ReadOnly[TName]
83+
"""The name of the object."""
84+
85+
configuration: ReadOnly[NotRequired[TConfig]]
86+
"""The configuration of the object."""
87+
88+
89+
class ArrayMetadataJSON_V2(TypedDict):
90+
"""
91+
A typed dictionary model for Zarr V2 array metadata.
92+
"""
93+
94+
zarr_format: Literal[2]
95+
dtype: str | StructuredName_V2
96+
shape: Sequence[int]
97+
chunks: Sequence[int]
98+
dimension_separator: NotRequired[Literal[".", "/"]]
99+
fill_value: Any
100+
filters: Sequence[CodecJSON_V2[str]] | None
101+
order: Literal["C", "F"]
102+
compressor: CodecJSON_V2[str] | None
103+
attributes: NotRequired[Mapping[str, JSON]]
104+
105+
106+
class GroupMetadataJSON_V2(TypedDict):
107+
"""
108+
A typed dictionary model for Zarr V2 group metadata.
109+
"""
110+
111+
zarr_format: Literal[2]
112+
attributes: NotRequired[Mapping[str, JSON]]
113+
114+
115+
class ArrayMetadataJSON_V3(TypedDict):
116+
"""
117+
A typed dictionary model for Zarr V3 array metadata.
118+
"""
119+
120+
zarr_format: Literal[3]
121+
node_type: Literal["array"]
122+
data_type: str | NamedConfig[str, Mapping[str, object]]
123+
shape: Sequence[int]
124+
chunk_grid: NamedConfig[str, Mapping[str, object]]
125+
chunk_key_encoding: NamedConfig[str, Mapping[str, object]]
126+
fill_value: object
127+
codecs: Sequence[str | NamedConfig[str, Mapping[str, object]]]
128+
attributes: NotRequired[Mapping[str, object]]
129+
storage_transformers: NotRequired[Sequence[NamedConfig[str, Mapping[str, object]]]]
130+
dimension_names: NotRequired[Sequence[str | None]]
131+
132+
133+
class GroupMetadataJSON_V3(TypedDict):
134+
"""
135+
A typed dictionary model for Zarr V3 group metadata.
136+
"""
137+
138+
zarr_format: Literal[3]
139+
node_type: Literal["group"]
140+
attributes: NotRequired[Mapping[str, JSON]]
141+
142+
143+
class CodecJSON_V2(TypedDict, Generic[TName]):
144+
"""The JSON representation of a codec for Zarr V2"""
145+
146+
id: ReadOnly[TName]
147+
148+
149+
CodecJSON_V3 = str | NamedConfig[str, Mapping[str, object]]
150+
"""The JSON representation of a codec for Zarr V3."""
151+
152+
# The widest type we will *accept* for a codec JSON
153+
# This covers v2 and v3
154+
CodecJSON = str | Mapping[str, object]
155+
"""The widest type of JSON-like input that could specify a codec."""
156+
157+
158+
# By comparison, The JSON representation of a dtype in zarr v3 is much simpler.
159+
# It's either a string, or a structured dict
160+
DTypeSpec_V3 = str | NamedConfig[str, Mapping[str, object]]
161+
162+
# This is the JSON representation of a structured dtype in zarr v2
163+
StructuredName_V2 = Sequence["str | StructuredName_V2"]
164+
165+
# This models the type of the name a dtype might have in zarr v2 array metadata
166+
DTypeName_V2 = StructuredName_V2 | str
167+
168+
70169
def product(tup: tuple[int, ...]) -> int:
71170
return functools.reduce(operator.mul, tup, 1)
72171

0 commit comments

Comments
 (0)