-
-
Notifications
You must be signed in to change notification settings - Fork 363
add a runtime type checker for metadata objects #3400
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 28 commits
b2f4ff0
7adff59
35c7203
07e2315
85b48df
4fe9ae4
2125153
32cd309
21d6188
6125c1b
cf0615b
7fce136
b467747
943e148
d1be08c
ea3ed12
a098cc2
fc06ab4
1d4bd72
d061fe1
bbd8ba7
11f7499
1892df1
eda19ec
bb7e84e
4cc0385
be71a87
971945b
30d48a8
a483c73
c7096b1
9eb287b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
Add a runtime type checker for ``JSON`` types, and a variety of ``TypedDict`` classes necessary for | ||
modelling Zarr metadata documents. This increases the type-safety of our internal metadata routines, | ||
and provides Zarr users with types they can use to model Zarr metadata. |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -53,6 +53,8 @@ | |
ZARR_JSON, | ||
ZARRAY_JSON, | ||
ZATTRS_JSON, | ||
ArrayMetadataJSON_V2, | ||
ArrayMetadataJSON_V3, | ||
DimensionNames, | ||
MemoryOrder, | ||
ShapeLike, | ||
|
@@ -103,11 +105,8 @@ | |
) | ||
from zarr.core.metadata import ( | ||
ArrayMetadata, | ||
ArrayMetadataDict, | ||
ArrayV2Metadata, | ||
ArrayV2MetadataDict, | ||
ArrayV3Metadata, | ||
ArrayV3MetadataDict, | ||
T_ArrayMetadata, | ||
) | ||
from zarr.core.metadata.v2 import ( | ||
|
@@ -116,11 +115,12 @@ | |
parse_compressor, | ||
parse_filters, | ||
) | ||
from zarr.core.metadata.v3 import parse_node_type_array | ||
from zarr.core.sync import sync | ||
from zarr.core.type_check import check_type | ||
from zarr.errors import ( | ||
ArrayNotFoundError, | ||
MetadataValidationError, | ||
NodeTypeValidationError, | ||
ZarrDeprecationWarning, | ||
ZarrUserWarning, | ||
) | ||
|
@@ -175,25 +175,32 @@ class DefaultFillValue: | |
DEFAULT_FILL_VALUE = DefaultFillValue() | ||
|
||
|
||
def parse_array_metadata(data: Any) -> ArrayMetadata: | ||
@overload | ||
def parse_array_metadata(data: ArrayV2Metadata | ArrayMetadataJSON_V2) -> ArrayV2Metadata: ... | ||
|
||
|
||
@overload | ||
def parse_array_metadata(data: ArrayV3Metadata | ArrayMetadataJSON_V3) -> ArrayV3Metadata: ... | ||
|
||
|
||
def parse_array_metadata( | ||
data: ArrayV2Metadata | ArrayMetadataJSON_V2 | ArrayV3Metadata | ArrayMetadataJSON_V3, | ||
) -> ArrayV2Metadata | ArrayV3Metadata: | ||
""" | ||
If the input is a dict representation of a Zarr metadata document, instantiate the right metadata | ||
class from that dict. If the input is a metadata object, return it. | ||
""" | ||
|
||
if isinstance(data, ArrayMetadata): | ||
return data | ||
elif isinstance(data, dict): | ||
zarr_format = data.get("zarr_format") | ||
else: | ||
zarr_format = data["zarr_format"] | ||
if zarr_format == 3: | ||
meta_out = ArrayV3Metadata.from_dict(data) | ||
if len(meta_out.storage_transformers) > 0: | ||
msg = ( | ||
f"Array metadata contains storage transformers: {meta_out.storage_transformers}." | ||
"Arrays with storage transformers are not supported in zarr-python at this time." | ||
) | ||
raise ValueError(msg) | ||
return meta_out | ||
return ArrayV3Metadata.from_dict(data) # type: ignore[arg-type] | ||
elif zarr_format == 2: | ||
return ArrayV2Metadata.from_dict(data) | ||
return ArrayV2Metadata.from_dict(data) # type: ignore[arg-type] | ||
else: | ||
raise ValueError(f"Invalid zarr_format: {zarr_format}. Expected 2 or 3") | ||
raise TypeError # pragma: no cover | ||
|
||
|
||
def create_codec_pipeline(metadata: ArrayMetadata, *, store: Store | None = None) -> CodecPipeline: | ||
|
@@ -213,9 +220,27 @@ def create_codec_pipeline(metadata: ArrayMetadata, *, store: Store | None = None | |
raise TypeError # pragma: no cover | ||
|
||
|
||
@overload | ||
async def get_array_metadata( | ||
store_path: StorePath, zarr_format: Literal[3] | ||
) -> ArrayMetadataJSON_V3: ... | ||
|
||
|
||
@overload | ||
async def get_array_metadata( | ||
store_path: StorePath, zarr_format: Literal[2] | ||
) -> ArrayMetadataJSON_V2: ... | ||
|
||
|
||
@overload | ||
async def get_array_metadata( | ||
store_path: StorePath, zarr_format: None | ||
) -> ArrayMetadataJSON_V3 | ArrayMetadataJSON_V2: ... | ||
|
||
|
||
async def get_array_metadata( | ||
store_path: StorePath, zarr_format: ZarrFormat | None = 3 | ||
) -> dict[str, JSON]: | ||
) -> ArrayMetadataJSON_V3 | ArrayMetadataJSON_V2: | ||
if zarr_format == 2: | ||
zarray_bytes, zattrs_bytes = await gather( | ||
(store_path / ZARRAY_JSON).get(prototype=cpu_buffer_prototype), | ||
|
@@ -260,19 +285,25 @@ async def get_array_metadata( | |
msg = f"Invalid value for 'zarr_format'. Expected 2, 3, or None. Got '{zarr_format}'." # type: ignore[unreachable] | ||
raise MetadataValidationError(msg) | ||
|
||
metadata_dict: dict[str, JSON] | ||
metadata_dict: ArrayMetadataJSON_V2 | ArrayMetadataJSON_V3 | ||
if zarr_format == 2: | ||
# V2 arrays are composed of a .zarray object and a .zattrs object | ||
assert zarray_bytes is not None | ||
metadata_dict = json.loads(zarray_bytes.to_bytes()) | ||
zattrs_dict = json.loads(zattrs_bytes.to_bytes()) if zattrs_bytes is not None else {} | ||
metadata_dict["attributes"] = zattrs_dict | ||
tycheck = check_type(metadata_dict, ArrayMetadataJSON_V2) | ||
if not tycheck.success: | ||
msg = "The .zarray object at {store_path} is not a valid Zarr array metadata object. " | ||
raise NodeTypeValidationError("zarray", "Zarr array metadata object", metadata_dict) | ||
else: | ||
# V3 arrays are comprised of a zarr.json object | ||
assert zarr_json_bytes is not None | ||
metadata_dict = json.loads(zarr_json_bytes.to_bytes()) | ||
|
||
parse_node_type_array(metadata_dict.get("node_type")) | ||
tycheck = check_type(metadata_dict, ArrayMetadataJSON_V3) | ||
if not tycheck.success: | ||
msg = "The zarr.json object at {store_path} is not a valid Zarr array metadata object. " | ||
raise NodeTypeValidationError("zarr.json", "Zarr array metadata object", metadata_dict) | ||
|
||
return metadata_dict | ||
|
||
|
@@ -311,22 +342,22 @@ class AsyncArray(Generic[T_ArrayMetadata]): | |
@overload | ||
def __init__( | ||
self: AsyncArray[ArrayV2Metadata], | ||
metadata: ArrayV2Metadata | ArrayV2MetadataDict, | ||
metadata: ArrayV2Metadata | ArrayMetadataJSON_V2, | ||
store_path: StorePath, | ||
config: ArrayConfigLike | None = None, | ||
) -> None: ... | ||
|
||
@overload | ||
def __init__( | ||
self: AsyncArray[ArrayV3Metadata], | ||
metadata: ArrayV3Metadata | ArrayV3MetadataDict, | ||
metadata: ArrayV3Metadata | ArrayMetadataJSON_V3, | ||
store_path: StorePath, | ||
config: ArrayConfigLike | None = None, | ||
) -> None: ... | ||
|
||
def __init__( | ||
self, | ||
metadata: ArrayMetadata | ArrayMetadataDict, | ||
metadata: ArrayMetadata | ArrayMetadataJSON_V2 | ArrayMetadataJSON_V3, | ||
store_path: StorePath, | ||
config: ArrayConfigLike | None = None, | ||
) -> None: | ||
|
@@ -945,7 +976,7 @@ def from_dict( | |
ValueError | ||
If the dictionary data is invalid or incompatible with either Zarr format 2 or 3 array creation. | ||
""" | ||
metadata = parse_array_metadata(data) | ||
metadata = parse_array_metadata(data) # type: ignore[call-overload] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. Is the There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. we need the overload because the |
||
return cls(metadata=metadata, store_path=store_path) | ||
|
||
@classmethod | ||
|
@@ -978,9 +1009,7 @@ async def open( | |
""" | ||
store_path = await make_store_path(store) | ||
metadata_dict = await get_array_metadata(store_path, zarr_format=zarr_format) | ||
# TODO: remove this cast when we have better type hints | ||
_metadata_dict = cast("ArrayV3MetadataDict", metadata_dict) | ||
return cls(store_path=store_path, metadata=_metadata_dict) | ||
return cls(store_path=store_path, metadata=metadata_dict) | ||
|
||
@property | ||
def store(self) -> Store: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Probably should reexport these types here for backwards compatibility.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done in 30d48a8