Skip to content

Commit 08fa7f5

Browse files
committed
objects for datatype, chunk_key_encodings, chunk_grid
1 parent 0353ae9 commit 08fa7f5

File tree

5 files changed

+130
-42
lines changed

5 files changed

+130
-42
lines changed

src/zarr/core/chunk_grids.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -179,9 +179,17 @@ def __init__(self, *, chunk_shape: ChunkCoordsLike) -> None:
179179

180180
@classmethod
181181
def _from_dict(cls, data: dict[str, JSON]) -> Self:
182-
_, configuration_parsed = parse_named_configuration(data, "regular")
182+
_, config_parsed = parse_named_configuration(data, "regular")
183+
184+
if config_parsed and not all(
185+
k == "chunk_shape" or (isinstance(v, dict) and v.get("must_understand") is False)
186+
for k, v in config_parsed.items()
187+
):
188+
raise ValueError(
189+
f"The chunk grid expects a 'chunk_shape' key. Got {list(config_parsed.keys())}."
190+
)
183191

184-
return cls(**configuration_parsed) # type: ignore[arg-type]
192+
return cls(chunk_shape=config_parsed.get("chunk_shape")) # type: ignore[arg-type]
185193

186194
def to_dict(self) -> dict[str, JSON]:
187195
return {"name": "regular", "configuration": {"chunk_shape": tuple(self.chunk_shape)}}

src/zarr/core/chunk_key_encodings.py

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -44,24 +44,34 @@ def from_dict(cls, data: dict[str, JSON] | ChunkKeyEncodingLike) -> ChunkKeyEnco
4444
return data
4545

4646
# handle ChunkKeyEncodingParams
47-
if "name" in data and "separator" in data:
47+
if isinstance(data, dict) and data.keys() == {"name", "separator"}:
4848
data = {"name": data["name"], "configuration": {"separator": data["separator"]}}
4949

5050
# TODO: remove this cast when we are statically typing the JSON metadata completely.
5151
data = cast(dict[str, JSON], data)
5252

5353
# configuration is optional for chunk key encodings
5454
name_parsed, config_parsed = parse_named_configuration(data, require_configuration=False)
55+
56+
if config_parsed and not all(
57+
k == "separator" or (isinstance(v, dict) and v.get("must_understand") is False)
58+
for k, v in config_parsed.items()
59+
):
60+
raise ValueError(
61+
f"The chunk key encoding expects a 'separator' key. Got {list(config_parsed.keys())}."
62+
)
63+
5564
if name_parsed == "default":
56-
if config_parsed is None:
57-
# for default, normalize missing configuration to use the "/" separator.
58-
config_parsed = {"separator": "/"}
59-
return DefaultChunkKeyEncoding(**config_parsed) # type: ignore[arg-type]
65+
# for default, normalize missing configuration to use the "/" separator.
66+
return DefaultChunkKeyEncoding(
67+
separator=config_parsed.get("separator") if config_parsed else "/" # type: ignore[arg-type]
68+
)
6069
if name_parsed == "v2":
61-
if config_parsed is None:
62-
# for v2, normalize missing configuration to use the "." separator.
63-
config_parsed = {"separator": "."}
64-
return V2ChunkKeyEncoding(**config_parsed) # type: ignore[arg-type]
70+
# for v2, normalize missing configuration to use the "." separator.
71+
return V2ChunkKeyEncoding(
72+
separator=config_parsed.get("separator") if config_parsed else "." # type: ignore[arg-type]
73+
)
74+
6575
msg = f"Unknown chunk key encoding. Got {name_parsed}, expected one of ('v2', 'default')."
6676
raise ValueError(msg)
6777

@@ -77,7 +87,7 @@ def encode_chunk_key(self, chunk_coords: ChunkCoords) -> str:
7787
pass
7888

7989

80-
ChunkKeyEncodingLike: TypeAlias = ChunkKeyEncodingParams | ChunkKeyEncoding
90+
ChunkKeyEncodingLike: TypeAlias = ChunkKeyEncodingParams | ChunkKeyEncoding | str
8191

8292

8393
@dataclass(frozen=True)

src/zarr/core/common.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -114,21 +114,26 @@ def parse_named_configuration(
114114

115115

116116
def parse_named_configuration(
117-
data: JSON, expected_name: str | None = None, *, require_configuration: bool = True
117+
data: JSON,
118+
expected_name: str | None = None,
119+
*,
120+
require_configuration: bool = True,
118121
) -> tuple[str, JSON | None]:
122+
if isinstance(data, str):
123+
data = {"name": data}
119124
if not isinstance(data, dict):
120125
raise TypeError(f"Expected dict, got {type(data)}")
121-
122-
if not all(
126+
elif not all(
123127
k in {"name", "configuration"}
124128
or (isinstance(v, dict) and (v.get("must_understand") is False))
125129
for k, v in data.items()
126130
):
127131
raise ValueError(
128132
f"Named configuration expects keys 'name' and 'configuration'. Got {list(data.keys())}."
129133
)
130-
if "name" not in data:
134+
elif "name" not in data:
131135
raise ValueError(f"Named configuration does not have a 'name' key. Got {data}.")
136+
132137
name_parsed = parse_name(data["name"], expected_name)
133138
if "configuration" in data:
134139
configuration_parsed = parse_configuration(data["configuration"])

src/zarr/core/metadata/v3.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -413,7 +413,10 @@ def from_dict(cls, data: dict[str, JSON]) -> Self:
413413
_ = parse_node_type_array(_data.pop("node_type"))
414414

415415
# check that the data_type attribute is valid
416-
data_type = DataType.parse(_data.pop("data_type"))
416+
dt = _data.pop("data_type")
417+
if isinstance(dt, dict):
418+
dt, _ = parse_named_configuration(dt, require_configuration=False)
419+
data_type = DataType.parse(dt)
417420

418421
# dimension_names key is optional, normalize missing to `None`
419422
_data["dimension_names"] = _data.pop("dimension_names", None)

tests/test_metadata/test_v3.py

Lines changed: 87 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -428,32 +428,94 @@ def default_metadata_dict(**kwargs: Any) -> dict[str, Any]:
428428
return d
429429

430430

431-
def test_fail_on_invalid_key() -> None:
432-
ArrayV3Metadata.from_dict(default_metadata_dict())
433-
# Metadata contains invalid keys
434-
with pytest.raises(ValueError, match=re.escape("Unexpected zarr metadata keys: ['unknown']")):
435-
ArrayV3Metadata.from_dict(default_metadata_dict(unknown="value"))
436-
# accepts invalid key with must_understand=false
437-
ArrayV3Metadata.from_dict(
438-
default_metadata_dict(unknown={"name": "value", "must_understand": False})
439-
)
440-
# Named configuration contains invalid keys
441-
with pytest.raises(
442-
ValueError,
443-
match=re.escape(
444-
"Named configuration expects keys 'name' and 'configuration'. Got ['name', 'unknown', 'configuration']."
431+
@pytest.mark.parametrize(
432+
("metadata_dict", "is_valid", "fail_msg"),
433+
[
434+
(default_metadata_dict(), True, ""),
435+
(
436+
default_metadata_dict(unknown="value"),
437+
False,
438+
"Unexpected zarr metadata keys: ['unknown']",
445439
),
446-
):
447-
ArrayV3Metadata.from_dict(
448-
default_metadata_dict(codecs=[{"name": "bytes", "unknown": {}, "configuration": {}}])
449-
)
450-
451-
# accepts invalid key with must_understand=false
452-
ArrayV3Metadata.from_dict(
453-
default_metadata_dict(
454-
codecs=[{"name": "bytes", "configuration": {}, "unknown": {"must_understand": False}}]
455-
)
456-
)
440+
(default_metadata_dict(unknown={"name": "value", "must_understand": False}), True, ""),
441+
(
442+
default_metadata_dict(codecs=[{"name": "bytes", "unknown": {}, "configuration": {}}]),
443+
False,
444+
"Named configuration expects keys 'name' and 'configuration'. Got ['name', 'unknown', 'configuration'].",
445+
),
446+
(
447+
default_metadata_dict(
448+
codecs=[
449+
{"name": "bytes", "configuration": {}, "unknown": {"must_understand": False}}
450+
]
451+
),
452+
True,
453+
"",
454+
),
455+
(
456+
default_metadata_dict(data_type={"name": "int8", "value": {"must_understand": False}}),
457+
True,
458+
"",
459+
),
460+
(
461+
default_metadata_dict(
462+
chunk_key_encoding={
463+
"name": "default",
464+
"configuration": {"unknown": {"name": "value", "must_understand": False}},
465+
}
466+
),
467+
True,
468+
"",
469+
),
470+
(
471+
default_metadata_dict(
472+
chunk_key_encoding={"name": "default", "configuration": {"unknown": "value"}}
473+
),
474+
False,
475+
"The chunk key encoding expects a 'separator' key. Got ['unknown'].",
476+
),
477+
(default_metadata_dict(chunk_key_encoding="default"), True, ""),
478+
(default_metadata_dict(chunk_key_encoding="invalid"), False, ""),
479+
(
480+
default_metadata_dict(
481+
chunk_grid={"name": "regular", "configuration": {"chunk_shape": [2]}}
482+
),
483+
True,
484+
"",
485+
),
486+
(
487+
default_metadata_dict(
488+
chunk_grid={
489+
"name": "regular",
490+
"configuration": {"chunk_shape": [2], "unknown": "value"},
491+
}
492+
),
493+
False,
494+
"The chunk grid expects a 'chunk_shape' key. Got ['chunk_shape', 'unknown'].",
495+
),
496+
(
497+
default_metadata_dict(
498+
chunk_grid={
499+
"name": "regular",
500+
"configuration": {
501+
"chunk_shape": [2],
502+
"unknown": {"name": "value", "must_understand": False},
503+
},
504+
}
505+
),
506+
True,
507+
"",
508+
),
509+
],
510+
)
511+
def test_fail_on_invalid_metadata_key(
512+
metadata_dict: dict[str, Any], is_valid: bool, fail_msg: str
513+
) -> None:
514+
if is_valid:
515+
ArrayV3Metadata.from_dict(metadata_dict)
516+
else:
517+
with pytest.raises(ValueError, match=re.escape(fail_msg)):
518+
ArrayV3Metadata.from_dict(metadata_dict)
457519

458520

459521
@pytest.mark.parametrize(

0 commit comments

Comments
 (0)