Skip to content

Commit 0c82437

Browse files
committed
metadata: unknown configuration keys will get rejected except must_understand=False
1 parent a38f25e commit 0c82437

File tree

3 files changed

+54
-7
lines changed

3 files changed

+54
-7
lines changed

src/zarr/core/common.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,12 @@ def parse_named_configuration(
118118
) -> tuple[str, JSON | None]:
119119
if not isinstance(data, dict):
120120
raise TypeError(f"Expected dict, got {type(data)}")
121-
if set(data) - {"name", "configuration"}:
121+
122+
if not all(
123+
k in {"name", "configuration"}
124+
or (isinstance(v, dict) and (v.get("must_understand") is False))
125+
for k, v in data.items()
126+
):
122127
raise ValueError(
123128
f"Named configuration expects keys 'name' and 'configuration'. Got {list(data.keys())}."
124129
)
@@ -128,7 +133,9 @@ def parse_named_configuration(
128133
if "configuration" in data:
129134
configuration_parsed = parse_configuration(data["configuration"])
130135
elif require_configuration:
131-
raise ValueError(f"Named configuration does not have a 'configuration' key. Got {data}.")
136+
raise ValueError(
137+
f"Named configuration with name='{name_parsed}' requires a 'configuration' key. Got keys {list(data.keys())}."
138+
)
132139
else:
133140
configuration_parsed = None
134141
return name_parsed, configuration_parsed

src/zarr/core/metadata/v3.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,10 @@ def __init__(
268268
"""
269269
Because the class is a frozen dataclass, we set attributes using object.__setattr__
270270
"""
271-
if kwargs:
271+
if not all(
272+
isinstance(value, dict) and value.get("must_understand") is False
273+
for value in kwargs.values()
274+
):
272275
raise ValueError(f"Unexpected zarr metadata keys: {list(kwargs.keys())}")
273276

274277
shape_parsed = parse_shapelike(shape)

tests/test_metadata/test_v3.py

Lines changed: 41 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -430,19 +430,22 @@ def default_metadata_dict(**kwargs: Any) -> dict[str, Any]:
430430

431431
def test_fail_on_invalid_key() -> None:
432432
ArrayV3Metadata.from_dict(default_metadata_dict())
433-
434433
# Metadata contains invalid keys
435434
with pytest.raises(ValueError, match=re.escape("Unexpected zarr metadata keys: ['unknown']")):
436435
ArrayV3Metadata.from_dict(default_metadata_dict(unknown="value"))
436+
# accepts invalid key with must_understand=false
437+
ArrayV3Metadata.from_dict(
438+
default_metadata_dict(unknown={"name": "value", "must_understand": False})
439+
)
437440
# Named configuration contains invalid keys
438441
with pytest.raises(
439442
ValueError,
440443
match=re.escape(
441-
"Named configuration expects keys 'name' and 'configuration'. Got ['name', 'unknown']."
444+
"Named configuration expects keys 'name' and 'configuration'. Got ['name', 'unknown', 'configuration']."
442445
),
443446
):
444447
ArrayV3Metadata.from_dict(
445-
default_metadata_dict(codecs=[{"name": "bytes", "unknown": "value"}])
448+
default_metadata_dict(codecs=[{"name": "bytes", "unknown": {}, "configuration": {}}])
446449
)
447450

448451

@@ -454,9 +457,43 @@ def test_specify_codecs_with_strings() -> None:
454457
assert result1.codecs == expected.codecs
455458
result2 = ArrayV3Metadata.from_dict(default_metadata_dict(data_type="bool", codecs="bytes"))
456459
assert result2.codecs == expected.codecs
460+
with pytest.raises(
461+
ValueError,
462+
match=re.escape(
463+
"Named configuration with name='transpose' requires a 'configuration' key. Got keys ['name']."
464+
),
465+
):
466+
ArrayV3Metadata.from_dict(default_metadata_dict(codecs="transpose"))
467+
468+
469+
@pytest.mark.parametrize(
470+
("codecs", "insufficient_configuration"),
471+
[
472+
(["bytes"], False),
473+
(["transpose", "bytes"], True),
474+
([{"name": "transpose", "configuration": {"order": (0,)}}, "bytes"], False),
475+
(["bytes", "blosc"], True),
476+
(["bytes", "gzip"], True),
477+
(["bytes", {"name": "gzip", "configuration": {"level": 1}}], False),
478+
(["bytes", "zstd"], True),
479+
(["bytes", "crc32c"], False),
480+
(["sharding_indexed"], True),
481+
(["vlen-utf8"], False),
482+
(["vlen-bytes"], False),
483+
],
484+
)
485+
def test_accept_codecs_as_strings(codecs, insufficient_configuration) -> None:
486+
if insufficient_configuration:
487+
with pytest.raises(
488+
ValueError,
489+
match="Named\\ configuration\\ with\\ name='(.)*'\\ requires\\ a\\ 'configuration'\\ key\\.\\ Got\\ keys\\ \\['name'\\]\\.",
490+
):
491+
ArrayV3Metadata.from_dict(default_metadata_dict(codecs=codecs))
492+
else:
493+
ArrayV3Metadata.from_dict(default_metadata_dict(codecs=codecs))
457494

458495

459-
def test_codec_requires_endian() -> None:
496+
def test_bytes_codec_requires_endian() -> None:
460497
raise_msg = "The `endian` configuration needs to be specified for multi-byte data types."
461498
bytes_codec_no_conf = [{"name": "bytes"}]
462499
with pytest.raises(ValueError, match=raise_msg):

0 commit comments

Comments
 (0)