Skip to content

Commit 786669c

Browse files
committed
check for unexpected zarr metadata keys and codec configuration
1 parent 50cd5e0 commit 786669c

File tree

3 files changed

+40
-2
lines changed

3 files changed

+40
-2
lines changed

src/zarr/core/common.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,8 @@ def parse_named_configuration(
118118
) -> tuple[str, JSON | None]:
119119
if not isinstance(data, dict):
120120
raise TypeError(f"Expected dict, got {type(data)}")
121+
if set(data) - {"name", "configuration"}:
122+
raise ValueError(f"Named configuration expects keys 'name' and 'configuration'. Got {list(data.keys())}.")
121123
if "name" not in data:
122124
raise ValueError(f"Named configuration does not have a 'name' key. Got {data}.")
123125
name_parsed = parse_name(data["name"], expected_name)

src/zarr/core/metadata/v3.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,13 +69,16 @@ def parse_codecs(data: object) -> tuple[Codec, ...]:
6969

7070
if not isinstance(data, Iterable):
7171
raise TypeError(f"Expected iterable, got {type(data)}")
72-
72+
if isinstance(data, str):
73+
data = [data]
7374
for c in data:
7475
if isinstance(
7576
c, ArrayArrayCodec | ArrayBytesCodec | BytesBytesCodec
7677
): # Can't use Codec here because of mypy limitation
7778
out += (c,)
7879
else:
80+
if isinstance(c, str):
81+
c = {"name":c}
7982
name_parsed, _ = parse_named_configuration(c, require_configuration=False)
8083
out += (get_codec_class(name_parsed).from_dict(c),)
8184

@@ -259,10 +262,14 @@ def __init__(
259262
attributes: dict[str, JSON] | None,
260263
dimension_names: Iterable[str] | None,
261264
storage_transformers: Iterable[dict[str, JSON]] | None = None,
265+
**kwargs: dict[str, Any],
262266
) -> None:
263267
"""
264268
Because the class is a frozen dataclass, we set attributes using object.__setattr__
265269
"""
270+
if kwargs:
271+
raise ValueError(f"Unexpected zarr metadata keys: {list(kwargs.keys())}")
272+
266273
shape_parsed = parse_shapelike(shape)
267274
data_type_parsed = DataType.parse(data_type)
268275
chunk_grid_parsed = ChunkGrid.from_dict(chunk_grid)

tests/test_metadata/test_v3.py

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -412,6 +412,35 @@ def test_dtypes(dtype_str: str) -> None:
412412
# return type for vlen types may vary depending on numpy version
413413
assert dt.byte_count is None
414414

415+
def default_metadata_dict(**kwargs: Any) -> dict[str, Any]:
    """Return a minimal zarr v3 array metadata document for use in tests.

    A fresh dictionary is built on every call, so callers may mutate the
    result freely.

    Parameters
    ----------
    **kwargs
        Top-level entries to add to, or override in, the default document.
        Overridden keys keep their original position; new keys are appended.

    Returns
    -------
    dict[str, Any]
        The default document merged with ``kwargs``.
    """
    defaults: dict[str, Any] = {
        "zarr_format": 3,
        "node_type": "array",
        "shape": (1,),
        "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}},
        "data_type": "float64",
        "chunk_key_encoding": {"name": "default", "separator": "."},
        "codecs": [{"name": "bytes"}],
        "fill_value": 0,
    }
    # Right-hand operand wins on key clashes, matching item-by-item assignment.
    return defaults | kwargs
415429

416430
def test_fail_on_invalid_value() -> None:
    """Check that unexpected keys in array metadata are rejected.

    The unmodified default document is expected to load. Adding an unknown
    top-level key, or an unknown key inside a codec entry, is expected to
    raise ``ValueError`` with a message naming the offending keys.
    """
    # Baseline: the default document loads without raising.
    ArrayV3Metadata.from_dict(default_metadata_dict())

    # Unknown key at the top level of the metadata document.
    top_level_pattern = re.escape("Unexpected zarr metadata keys: ['unknown']")
    with pytest.raises(ValueError, match=top_level_pattern):
        ArrayV3Metadata.from_dict(default_metadata_dict(unknown="value"))

    # Unknown key inside a named-configuration (codec) entry.
    codec_pattern = re.escape(
        "Named configuration expects keys 'name' and 'configuration'. Got ['name', 'unknown']."
    )
    codec_with_extra_key = {"name": "bytes", "unknown": "value"}
    with pytest.raises(ValueError, match=codec_pattern):
        ArrayV3Metadata.from_dict(default_metadata_dict(codecs=[codec_with_extra_key]))
436+
437+
438+
def test_string_codecs() -> None:
    """Check that codecs may be given by bare name instead of a dict.

    A list of codec-name strings and a single codec-name string are both
    expected to yield the same codecs as the explicit ``{"name": ...}`` form.
    """
    reference = ArrayV3Metadata.from_dict(
        default_metadata_dict(data_type="bool", codecs=[{"name": "bytes"}])
    )

    # Codecs given as a list of bare names.
    from_name_list = ArrayV3Metadata.from_dict(
        default_metadata_dict(data_type="bool", codecs=["bytes"])
    )
    assert from_name_list.codecs == reference.codecs

    # Codecs given as one bare name with no enclosing list.
    from_single_name = ArrayV3Metadata.from_dict(
        default_metadata_dict(data_type="bool", codecs="bytes")
    )
    assert from_single_name.codecs == reference.codecs

    # NOTE(review): the two statements below make the identical call, yet the
    # first requires it to succeed and the second requires a ValueError. Both
    # cannot hold, so this test cannot pass as written -- confirm the intended
    # behaviour for a bare "bytes" codec with int16 and drop one of them.
    ArrayV3Metadata.from_dict(default_metadata_dict(data_type="int16", codecs=["bytes"]))
    endian_pattern = "Expected bytes codec to specify argument endian for data_type=int16"
    with pytest.raises(ValueError, match=endian_pattern):
        ArrayV3Metadata.from_dict(default_metadata_dict(data_type="int16", codecs=["bytes"]))

0 commit comments

Comments
 (0)