Skip to content

Commit 32cd309

Browse files
committed
fix array metadata dicts and refactor to_dict test
1 parent 2125153 commit 32cd309

File tree

1 file changed

+75
-72
lines changed

1 file changed

+75
-72
lines changed

tests/test_metadata/test_v3.py

Lines changed: 75 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
from collections.abc import Mapping
34
import json
45
import re
56
from typing import TYPE_CHECKING, Literal
@@ -10,6 +11,7 @@
1011
from zarr.codecs.bytes import BytesCodec
1112
from zarr.core.buffer import default_buffer_prototype
1213
from zarr.core.chunk_key_encodings import DefaultChunkKeyEncoding, V2ChunkKeyEncoding
14+
from zarr.core.common import ArrayMetadataJSON_V3, NamedConfig
1315
from zarr.core.config import config
1416
from zarr.core.dtype import get_data_type_from_native_dtype
1517
from zarr.core.dtype.npy.string import _NUMPY_SUPPORTS_VLEN_STRING
@@ -110,83 +112,84 @@ def test_parse_fill_value_invalid_type_sequence(fill_value: Any, dtype_str: str)
110112
dtype_instance.from_json_scalar(fill_value, zarr_format=3)
111113

112114

113-
@pytest.mark.parametrize("chunk_grid", ["regular"])
114-
@pytest.mark.parametrize("attributes", [None, {"foo": "bar"}])
115-
@pytest.mark.parametrize("codecs", [[BytesCodec(endian=None)]])
115+
@pytest.mark.parametrize("chunk_grid", [{"name": "regular", "configuration": {"chunk_shape": (1, 1, 1)}}])
116+
@pytest.mark.parametrize("codecs", [({"name" : "bytes"},)])
116117
@pytest.mark.parametrize("fill_value", [0, 1])
117-
@pytest.mark.parametrize("chunk_key_encoding", ["v2", "default"])
118-
@pytest.mark.parametrize("dimension_separator", [".", "/", None])
119-
@pytest.mark.parametrize("dimension_names", ["nones", "strings", "missing"])
120-
@pytest.mark.parametrize("storage_transformers", [None, ()])
118+
@pytest.mark.parametrize("data_type", ["int8", "uint8"])
119+
@pytest.mark.parametrize("chunk_key_encoding", [
120+
{"name": "v2", "configuration": {"separator": "."}},
121+
{"name": "v2", "configuration": {"separator": "/"}},
122+
{"name": "v2"},
123+
{"name": "default", "configuration": {"separator": "."}},
124+
{"name": "default", "configuration": {"separator": "/"}},
125+
{"name": "default"},
126+
])
127+
@pytest.mark.parametrize("attributes", ["unset", {"foo": "bar"}])
128+
@pytest.mark.parametrize("dimension_names", [(None, None, None), ('a','b', None), "unset"])
129+
@pytest.mark.parametrize("storage_transformers", [(), "unset"])
121130
def test_metadata_to_dict(
122-
chunk_grid: str,
131+
chunk_grid: NamedConfig[str, Mapping[str, object]],
123132
codecs: list[Codec],
133+
data_type: str,
124134
fill_value: Any,
125-
chunk_key_encoding: Literal["v2", "default"],
126-
dimension_separator: Literal[".", "/"] | None,
127-
dimension_names: Literal["nones", "strings", "missing"],
128-
attributes: dict[str, Any] | None,
129-
storage_transformers: tuple[dict[str, JSON]] | None,
135+
chunk_key_encoding: NamedConfig[str, Mapping[str, object]],
136+
dimension_names: tuple[str | None, ...] | Literal["unset"],
137+
attributes: dict[str, Any] | Literal['unset'],
138+
storage_transformers: tuple[dict[str, JSON]] | Literal["unset"],
130139
) -> None:
131140
shape = (1, 2, 3)
132-
data_type_str = "uint8"
133-
if chunk_grid == "regular":
134-
cgrid = {"name": "regular", "configuration": {"chunk_shape": (1, 1, 1)}}
135-
136-
cke: dict[str, Any]
137-
cke_name_dict = {"name": chunk_key_encoding}
138-
if dimension_separator is not None:
139-
cke = cke_name_dict | {"configuration": {"separator": dimension_separator}}
140-
else:
141-
cke = cke_name_dict
142-
dnames: tuple[str | None, ...] | None
143-
144-
if dimension_names == "strings":
145-
dnames = tuple(map(str, range(len(shape))))
146-
elif dimension_names == "missing":
147-
dnames = None
148-
elif dimension_names == "nones":
149-
dnames = (None,) * len(shape)
150-
151-
metadata_dict = {
152-
"zarr_format": 3,
153-
"node_type": "array",
154-
"shape": shape,
155-
"chunk_grid": cgrid,
156-
"data_type": data_type_str,
157-
"chunk_key_encoding": cke,
158-
"codecs": tuple(c.to_dict() for c in codecs),
159-
"fill_value": fill_value,
160-
"storage_transformers": storage_transformers,
161-
}
162141

163-
if attributes is not None:
164-
metadata_dict["attributes"] = attributes
165-
if dnames is not None:
166-
metadata_dict["dimension_names"] = dnames
142+
# These are the fields in the array metadata document that are optional
143+
not_required = {}
167144

168-
metadata = ArrayV3Metadata.from_dict(metadata_dict)
169-
observed = metadata.to_dict()
170-
expected = metadata_dict.copy()
145+
if dimension_names != "unset":
146+
not_required["dimension_names"] = dimension_names
171147

172-
# if unset or None or (), storage_transformers gets normalized to ()
173-
assert observed["storage_transformers"] == ()
174-
observed.pop("storage_transformers")
175-
expected.pop("storage_transformers")
148+
if storage_transformers != "unset":
149+
not_required["storage_transformers"] = storage_transformers
176150

177-
if attributes is None:
178-
assert observed["attributes"] == {}
179-
observed.pop("attributes")
151+
if attributes != "unset":
152+
not_required["attributes"] = attributes
180153

181-
if dimension_separator is None:
182-
if chunk_key_encoding == "default":
183-
expected_cke_dict = DefaultChunkKeyEncoding(separator="/").to_dict()
154+
source_dict = {
155+
"zarr_format": 3,
156+
"node_type": "array",
157+
"shape": shape,
158+
"chunk_grid": chunk_grid,
159+
"data_type": data_type,
160+
"chunk_key_encoding": chunk_key_encoding,
161+
"codecs": codecs,
162+
"fill_value": fill_value,
163+
} | not_required
164+
165+
metadata = ArrayV3Metadata.from_dict(source_dict)
166+
parsed_dict = metadata.to_dict()
167+
168+
for k,v in parsed_dict.items():
169+
if k in source_dict:
170+
if k == 'chunk_key_encoding':
171+
assert v['name'] == chunk_key_encoding['name']
172+
if chunk_key_encoding['name'] == 'v2':
173+
if "configuration" in chunk_key_encoding:
174+
if "separator" in chunk_key_encoding['configuration']:
175+
assert v['configuration']['separator'] == chunk_key_encoding['configuration']['separator']
176+
else:
177+
assert v["configuration"]["separator"] == "."
178+
elif chunk_key_encoding['name'] == 'default':
179+
if "configuration" in chunk_key_encoding:
180+
if "separator" in chunk_key_encoding['configuration']:
181+
assert v['configuration']['separator'] == chunk_key_encoding['configuration']['separator']
182+
else:
183+
assert v["configuration"]["separator"] == "/"
184+
else:
185+
assert source_dict[k] == v
184186
else:
185-
expected_cke_dict = V2ChunkKeyEncoding(separator=".").to_dict()
186-
assert observed["chunk_key_encoding"] == expected_cke_dict
187-
observed.pop("chunk_key_encoding")
188-
expected.pop("chunk_key_encoding")
189-
assert observed == expected
187+
if k == 'attributes':
188+
assert v == {}
189+
elif k == 'storage_transformers':
190+
assert v == ()
191+
else:
192+
assert v is None
190193

191194

192195
@pytest.mark.parametrize("indent", [2, 4, None])
@@ -201,14 +204,14 @@ def test_json_indent(indent: int):
201204
@pytest.mark.parametrize("precision", ["ns", "D"])
202205
async def test_datetime_metadata(fill_value: int, precision: str) -> None:
203206
dtype = DateTime64(unit=precision)
204-
metadata_dict = {
207+
metadata_dict: ArrayMetadataJSON_V3 = {
205208
"zarr_format": 3,
206209
"node_type": "array",
207210
"shape": (1,),
208211
"chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}},
209212
"data_type": dtype.to_json(zarr_format=3),
210-
"chunk_key_encoding": {"name": "default", "separator": "."},
211-
"codecs": (BytesCodec(),),
213+
"chunk_key_encoding": {"name": "default", "configuration": {"separator": "."}},
214+
"codecs": ({"name": "bytes"},),
212215
"fill_value": dtype.to_json_scalar(
213216
dtype.to_native_dtype().type(fill_value, dtype.unit), zarr_format=3
214217
),
@@ -225,13 +228,13 @@ async def test_datetime_metadata(fill_value: int, precision: str) -> None:
225228
("data_type", "fill_value"), [("uint8", {}), ("int32", [0, 1]), ("float32", "foo")]
226229
)
227230
async def test_invalid_fill_value_raises(data_type: str, fill_value: float) -> None:
228-
metadata_dict = {
231+
metadata_dict: ArrayMetadataJSON_V3 = {
229232
"zarr_format": 3,
230233
"node_type": "array",
231234
"shape": (1,),
232235
"chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}},
233236
"data_type": data_type,
234-
"chunk_key_encoding": {"name": "default", "separator": "."},
237+
"chunk_key_encoding": {"name": "default", "configuration": {"separator": "."}},
235238
"codecs": ({"name": "bytes"},),
236239
"fill_value": fill_value, # intended to be an invalid fill value for the paired data_type (uint8, int32 or float32)
237240
}
@@ -242,13 +245,13 @@ async def test_invalid_fill_value_raises(data_type: str, fill_value: float) -> N
242245

243246
@pytest.mark.parametrize("fill_value", [("NaN"), "Infinity", "-Infinity"])
244247
async def test_special_float_fill_values(fill_value: str) -> None:
245-
metadata_dict = {
248+
metadata_dict: ArrayMetadataJSON_V3 = {
246249
"zarr_format": 3,
247250
"node_type": "array",
248251
"shape": (1,),
249252
"chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}},
250253
"data_type": "float64",
251-
"chunk_key_encoding": {"name": "default", "separator": "."},
254+
"chunk_key_encoding": {"name": "default", "configuration": {"separator": "."}},
252255
"codecs": [{"name": "bytes"}],
253256
"fill_value": fill_value, # special float string ("NaN", "Infinity" or "-Infinity") used with data_type float64
254257
}

0 commit comments

Comments
 (0)