Skip to content

Commit 10fdc90

Browse files
committed
Merge remote-tracking branch 'upstream/v3' into fix/intermediates
2 parents 44e4554 + 5ca080d commit 10fdc90

File tree

4 files changed

+63
-2
lines changed

4 files changed

+63
-2
lines changed

src/zarr/core/array.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,14 @@ def parse_array_metadata(data: Any) -> ArrayV2Metadata | ArrayV3Metadata:
8888
return data
8989
elif isinstance(data, dict):
9090
if data["zarr_format"] == 3:
91-
return ArrayV3Metadata.from_dict(data)
91+
meta_out = ArrayV3Metadata.from_dict(data)
92+
if len(meta_out.storage_transformers) > 0:
93+
msg = (
94+
f"Array metadata contains storage transformers: {meta_out.storage_transformers}."
95+
"Arrays with storage transformers are not supported in zarr-python at this time."
96+
)
97+
raise ValueError(msg)
98+
return meta_out
9299
elif data["zarr_format"] == 2:
93100
return ArrayV2Metadata.from_dict(data)
94101
raise TypeError

src/zarr/core/metadata/v3.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,23 @@ def parse_dimension_names(data: object) -> tuple[str | None, ...] | None:
7272
raise TypeError(msg)
7373

7474

75+
def parse_storage_transformers(data: object) -> tuple[dict[str, JSON], ...]:
    """
    Parse the ``storage_transformers`` metadata field.

    Zarr python cannot use storage transformers at this time, so this function
    doesn't attempt to validate the contents of the individual transformers.

    Parameters
    ----------
    data : object
        ``None`` or an iterable of storage transformer dicts.

    Returns
    -------
    tuple[dict[str, JSON], ...]
        The transformers as a tuple; ``()`` when ``data`` is ``None`` or empty.

    Raises
    ------
    TypeError
        If ``data`` is neither ``None`` nor iterable.
    """
    if data is None:
        return ()
    if isinstance(data, Iterable):
        # Materialize exactly once. The previous version called tuple(data)
        # only to check the length and then returned `data` itself, which
        # (a) hands an exhausted iterator back to the caller when `data` is a
        # one-shot iterator, and (b) leaks non-tuple types (list, dict) through
        # the declared tuple return type. tuple() of an empty iterable is ()
        # already, so no separate empty branch is needed.
        return tuple(data)
    raise TypeError(
        f"Invalid storage_transformers. Expected an iterable of dicts. Got {type(data)} instead."
    )
90+
91+
7592
class V3JsonEncoder(json.JSONEncoder):
7693
def __init__(self, *args: Any, **kwargs: Any) -> None:
7794
self.indent = kwargs.pop("indent", config.get("json_indent"))
@@ -144,6 +161,7 @@ class ArrayV3Metadata(ArrayMetadata):
144161
dimension_names: tuple[str, ...] | None = None
145162
zarr_format: Literal[3] = field(default=3, init=False)
146163
node_type: Literal["array"] = field(default="array", init=False)
164+
storage_transformers: tuple[dict[str, JSON], ...]
147165

148166
def __init__(
149167
self,
@@ -156,6 +174,7 @@ def __init__(
156174
codecs: Iterable[Codec | dict[str, JSON]],
157175
attributes: None | dict[str, JSON],
158176
dimension_names: None | Iterable[str],
177+
storage_transformers: None | Iterable[dict[str, JSON]] = None,
159178
) -> None:
160179
"""
161180
Because the class is a frozen dataclass, we set attributes using object.__setattr__
@@ -168,6 +187,7 @@ def __init__(
168187
fill_value_parsed = parse_fill_value(fill_value, dtype=data_type_parsed)
169188
attributes_parsed = parse_attributes(attributes)
170189
codecs_parsed_partial = parse_codecs(codecs)
190+
storage_transformers_parsed = parse_storage_transformers(storage_transformers)
171191

172192
array_spec = ArraySpec(
173193
shape=shape_parsed,
@@ -186,6 +206,7 @@ def __init__(
186206
object.__setattr__(self, "dimension_names", dimension_names_parsed)
187207
object.__setattr__(self, "fill_value", fill_value_parsed)
188208
object.__setattr__(self, "attributes", attributes_parsed)
209+
object.__setattr__(self, "storage_transformers", storage_transformers_parsed)
189210

190211
self._validate_metadata()
191212

tests/v3/test_array.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,10 @@
77

88
import zarr.api.asynchronous
99
from zarr import Array, AsyncArray, Group
10+
from zarr.codecs.bytes import BytesCodec
1011
from zarr.core.array import chunks_initialized
1112
from zarr.core.buffer.cpu import NDBuffer
12-
from zarr.core.common import ZarrFormat
13+
from zarr.core.common import JSON, ZarrFormat
1314
from zarr.core.group import AsyncGroup
1415
from zarr.core.indexing import ceildiv
1516
from zarr.core.sync import sync
@@ -275,6 +276,27 @@ def test_serializable_sync_array(store: LocalStore, zarr_format: ZarrFormat) ->
275276
np.testing.assert_array_equal(actual[:], expected[:])
276277

277278

279+
@pytest.mark.parametrize("store", ["memory"], indirect=True)
def test_storage_transformers(store: MemoryStore) -> None:
    """
    Test that array metadata containing a storage transformer raises a
    ValueError, because zarr-python does not support storage transformers
    at this time.
    """
    metadata_dict: dict[str, JSON] = {
        "zarr_format": 3,
        "node_type": "array",
        "shape": (10,),
        "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}},
        "data_type": "uint8",
        "chunk_key_encoding": {"name": "v2", "configuration": {"separator": "/"}},
        "codecs": (BytesCodec().to_dict(),),
        "fill_value": 0,
        # Trailing comma is required: without it the parentheses are just
        # grouping and this value is a bare dict rather than the intended
        # 1-tuple of transformer dicts.
        "storage_transformers": ({"test": "should_raise"},),
    }
    match = "Arrays with storage transformers are not supported in zarr-python at this time."
    with pytest.raises(ValueError, match=match):
        Array.from_dict(StorePath(store), data=metadata_dict)
298+
299+
278300
@pytest.mark.parametrize("test_cls", [Array, AsyncArray])
279301
@pytest.mark.parametrize("nchunks", [2, 5, 10])
280302
def test_nchunks(test_cls: type[Array] | type[AsyncArray], nchunks: int) -> None:

tests/v3/test_metadata/test_v3.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from typing import Any
1515

1616
from zarr.abc.codec import Codec
17+
from zarr.core.common import JSON
1718

1819

1920
import numpy as np
@@ -196,6 +197,7 @@ def test_parse_fill_value_invalid_type_sequence(fill_value: Any, dtype_str: str)
196197
@pytest.mark.parametrize("chunk_key_encoding", ["v2", "default"])
197198
@pytest.mark.parametrize("dimension_separator", [".", "/", None])
198199
@pytest.mark.parametrize("dimension_names", ["nones", "strings", "missing"])
200+
@pytest.mark.parametrize("storage_transformers", [None, ()])
199201
def test_metadata_to_dict(
200202
chunk_grid: str,
201203
codecs: list[Codec],
@@ -204,6 +206,7 @@ def test_metadata_to_dict(
204206
dimension_separator: Literal[".", "/"] | None,
205207
dimension_names: Literal["nones", "strings", "missing"],
206208
attributes: None | dict[str, Any],
209+
storage_transformers: None | tuple[dict[str, JSON]],
207210
) -> None:
208211
shape = (1, 2, 3)
209212
data_type = "uint8"
@@ -234,6 +237,7 @@ def test_metadata_to_dict(
234237
"chunk_key_encoding": cke,
235238
"codecs": tuple(c.to_dict() for c in codecs),
236239
"fill_value": fill_value,
240+
"storage_transformers": storage_transformers,
237241
}
238242

239243
if attributes is not None:
@@ -244,9 +248,16 @@ def test_metadata_to_dict(
244248
metadata = ArrayV3Metadata.from_dict(metadata_dict)
245249
observed = metadata.to_dict()
246250
expected = metadata_dict.copy()
251+
252+
# if unset or None or (), storage_transformers gets normalized to ()
253+
assert observed["storage_transformers"] == ()
254+
observed.pop("storage_transformers")
255+
expected.pop("storage_transformers")
256+
247257
if attributes is None:
248258
assert observed["attributes"] == {}
249259
observed.pop("attributes")
260+
250261
if dimension_separator is None:
251262
if chunk_key_encoding == "default":
252263
expected_cke_dict = DefaultChunkKeyEncoding(separator="/").to_dict()

0 commit comments

Comments
 (0)