Skip to content
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 5 additions & 13 deletions src/zarr/core/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,8 @@
if isinstance(data, ArrayMetadata):
return data
elif isinstance(data, dict):
if data["zarr_format"] == 3:
zarr_format = data.get("zarr_format")
if zarr_format == 3:

Check warning on line 142 in src/zarr/core/array.py

View check run for this annotation

Codecov / codecov/patch

src/zarr/core/array.py#L141-L142

Added lines #L141 - L142 were not covered by tests
meta_out = ArrayV3Metadata.from_dict(data)
if len(meta_out.storage_transformers) > 0:
msg = (
Expand All @@ -147,8 +148,10 @@
)
raise ValueError(msg)
return meta_out
elif data["zarr_format"] == 2:
elif zarr_format == 2:

Check warning on line 151 in src/zarr/core/array.py

View check run for this annotation

Codecov / codecov/patch

src/zarr/core/array.py#L151

Added line #L151 was not covered by tests
return ArrayV2Metadata.from_dict(data)
else:
raise ValueError(f"Invalid zarr_format: {zarr_format}. Expected 2 or 3")

Check warning on line 154 in src/zarr/core/array.py

View check run for this annotation

Codecov / codecov/patch

src/zarr/core/array.py#L154

Added line #L154 was not covered by tests
raise TypeError


Expand Down Expand Up @@ -266,17 +269,6 @@
store_path: StorePath,
config: ArrayConfigLike | None = None,
) -> None:
if isinstance(metadata, dict):
zarr_format = metadata["zarr_format"]
# TODO: remove this when we extensively type the dict representation of metadata
_metadata = cast(dict[str, JSON], metadata)
if zarr_format == 2:
metadata = ArrayV2Metadata.from_dict(_metadata)
elif zarr_format == 3:
metadata = ArrayV3Metadata.from_dict(_metadata)
else:
raise ValueError(f"Invalid zarr_format: {zarr_format}. Expected 2 or 3")

metadata_parsed = parse_array_metadata(metadata)
config_parsed = parse_array_config(config)

Expand Down
55 changes: 36 additions & 19 deletions tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,13 +70,19 @@ def test_create(memory_store: Store) -> None:

# TODO: parametrize over everything this function takes
@pytest.mark.parametrize("store", ["memory"], indirect=True)
def test_create_array(store: Store) -> None:
def test_create_array(store: Store, zarr_format: ZarrFormat) -> None:
attrs: dict[str, JSON] = {"foo": 100} # explicit type annotation to avoid mypy error
shape = (10, 10)
path = "foo"
data_val = 1
array_w = create_array(
store, name=path, shape=shape, attributes=attrs, chunks=shape, dtype="uint8"
store,
name=path,
shape=shape,
attributes=attrs,
chunks=shape,
dtype="uint8",
zarr_format=zarr_format,
)
array_w[:] = data_val
assert array_w.shape == shape
Expand All @@ -85,18 +91,27 @@ def test_create_array(store: Store) -> None:


@pytest.mark.parametrize("write_empty_chunks", [True, False])
def test_write_empty_chunks_warns(write_empty_chunks: bool) -> None:
def test_write_empty_chunks_warns(write_empty_chunks: bool, zarr_format: ZarrFormat) -> None:
"""
Test that using the `write_empty_chunks` kwarg on array access will raise a warning.
"""
match = "The `write_empty_chunks` keyword argument .*"
with pytest.warns(RuntimeWarning, match=match):
_ = zarr.array(
data=np.arange(10), shape=(10,), dtype="uint8", write_empty_chunks=write_empty_chunks
data=np.arange(10),
shape=(10,),
dtype="uint8",
write_empty_chunks=write_empty_chunks,
zarr_format=zarr_format,
)

with pytest.warns(RuntimeWarning, match=match):
_ = zarr.create(shape=(10,), dtype="uint8", write_empty_chunks=write_empty_chunks)
_ = zarr.create(
shape=(10,),
dtype="uint8",
write_empty_chunks=write_empty_chunks,
zarr_format=zarr_format,
)


@pytest.mark.parametrize("path", ["foo", "/", "/foo", "///foo/bar"])
Expand All @@ -113,18 +128,18 @@ def test_open_normalized_path(
assert node.path == normalize_path(path)


async def test_open_array(memory_store: MemoryStore) -> None:
async def test_open_array(memory_store: MemoryStore, zarr_format: ZarrFormat) -> None:
store = memory_store

# open array, create if doesn't exist
z = open(store=store, shape=100)
z = open(store=store, shape=100, zarr_format=zarr_format)
assert isinstance(z, Array)
assert z.shape == (100,)

# open array, overwrite
# store._store_dict = {}
store = MemoryStore()
z = open(store=store, shape=200)
z = open(store=store, shape=200, zarr_format=zarr_format)
assert isinstance(z, Array)
assert z.shape == (200,)

Expand All @@ -138,7 +153,7 @@ async def test_open_array(memory_store: MemoryStore) -> None:

# path not found
with pytest.raises(FileNotFoundError):
open(store="doesnotexist", mode="r")
open(store="doesnotexist", mode="r", zarr_format=zarr_format)


@pytest.mark.parametrize("store", ["memory"], indirect=True)
Expand All @@ -161,9 +176,9 @@ async def test_open_group(memory_store: MemoryStore) -> None:
assert "foo" in g

# open group, overwrite
# g = open_group(store=store)
# assert isinstance(g, Group)
# assert "foo" not in g
g = open_group(store=store, mode="w")
assert isinstance(g, Group)
assert "foo" not in g

# open group, read-only
store_cls = type(store)
Expand Down Expand Up @@ -306,7 +321,6 @@ def test_open_with_mode_w_minus(tmp_path: pathlib.Path) -> None:
zarr.open(store=tmp_path, mode="w-")


@pytest.mark.parametrize("zarr_format", [2, 3])
def test_array_order(zarr_format: ZarrFormat) -> None:
arr = zarr.ones(shape=(2, 2), order=None, zarr_format=zarr_format)
expected = zarr.config.get("array.order")
Expand All @@ -322,7 +336,6 @@ def test_array_order(zarr_format: ZarrFormat) -> None:


@pytest.mark.parametrize("order", ["C", "F"])
@pytest.mark.parametrize("zarr_format", [2, 3])
def test_array_order_warns(order: MemoryOrder | None, zarr_format: ZarrFormat) -> None:
with pytest.warns(RuntimeWarning, match="The `order` keyword argument .*"):
arr = zarr.ones(shape=(2, 2), order=order, zarr_format=zarr_format)
Expand Down Expand Up @@ -1093,13 +1106,16 @@ def test_open_falls_back_to_open_group() -> None:
assert group.attrs == {"key": "value"}


async def test_open_falls_back_to_open_group_async() -> None:
async def test_open_falls_back_to_open_group_async(zarr_format: ZarrFormat) -> None:
# https://github.com/zarr-developers/zarr-python/issues/2309
store = MemoryStore()
await zarr.api.asynchronous.open_group(store, attributes={"key": "value"})
await zarr.api.asynchronous.open_group(
store, attributes={"key": "value"}, zarr_format=zarr_format
)

group = await zarr.api.asynchronous.open(store=store)
assert isinstance(group, zarr.core.group.AsyncGroup)
assert group.metadata.zarr_format == zarr_format
assert group.attrs == {"key": "value"}


Expand Down Expand Up @@ -1135,13 +1151,14 @@ async def test_metadata_validation_error() -> None:
["local", "memory", "zip"],
indirect=True,
)
def test_open_array_with_mode_r_plus(store: Store) -> None:
def test_open_array_with_mode_r_plus(store: Store, zarr_format: ZarrFormat) -> None:
# 'r+' means read/write (must exist)
with pytest.raises(FileNotFoundError):
zarr.open_array(store=store, mode="r+")
zarr.ones(store=store, shape=(3, 3))
zarr.open_array(store=store, mode="r+", zarr_format=zarr_format)
zarr.ones(store=store, shape=(3, 3), zarr_format=zarr_format)
z2 = zarr.open_array(store=store, mode="r+")
assert isinstance(z2, Array)
assert z2.metadata.zarr_format == zarr_format
result = z2[:]
assert isinstance(result, NDArrayLike)
assert (result == 1).all()
Expand Down
126 changes: 107 additions & 19 deletions tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
from zarr.core.buffer import NDArrayLike, NDArrayLikeOrScalar, default_buffer_prototype
from zarr.core.buffer.cpu import NDBuffer
from zarr.core.chunk_grids import _auto_partition
from zarr.core.chunk_key_encodings import ChunkKeyEncodingParams
from zarr.core.common import JSON, MemoryOrder, ZarrFormat
from zarr.core.group import AsyncGroup
from zarr.core.indexing import BasicIndexer, ceildiv
Expand All @@ -51,7 +52,7 @@

if TYPE_CHECKING:
from zarr.core.array_spec import ArrayConfigLike
from zarr.core.metadata.v2 import ArrayV2Metadata
from zarr.core.metadata.v2 import ArrayV2Metadata


@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"])
Expand Down Expand Up @@ -227,10 +228,13 @@ def test_array_v3_fill_value(store: MemoryStore, fill_value: int, dtype_str: str
assert arr.fill_value.dtype == arr.dtype


def test_create_positional_args_deprecated() -> None:
store = MemoryStore()
with pytest.warns(FutureWarning, match="Pass"):
zarr.Array.create(store, (2, 2), dtype="f8")
async def test_create_deprecated() -> None:
    """
    Test that the ``create`` constructors warn when ``shape`` is passed positionally.

    Both ``AsyncArray.create`` and ``Array.create`` are expected to emit a
    ``DeprecationWarning`` together with a ``FutureWarning`` asking for keyword args.
    """
    future_match = re.escape("Pass shape=(2, 2) as keyword args")

    # Async constructor: the outer context checks the deprecation, the inner the keyword-arg hint.
    with pytest.warns(DeprecationWarning), pytest.warns(FutureWarning, match=future_match):
        await zarr.AsyncArray.create(MemoryStore(), (2, 2), dtype="f8")  # type: ignore[call-overload]

    # Sync constructor: same pair of warnings.
    with pytest.warns(DeprecationWarning), pytest.warns(FutureWarning, match=future_match):
        zarr.Array.create(MemoryStore(), (2, 2), dtype="f8")


def test_selection_positional_args_deprecated() -> None:
Expand Down Expand Up @@ -321,24 +325,47 @@ def test_serializable_sync_array(store: LocalStore, zarr_format: ZarrFormat) ->


@pytest.mark.parametrize("store", ["memory"], indirect=True)
def test_storage_transformers(store: MemoryStore) -> None:
@pytest.mark.parametrize("zarr_format", [2, 3, "invalid"])
def test_storage_transformers(store: MemoryStore, zarr_format: ZarrFormat | str) -> None:
"""
Test that v3 metadata declaring a storage transformer raises a ValueError, that v2 metadata
is accepted, and that an invalid zarr_format is rejected with a ValueError
"""
metadata_dict: dict[str, JSON] = {
"zarr_format": 3,
"node_type": "array",
"shape": (10,),
"chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}},
"data_type": "uint8",
"chunk_key_encoding": {"name": "v2", "configuration": {"separator": "/"}},
"codecs": (BytesCodec().to_dict(),),
"fill_value": 0,
"storage_transformers": ({"test": "should_raise"}),
}
match = "Arrays with storage transformers are not supported in zarr-python at this time."
with pytest.raises(ValueError, match=match):
metadata_dict: dict[str, JSON]
if zarr_format == 3:
metadata_dict = {
"zarr_format": 3,
"node_type": "array",
"shape": (10,),
"chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}},
"data_type": "uint8",
"chunk_key_encoding": {"name": "v2", "configuration": {"separator": "/"}},
"codecs": (BytesCodec().to_dict(),),
"fill_value": 0,
"storage_transformers": ({"test": "should_raise"}),
}
else:
metadata_dict = {
"zarr_format": zarr_format,
"shape": (10,),
"chunks": (1,),
"dtype": "uint8",
"dimension_separator": ".",
"codecs": (BytesCodec().to_dict(),),
"fill_value": 0,
"order": "C",
"storage_transformers": ({"test": "should_raise"}),
}
if zarr_format == 3:
match = "Arrays with storage transformers are not supported in zarr-python at this time."
with pytest.raises(ValueError, match=match):
Array.from_dict(StorePath(store), data=metadata_dict)
elif zarr_format == 2:
# no error expected: v2 metadata is accepted as-is
Array.from_dict(StorePath(store), data=metadata_dict)
else:
match = f"Invalid zarr_format: {zarr_format}. Expected 2 or 3"
with pytest.raises(ValueError, match=match):
Array.from_dict(StorePath(store), data=metadata_dict)


@pytest.mark.parametrize("test_cls", [Array, AsyncArray[Any]])
Expand Down Expand Up @@ -1105,6 +1132,56 @@ async def test_v3_chunk_encoding(
assert arr.filters == filters_expected
assert arr.compressors == compressors_expected

@staticmethod
@pytest.mark.parametrize("name", ["v2", "default", "invalid"])
@pytest.mark.parametrize("separator", [".", "/"])
async def test_chunk_key_encoding(
    name: str, separator: str, zarr_format: ZarrFormat, store: MemoryStore
) -> None:
    """
    Test how ``create_array`` handles the ``chunk_key_encoding`` argument.

    An unknown encoding name must raise a ``ValueError``, as must the
    ``"default"`` encoding when combined with Zarr format 2. For every other
    combination the array is created and, for v2 metadata, the requested
    separator must be stored as ``dimension_separator``.
    """
    # `name` is a plain str here (it may be "invalid"), hence the ignore.
    chunk_key_encoding = ChunkKeyEncodingParams(name=name, separator=separator)  # type: ignore[typeddict-item]

    # The two failure conditions are mutually exclusive, so at most one message is set.
    error_msg = ""
    if name == "invalid":
        error_msg = "Unknown chunk key encoding."
    if zarr_format == 2 and name == "default":
        error_msg = "Invalid chunk key encoding. For Zarr format 2 arrays, the `name` field of the chunk key encoding must be 'v2'."

    if error_msg:
        with pytest.raises(ValueError, match=re.escape(error_msg)):
            await create_array(
                store=store,
                dtype="uint8",
                shape=(10,),
                chunks=(1,),
                zarr_format=zarr_format,
                chunk_key_encoding=chunk_key_encoding,
            )
    else:
        arr = await create_array(
            store=store,
            dtype="uint8",
            shape=(10,),
            chunks=(1,),
            zarr_format=zarr_format,
            chunk_key_encoding=chunk_key_encoding,
        )
        # Only v2 metadata exposes `dimension_separator`; nothing further is checked otherwise.
        if isinstance(arr.metadata, ArrayV2Metadata):
            assert arr.metadata.dimension_separator == separator

@staticmethod
@pytest.mark.parametrize(
    ("kwargs", "error_msg"),
    [
        ({"serializer": "bytes"}, "Zarr format 2 arrays do not support `serializer`."),
        ({"dimension_names": ["test"]}, "Zarr format 2 arrays do not support dimension names."),
    ],
)
async def test_invalid_v2_arguments(
    kwargs: dict[str, Any], error_msg: str, store: MemoryStore
) -> None:
    """
    Test that keyword arguments unsupported by Zarr format 2 are rejected.

    Each parametrized ``kwargs`` dict is forwarded to ``create_array`` with
    ``zarr_format=2`` and must raise a ``ValueError`` matching ``error_msg``.
    """
    # Escape the message so its punctuation and backticks are matched literally.
    expected = re.escape(error_msg)
    with pytest.raises(ValueError, match=expected):
        await create_array(
            store=store,
            dtype="uint8",
            shape=(10,),
            chunks=(1,),
            zarr_format=2,
            **kwargs,
        )

@staticmethod
@pytest.mark.parametrize("dtype", ["uint8", "float32", "str"])
@pytest.mark.parametrize(
Expand Down Expand Up @@ -1584,3 +1661,14 @@ async def test_sharding_coordinate_selection() -> None:
result = arr[1, [0, 1]] # type: ignore[index]
assert isinstance(result, NDArrayLike)
assert (result == np.array([[12, 13, 14, 15], [16, 17, 18, 19]])).all()

@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"])
def test_array_repr(store: Store) -> None:
    """Test that ``str(arr)`` reports the store, shape and dtype of a newly created array."""
    shape = (2, 3, 4)
    dtype = "uint8"
    arr = zarr.create_array(store, shape=shape, dtype=dtype)
    expected = f"<Array {store} shape={shape} dtype={dtype}>"
    assert str(arr) == expected