Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changes/2862.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix a bug that prevented the number of initialized chunks being counted properly.
2 changes: 1 addition & 1 deletion docs/user-guide/groups.rst
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ property. E.g.::
No. bytes : 8000000 (7.6M)
No. bytes stored : 1614
Storage ratio : 4956.6
Chunks Initialized : 0
Chunks Initialized : 10
>>> baz.info
Type : Array
Zarr format : 3
Expand Down
8 changes: 7 additions & 1 deletion src/zarr/core/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@
get_pipeline_class,
)
from zarr.storage._common import StorePath, ensure_no_existing_node, make_store_path
from zarr.storage._utils import _relativize_path

if TYPE_CHECKING:
from collections.abc import Iterator, Sequence
Expand Down Expand Up @@ -3737,7 +3738,12 @@ async def chunks_initialized(
store_contents = [
x async for x in array.store_path.store.list_prefix(prefix=array.store_path.path)
]
return tuple(chunk_key for chunk_key in array._iter_chunk_keys() if chunk_key in store_contents)
store_contents_relative = [
_relativize_path(path=key, prefix=array.store_path.path) for key in store_contents
]
return tuple(
chunk_key for chunk_key in array._iter_chunk_keys() if chunk_key in store_contents_relative
)


def _build_parents(
Expand Down
53 changes: 52 additions & 1 deletion src/zarr/storage/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,11 +74,62 @@
"""
Filter out instances of '' and join the remaining strings with '/'.

Because the root node of a zarr hierarchy is represented by an empty string,
Parameters
----------
paths : Iterable[str]

Returns
-------
str

Examples
--------
>>> _join_paths(["", "a", "b"])
'a/b'
>>> _join_paths(["a", "b", "c"])
'a/b/c'
"""
return "/".join(filter(lambda v: v != "", paths))


def _relativize_path(*, path: str, prefix: str) -> str:
"""
Make a "/"-delimited path relative to some prefix. If the prefix is '', then the path is
returned as-is. Otherwise, the prefix is removed from the path as well as the separator
string "/".

If ``prefix`` is not the empty string and ``path`` does not start with ``prefix``
followed by a "/" character, then an error is raised.

This function assumes that the prefix does not end with "/".

Parameters
----------
path : str
The path to make relative to the prefix.
prefix : str
The prefix to make the path relative to.

Returns
-------
str

Examples
--------
>>> _relativize_path(path="", prefix="a/b")
'a/b'
>>> _relativize_path(path="a/b", prefix="a/b/c")
'c'
"""
if prefix == "":
return path

Check warning on line 125 in src/zarr/storage/_utils.py

View check run for this annotation

Codecov / codecov/patch

src/zarr/storage/_utils.py#L124-L125

Added lines #L124 - L125 were not covered by tests
else:
_prefix = prefix + "/"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What behavior do we want for prefix="<some-prefix>/", i.e. a prefix ending with a /?

IIUC, this will currently always raise a ValueError, unless path for some reason starts with <some-prefix>//

Maybe we should have _prefix = prefix.rstrip("/") + "/"? Or if you're confident the callers are careful about not providing a prefix with a trailing slash this is fine.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I was assuming here that paths have already been normalized by normalize_path, in which case prefixes cannot end with "/". I should make this explicit in the docstring for the function here.

if not path.startswith(_prefix):
raise ValueError(f"The first component of {path} does not start with {prefix}.")
return path.removeprefix(f"{prefix}/")

Check warning on line 130 in src/zarr/storage/_utils.py

View check run for this annotation

Codecov / codecov/patch

src/zarr/storage/_utils.py#L127-L130

Added lines #L127 - L130 were not covered by tests


def _normalize_paths(paths: Iterable[str]) -> tuple[str, ...]:
"""
Normalize the input paths according to the normalization scheme used for zarr node paths.
Expand Down
5 changes: 3 additions & 2 deletions tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,12 +387,13 @@ async def test_nchunks_initialized(test_cls: type[Array] | type[AsyncArray[Any]]
assert observed == expected


async def test_chunks_initialized() -> None:
@pytest.mark.parametrize("path", ["", "foo"])
async def test_chunks_initialized(path: str) -> None:
"""
Test that chunks_initialized accurately returns the keys of stored chunks.
"""
store = MemoryStore()
arr = zarr.create_array(store, shape=(100,), chunks=(10,), dtype="i4")
arr = zarr.create_array(store, name=path, shape=(100,), chunks=(10,), dtype="i4")

chunks_accumulated = tuple(
accumulate(tuple(tuple(v.split(" ")) for v in arr._iter_chunk_keys()))
Expand Down
32 changes: 31 additions & 1 deletion tests/test_store/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,13 @@
from zarr.core.common import AccessModeLiteral, ZarrFormat
from zarr.storage import FsspecStore, LocalStore, MemoryStore, StoreLike, StorePath
from zarr.storage._common import contains_array, contains_group, make_store_path
from zarr.storage._utils import _join_paths, _normalize_path_keys, _normalize_paths, normalize_path
from zarr.storage._utils import (
_join_paths,
_normalize_path_keys,
_normalize_paths,
_relativize_path,
normalize_path,
)


@pytest.mark.parametrize("path", ["foo", "foo/bar"])
Expand Down Expand Up @@ -221,3 +227,27 @@ def test_normalize_path_keys():
"""
data = {"a": 10, "//b": 10}
assert _normalize_path_keys(data) == {normalize_path(k): v for k, v in data.items()}


@pytest.mark.parametrize(
("path", "prefix", "expected"),
[
("a", "", "a"),
("a/b/c", "a/b", "c"),
("a/b/c", "a", "b/c"),
],
)
def test_relativize_path_valid(path: str, prefix: str, expected: str) -> None:
"""
Test the normal behavior of the _relativize_path function. Prefixes should be removed from the
path argument.
"""
assert _relativize_path(path=path, prefix=prefix) == expected


def test_relativize_path_invalid() -> None:
path = "a/b/c"
prefix = "b"
msg = f"The first component of {path} does not start with {prefix}."
with pytest.raises(ValueError, match=msg):
_relativize_path(path="a/b/c", prefix="b")