Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changes/2862.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix a bug that prevented the number of initialized chunks being counted properly.
2 changes: 1 addition & 1 deletion docs/user-guide/groups.rst
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ property. E.g.::
No. bytes : 8000000 (7.6M)
No. bytes stored : 1614
Storage ratio : 4956.6
Chunks Initialized : 0
Chunks Initialized : 10
>>> baz.info
Type : Array
Zarr format : 3
Expand Down
8 changes: 7 additions & 1 deletion src/zarr/core/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@
get_pipeline_class,
)
from zarr.storage._common import StorePath, ensure_no_existing_node, make_store_path
from zarr.storage._utils import _relativize_path

if TYPE_CHECKING:
from collections.abc import Iterator, Sequence
Expand Down Expand Up @@ -3737,7 +3738,12 @@ async def chunks_initialized(
store_contents = [
x async for x in array.store_path.store.list_prefix(prefix=array.store_path.path)
]
return tuple(chunk_key for chunk_key in array._iter_chunk_keys() if chunk_key in store_contents)
store_contents_relative = [
_relativize_path(key, array.store_path.path) for key in store_contents
]
return tuple(
chunk_key for chunk_key in array._iter_chunk_keys() if chunk_key in store_contents_relative
)


def _build_parents(
Expand Down
51 changes: 50 additions & 1 deletion src/zarr/storage/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,11 +74,60 @@
"""
Filter out instances of '' and join the remaining strings with '/'.
Because the root node of a zarr hierarchy is represented by an empty string,
Parameters
----------
paths : Iterable[str]
Returns
-------
str
Examples
--------
>>> _join_paths(["", "a", "b"])
'a/b'
>>> _join_paths(["a", "b", "c"])
'a/b/c'
"""
return "/".join(filter(lambda v: v != "", paths))


def _relativize_path(path: str, prefix: str) -> str:
"""
Make a "\"-delimited path relative to some prefix. If the prefix is '', then the path is
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is "\"-delimited right here, or should this be "/"-delimited?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good catch, ""-delimited is not correct, and it should be the other one

returned as-is. Otherwise, the prefix is removed from the path as well as the separator
string "\".
If ``prefix`` is not the empty string and``path`` does not start with ``prefix``
followed by a "/" character, then an error is raised.
Parameters
----------
path : str
The path to make relative to the prefix.
prefix : str
The prefix to make relative to.
Returns
-------
str
Examples
--------
>>> _relativize_paths("", "a/b")
'a/b'
>>> _relativize_paths("a/b", "a/b/c")
'c'
"""
if prefix == "":
return path
else:
_prefix = prefix + "/"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What behavior do we want for prefix="<some-prefix>/", i.e. a prefix ending with a /?

IIUC, this will currently always raise a ValueError, unless path for some reason starts with <some-prefix>//

Maybe we should have _prefix = prefix.rstrip("/") + "/"? Or if you're confident the callers are careful about not providing a prefix with a trailing slash this is fine.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I was assuming here that paths have already been normalized by normalize_path, in which case prefixes cannot end with "/". I should make this explicit in the docstring for the function here.

if not path.startswith(_prefix):
raise ValueError(f"The first component of {path} does not start with {prefix}.")

Check warning on line 127 in src/zarr/storage/_utils.py

View check run for this annotation

Codecov / codecov/patch

src/zarr/storage/_utils.py#L127

Added line #L127 was not covered by tests
return path.removeprefix(f"{prefix}/")


def _normalize_paths(paths: Iterable[str]) -> tuple[str, ...]:
"""
Normalize the input paths according to the normalization scheme used for zarr node paths.
Expand Down
5 changes: 3 additions & 2 deletions tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,12 +387,13 @@ async def test_nchunks_initialized(test_cls: type[Array] | type[AsyncArray[Any]]
assert observed == expected


async def test_chunks_initialized() -> None:
@pytest.mark.parametrize("path", ["", "foo"])
async def test_chunks_initialized(path: str) -> None:
"""
Test that chunks_initialized accurately returns the keys of stored chunks.
"""
store = MemoryStore()
arr = zarr.create_array(store, shape=(100,), chunks=(10,), dtype="i4")
arr = zarr.create_array(store, name=path, shape=(100,), chunks=(10,), dtype="i4")

chunks_accumulated = tuple(
accumulate(tuple(tuple(v.split(" ")) for v in arr._iter_chunk_keys()))
Expand Down