diff --git a/changes/2862.bugfix.rst b/changes/2862.bugfix.rst new file mode 100644 index 0000000000..bbe6f0746e --- /dev/null +++ b/changes/2862.bugfix.rst @@ -0,0 +1 @@ +Fix a bug that prevented the number of initialized chunks being counted properly. \ No newline at end of file diff --git a/docs/user-guide/groups.rst b/docs/user-guide/groups.rst index 4268004f70..d5a0a7ccee 100644 --- a/docs/user-guide/groups.rst +++ b/docs/user-guide/groups.rst @@ -140,7 +140,7 @@ property. E.g.:: No. bytes : 8000000 (7.6M) No. bytes stored : 1614 Storage ratio : 4956.6 - Chunks Initialized : 0 + Chunks Initialized : 10 >>> baz.info Type : Array Zarr format : 3 diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index c6217a3d93..9852bf8d5f 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -117,6 +117,7 @@ get_pipeline_class, ) from zarr.storage._common import StorePath, ensure_no_existing_node, make_store_path +from zarr.storage._utils import _relativize_path if TYPE_CHECKING: from collections.abc import Iterator, Sequence @@ -3737,7 +3738,12 @@ async def chunks_initialized( store_contents = [ x async for x in array.store_path.store.list_prefix(prefix=array.store_path.path) ] - return tuple(chunk_key for chunk_key in array._iter_chunk_keys() if chunk_key in store_contents) + store_contents_relative = [ + _relativize_path(path=key, prefix=array.store_path.path) for key in store_contents + ] + return tuple( + chunk_key for chunk_key in array._iter_chunk_keys() if chunk_key in store_contents_relative + ) def _build_parents( diff --git a/src/zarr/storage/_utils.py b/src/zarr/storage/_utils.py index eda4342f47..145790278c 100644 --- a/src/zarr/storage/_utils.py +++ b/src/zarr/storage/_utils.py @@ -74,11 +74,62 @@ def _join_paths(paths: Iterable[str]) -> str: """ Filter out instances of '' and join the remaining strings with '/'. - Because the root node of a zarr hierarchy is represented by an empty string, + Parameters + ---------- + paths : Iterable[str] + + Returns + ------- + str + + Examples + -------- + >>> _join_paths(["", "a", "b"]) + 'a/b' + >>> _join_paths(["a", "b", "c"]) + 'a/b/c' """ return "/".join(filter(lambda v: v != "", paths)) +def _relativize_path(*, path: str, prefix: str) -> str: + """ + Make a "/"-delimited path relative to some prefix. If the prefix is '', then the path is + returned as-is. Otherwise, the prefix is removed from the path as well as the separator + string "/". + + If ``prefix`` is not the empty string and ``path`` does not start with ``prefix`` + followed by a "/" character, then an error is raised. + + This function assumes that the prefix does not end with "/". + + Parameters + ---------- + path : str + The path to make relative to the prefix. + prefix : str + The prefix to make the path relative to. + + Returns + ------- + str + + Examples + -------- + >>> _relativize_path(path="", prefix="a/b") + 'a/b' + >>> _relativize_path(path="a/b", prefix="a/b/c") + 'c' + """ + if prefix == "": + return path + else: + _prefix = prefix + "/" + if not path.startswith(_prefix): + raise ValueError(f"The first component of {path} does not start with {prefix}.") + return path.removeprefix(f"{prefix}/") + + def _normalize_paths(paths: Iterable[str]) -> tuple[str, ...]: """ Normalize the input paths according to the normalization scheme used for zarr node paths. diff --git a/tests/test_array.py b/tests/test_array.py index 4be9bbde43..989fe30592 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -387,12 +387,13 @@ async def test_nchunks_initialized(test_cls: type[Array] | type[AsyncArray[Any]] assert observed == expected -async def test_chunks_initialized() -> None: +@pytest.mark.parametrize("path", ["", "foo"]) +async def test_chunks_initialized(path: str) -> None: """ Test that chunks_initialized accurately returns the keys of stored chunks. """ store = MemoryStore() - arr = zarr.create_array(store, shape=(100,), chunks=(10,), dtype="i4") + arr = zarr.create_array(store, name=path, shape=(100,), chunks=(10,), dtype="i4") chunks_accumulated = tuple( accumulate(tuple(tuple(v.split(" ")) for v in arr._iter_chunk_keys())) diff --git a/tests/test_store/test_core.py b/tests/test_store/test_core.py index 87d0e6e40d..1ac410954b 100644 --- a/tests/test_store/test_core.py +++ b/tests/test_store/test_core.py @@ -8,7 +8,13 @@ from zarr.core.common import AccessModeLiteral, ZarrFormat from zarr.storage import FsspecStore, LocalStore, MemoryStore, StoreLike, StorePath from zarr.storage._common import contains_array, contains_group, make_store_path -from zarr.storage._utils import _join_paths, _normalize_path_keys, _normalize_paths, normalize_path +from zarr.storage._utils import ( + _join_paths, + _normalize_path_keys, + _normalize_paths, + _relativize_path, + normalize_path, +) @pytest.mark.parametrize("path", ["foo", "foo/bar"]) @@ -221,3 +227,27 @@ def test_normalize_path_keys(): """ data = {"a": 10, "//b": 10} assert _normalize_path_keys(data) == {normalize_path(k): v for k, v in data.items()} + + +@pytest.mark.parametrize( + ("path", "prefix", "expected"), + [ + ("a", "", "a"), + ("a/b/c", "a/b", "c"), + ("a/b/c", "a", "b/c"), + ], +) +def test_relativize_path_valid(path: str, prefix: str, expected: str) -> None: + """ + Test the normal behavior of the _relativize_path function. Prefixes should be removed from the + path argument. + """ + assert _relativize_path(path=path, prefix=prefix) == expected + + +def test_relativize_path_invalid() -> None: + path = "a/b/c" + prefix = "b" + msg = f"The first component of {path} does not start with {prefix}." + with pytest.raises(ValueError, match=msg): + _relativize_path(path="a/b/c", prefix="b")