From c57e8624430a346881eec96125c8120457fca300 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 16 Apr 2025 18:41:54 -0400 Subject: [PATCH 1/4] unbreak chunks initialized --- changes/2862.bugfix.rst | 1 + docs/user-guide/groups.rst | 2 +- src/zarr/core/array.py | 8 +++++- src/zarr/storage/_utils.py | 51 +++++++++++++++++++++++++++++++++++++- tests/test_array.py | 5 ++-- 5 files changed, 62 insertions(+), 5 deletions(-) create mode 100644 changes/2862.bugfix.rst diff --git a/changes/2862.bugfix.rst b/changes/2862.bugfix.rst new file mode 100644 index 0000000000..bbe6f0746e --- /dev/null +++ b/changes/2862.bugfix.rst @@ -0,0 +1 @@ +Fix a bug that prevented the number of initialized chunks being counted properly. \ No newline at end of file diff --git a/docs/user-guide/groups.rst b/docs/user-guide/groups.rst index 4268004f70..d5a0a7ccee 100644 --- a/docs/user-guide/groups.rst +++ b/docs/user-guide/groups.rst @@ -140,7 +140,7 @@ property. E.g.:: No. bytes : 8000000 (7.6M) No. bytes stored : 1614 Storage ratio : 4956.6 - Chunks Initialized : 0 + Chunks Initialized : 10 >>> baz.info Type : Array Zarr format : 3 diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index f2c88c508b..d5280e8602 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -116,6 +116,7 @@ get_pipeline_class, ) from zarr.storage._common import StorePath, ensure_no_existing_node, make_store_path +from zarr.storage._utils import _relativize_path if TYPE_CHECKING: from collections.abc import Iterator, Sequence @@ -3730,7 +3731,12 @@ async def chunks_initialized( store_contents = [ x async for x in array.store_path.store.list_prefix(prefix=array.store_path.path) ] - return tuple(chunk_key for chunk_key in array._iter_chunk_keys() if chunk_key in store_contents) + store_contents_relative = [ + _relativize_path(key, array.store_path.path) for key in store_contents + ] + return tuple( + chunk_key for chunk_key in array._iter_chunk_keys() if chunk_key in store_contents_relative + ) def _build_parents( diff --git a/src/zarr/storage/_utils.py b/src/zarr/storage/_utils.py index eda4342f47..b6569a7944 100644 --- a/src/zarr/storage/_utils.py +++ b/src/zarr/storage/_utils.py @@ -74,11 +74,60 @@ def _join_paths(paths: Iterable[str]) -> str: """ Filter out instances of '' and join the remaining strings with '/'. - Because the root node of a zarr hierarchy is represented by an empty string, + Parameters + ---------- + paths : Iterable[str] + + Returns + ------- + str + + Examples + -------- + >>> _join_paths(["", "a", "b"]) + 'a/b' + >>> _join_paths(["a", "b", "c"]) + 'a/b/c' """ return "/".join(filter(lambda v: v != "", paths)) +def _relativize_path(path: str, prefix: str) -> str: + """ + Make a "\"-delimited path relative to some prefix. If the prefix is '', then the path is + returned as-is. Otherwise, the prefix is removed from the path as well as the separator + string "\". + + If ``prefix`` is not the empty string and``path`` does not start with ``prefix`` + followed by a "/" character, then an error is raised. + + Parameters + ---------- + path : str + The path to make relative to the prefix. + prefix : str + The prefix to make relative to. + + Returns + ------- + str + + Examples + -------- + >>> _relativize_paths("", "a/b") + 'a/b' + >>> _relativize_paths("a/b", "a/b/c") + 'c' + """ + if prefix == "": + return path + else: + _prefix = prefix + "/" + if not path.startswith(_prefix): + raise ValueError(f"The first component of {path} does not start with {prefix}.") + return path.removeprefix(f"{prefix}/") + + def _normalize_paths(paths: Iterable[str]) -> tuple[str, ...]: """ Normalize the input paths according to the normalization scheme used for zarr node paths. diff --git a/tests/test_array.py b/tests/test_array.py index 5c3c556dfb..9bb5c45fc9 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -387,12 +387,13 @@ async def test_nchunks_initialized(test_cls: type[Array] | type[AsyncArray[Any]] assert observed == expected -async def test_chunks_initialized() -> None: +@pytest.mark.parametrize("path", ["", "foo"]) +async def test_chunks_initialized(path: str) -> None: """ Test that chunks_initialized accurately returns the keys of stored chunks. """ store = MemoryStore() - arr = zarr.create_array(store, shape=(100,), chunks=(10,), dtype="i4") + arr = zarr.create_array(store, name=path, shape=(100,), chunks=(10,), dtype="i4") chunks_accumulated = tuple( accumulate(tuple(tuple(v.split(" ")) for v in arr._iter_chunk_keys())) From e2e6c83c0c62a12c8b9c83524f76753628d3e2ca Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Mon, 12 May 2025 11:54:52 +0200 Subject: [PATCH 2/4] Update src/zarr/storage/_utils.py Co-authored-by: Tom Augspurger --- src/zarr/storage/_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/storage/_utils.py b/src/zarr/storage/_utils.py index b6569a7944..351bd2ee59 100644 --- a/src/zarr/storage/_utils.py +++ b/src/zarr/storage/_utils.py @@ -98,7 +98,7 @@ def _relativize_path(path: str, prefix: str) -> str: returned as-is. Otherwise, the prefix is removed from the path as well as the separator string "\". - If ``prefix`` is not the empty string and``path`` does not start with ``prefix`` + If ``prefix`` is not the empty string and ``path`` does not start with ``prefix`` followed by a "/" character, then an error is raised. Parameters From ab3e1119d2008b67bed3278e6f68edbb98c48811 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 12 May 2025 13:57:22 +0200 Subject: [PATCH 3/4] update docstring --- src/zarr/storage/_utils.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/zarr/storage/_utils.py b/src/zarr/storage/_utils.py index 351bd2ee59..1b1545fc19 100644 --- a/src/zarr/storage/_utils.py +++ b/src/zarr/storage/_utils.py @@ -94,19 +94,21 @@ def _join_paths(paths: Iterable[str]) -> str: def _relativize_path(path: str, prefix: str) -> str: """ - Make a "\"-delimited path relative to some prefix. If the prefix is '', then the path is + Make a "/"-delimited path relative to some prefix. If the prefix is '', then the path is returned as-is. Otherwise, the prefix is removed from the path as well as the separator - string "\". + string "/". If ``prefix`` is not the empty string and ``path`` does not start with ``prefix`` followed by a "/" character, then an error is raised. + This function assumes that the prefix does not end with "/". + Parameters ---------- path : str The path to make relative to the prefix. prefix : str - The prefix to make relative to. + The prefix to make the path relative to. Returns ------- @@ -114,9 +116,9 @@ def _relativize_path(path: str, prefix: str) -> str: Examples -------- - >>> _relativize_paths("", "a/b") + >>> _relativize_path("", "a/b") 'a/b' - >>> _relativize_paths("a/b", "a/b/c") + >>> _relativize_path("a/b", "a/b/c") 'c' """ if prefix == "": From 3db2642628a38ce21ea6b96d93bb229b682bbd54 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 12 May 2025 15:36:09 +0200 Subject: [PATCH 4/4] make relativize_paths kw-only, and add tests --- src/zarr/core/array.py | 2 +- src/zarr/storage/_utils.py | 6 +++--- tests/test_store/test_core.py | 32 +++++++++++++++++++++++++++++++- 3 files changed, 35 insertions(+), 5 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index ce2a362a37..9852bf8d5f 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -3739,7 +3739,7 @@ async def chunks_initialized( x async for x in array.store_path.store.list_prefix(prefix=array.store_path.path) ] store_contents_relative = [ - _relativize_path(key, array.store_path.path) for key in store_contents + _relativize_path(path=key, prefix=array.store_path.path) for key in store_contents ] return tuple( chunk_key for chunk_key in array._iter_chunk_keys() if chunk_key in store_contents_relative diff --git a/src/zarr/storage/_utils.py b/src/zarr/storage/_utils.py index 1b1545fc19..145790278c 100644 --- a/src/zarr/storage/_utils.py +++ b/src/zarr/storage/_utils.py @@ -92,7 +92,7 @@ def _join_paths(paths: Iterable[str]) -> str: return "/".join(filter(lambda v: v != "", paths)) -def _relativize_path(path: str, prefix: str) -> str: +def _relativize_path(*, path: str, prefix: str) -> str: """ Make a "/"-delimited path relative to some prefix. If the prefix is '', then the path is returned as-is. Otherwise, the prefix is removed from the path as well as the separator @@ -116,9 +116,9 @@ def _relativize_path(path: str, prefix: str) -> str: Examples -------- - >>> _relativize_path("", "a/b") + >>> _relativize_path(path="", prefix="a/b") 'a/b' - >>> _relativize_path("a/b", "a/b/c") + >>> _relativize_path(path="a/b", prefix="a/b/c") 'c' """ if prefix == "": diff --git a/tests/test_store/test_core.py b/tests/test_store/test_core.py index 87d0e6e40d..1ac410954b 100644 --- a/tests/test_store/test_core.py +++ b/tests/test_store/test_core.py @@ -8,7 +8,13 @@ from zarr.core.common import AccessModeLiteral, ZarrFormat from zarr.storage import FsspecStore, LocalStore, MemoryStore, StoreLike, StorePath from zarr.storage._common import contains_array, contains_group, make_store_path -from zarr.storage._utils import _join_paths, _normalize_path_keys, _normalize_paths, normalize_path +from zarr.storage._utils import ( + _join_paths, + _normalize_path_keys, + _normalize_paths, + _relativize_path, + normalize_path, +) @pytest.mark.parametrize("path", ["foo", "foo/bar"]) @@ -221,3 +227,27 @@ def test_normalize_path_keys(): """ data = {"a": 10, "//b": 10} assert _normalize_path_keys(data) == {normalize_path(k): v for k, v in data.items()} + + +@pytest.mark.parametrize( + ("path", "prefix", "expected"), + [ + ("a", "", "a"), + ("a/b/c", "a/b", "c"), + ("a/b/c", "a", "b/c"), + ], +) +def test_relativize_path_valid(path: str, prefix: str, expected: str) -> None: + """ + Test the normal behavior of the _relativize_path function. Prefixes should be removed from the + path argument. + """ + assert _relativize_path(path=path, prefix=prefix) == expected + + +def test_relativize_path_invalid() -> None: + path = "a/b/c" + prefix = "b" + msg = f"The first component of {path} does not start with {prefix}." + with pytest.raises(ValueError, match=msg): + _relativize_path(path="a/b/c", prefix="b")