Skip to content

Commit c57e862

Browse files
committed
unbreak chunks initialized
1 parent 5f49d24 commit c57e862

File tree

5 files changed

+62
-5
lines changed

5 files changed

+62
-5
lines changed

changes/2862.bugfix.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix a bug that prevented the number of initialized chunks being counted properly.

docs/user-guide/groups.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ property. E.g.::
140140
No. bytes : 8000000 (7.6M)
141141
No. bytes stored : 1614
142142
Storage ratio : 4956.6
143-
Chunks Initialized : 0
143+
Chunks Initialized : 10
144144
>>> baz.info
145145
Type : Array
146146
Zarr format : 3

src/zarr/core/array.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@
116116
get_pipeline_class,
117117
)
118118
from zarr.storage._common import StorePath, ensure_no_existing_node, make_store_path
119+
from zarr.storage._utils import _relativize_path
119120

120121
if TYPE_CHECKING:
121122
from collections.abc import Iterator, Sequence
@@ -3730,7 +3731,12 @@ async def chunks_initialized(
37303731
store_contents = [
37313732
x async for x in array.store_path.store.list_prefix(prefix=array.store_path.path)
37323733
]
3733-
return tuple(chunk_key for chunk_key in array._iter_chunk_keys() if chunk_key in store_contents)
3734+
store_contents_relative = [
3735+
_relativize_path(key, array.store_path.path) for key in store_contents
3736+
]
3737+
return tuple(
3738+
chunk_key for chunk_key in array._iter_chunk_keys() if chunk_key in store_contents_relative
3739+
)
37343740

37353741

37363742
def _build_parents(

src/zarr/storage/_utils.py

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,11 +74,60 @@ def _join_paths(paths: Iterable[str]) -> str:
7474
"""
7575
Filter out instances of '' and join the remaining strings with '/'.
7676
77-
Because the root node of a zarr hierarchy is represented by an empty string,
77+
Parameters
78+
----------
79+
paths : Iterable[str]
80+
81+
Returns
82+
-------
83+
str
84+
85+
Examples
86+
--------
87+
>>> _join_paths(["", "a", "b"])
88+
'a/b'
89+
>>> _join_paths(["a", "b", "c"])
90+
'a/b/c'
7891
"""
7992
return "/".join(filter(lambda v: v != "", paths))
8093

8194

95+
def _relativize_path(path: str, prefix: str) -> str:
96+
"""
97+
Make a "\"-delimited path relative to some prefix. If the prefix is '', then the path is
98+
returned as-is. Otherwise, the prefix is removed from the path as well as the separator
99+
string "\".
100+
101+
If ``prefix`` is not the empty string and``path`` does not start with ``prefix``
102+
followed by a "/" character, then an error is raised.
103+
104+
Parameters
105+
----------
106+
path : str
107+
The path to make relative to the prefix.
108+
prefix : str
109+
The prefix to make relative to.
110+
111+
Returns
112+
-------
113+
str
114+
115+
Examples
116+
--------
117+
>>> _relativize_paths("", "a/b")
118+
'a/b'
119+
>>> _relativize_paths("a/b", "a/b/c")
120+
'c'
121+
"""
122+
if prefix == "":
123+
return path
124+
else:
125+
_prefix = prefix + "/"
126+
if not path.startswith(_prefix):
127+
raise ValueError(f"The first component of {path} does not start with {prefix}.")
128+
return path.removeprefix(f"{prefix}/")
129+
130+
82131
def _normalize_paths(paths: Iterable[str]) -> tuple[str, ...]:
83132
"""
84133
Normalize the input paths according to the normalization scheme used for zarr node paths.

tests/test_array.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -387,12 +387,13 @@ async def test_nchunks_initialized(test_cls: type[Array] | type[AsyncArray[Any]]
387387
assert observed == expected
388388

389389

390-
async def test_chunks_initialized() -> None:
390+
@pytest.mark.parametrize("path", ["", "foo"])
391+
async def test_chunks_initialized(path: str) -> None:
391392
"""
392393
Test that chunks_initialized accurately returns the keys of stored chunks.
393394
"""
394395
store = MemoryStore()
395-
arr = zarr.create_array(store, shape=(100,), chunks=(10,), dtype="i4")
396+
arr = zarr.create_array(store, name=path, shape=(100,), chunks=(10,), dtype="i4")
396397

397398
chunks_accumulated = tuple(
398399
accumulate(tuple(tuple(v.split(" ")) for v in arr._iter_chunk_keys()))

0 commit comments

Comments
 (0)