Skip to content

Commit 858d4fb

Browse files
committed
feature(store,group,array): stores learn to delete prefixes when
overwriting nodes - add Store.delete_dir and Store.delete_prefix - update array and group creation methods to call delete_dir - change list_prefix to return absolue keys
1 parent 8726734 commit 858d4fb

File tree

14 files changed

+115
-30
lines changed

14 files changed

+115
-30
lines changed

src/zarr/abc/store.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -371,6 +371,36 @@ def list_dir(self, prefix: str) -> AsyncGenerator[str, None]:
371371
"""
372372
...
373373

374+
async def delete_dir(self, prefix: str, recursive: bool = True) -> None:
375+
"""
376+
Remove all keys and prefixes in the store that begin with a given prefix.
377+
"""
378+
if not self.supports_deletes:
379+
raise NotImplementedError
380+
if not self.supports_listing:
381+
raise NotImplementedError
382+
self._check_writable()
383+
if recursive:
384+
if not prefix.endswith("/"):
385+
prefix += "/"
386+
async for key in self.list_prefix(prefix):
387+
await self.delete(f"{key}")
388+
else:
389+
async for key in self.list_dir(prefix):
390+
await self.delete(f"{prefix}/{key}")
391+
392+
async def delete_prefix(self, prefix: str) -> None:
393+
"""
394+
Remove all keys in the store that begin with a given prefix.
395+
"""
396+
if not self.supports_deletes:
397+
raise NotImplementedError
398+
if not self.supports_listing:
399+
raise NotImplementedError
400+
self._check_writable()
401+
async for key in self.list_prefix(prefix):
402+
await self.delete(f"{key}")
403+
374404
def close(self) -> None:
375405
"""Close the store."""
376406
self._is_open = False

src/zarr/core/array.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -554,7 +554,12 @@ async def _create_v3(
554554
attributes: dict[str, JSON] | None = None,
555555
exists_ok: bool = False,
556556
) -> AsyncArray[ArrayV3Metadata]:
557-
if not exists_ok:
557+
if exists_ok:
558+
if store_path.store.supports_deletes:
559+
await store_path.delete_dir(recursive=True)
560+
else:
561+
await ensure_no_existing_node(store_path, zarr_format=3)
562+
else:
558563
await ensure_no_existing_node(store_path, zarr_format=3)
559564

560565
shape = parse_shapelike(shape)
@@ -606,7 +611,12 @@ async def _create_v2(
606611
attributes: dict[str, JSON] | None = None,
607612
exists_ok: bool = False,
608613
) -> AsyncArray[ArrayV2Metadata]:
609-
if not exists_ok:
614+
if exists_ok:
615+
if store_path.store.supports_deletes:
616+
await store_path.delete_dir(recursive=True)
617+
else:
618+
await ensure_no_existing_node(store_path, zarr_format=2)
619+
else:
610620
await ensure_no_existing_node(store_path, zarr_format=2)
611621

612622
if order is None:

src/zarr/core/group.py

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -404,7 +404,13 @@ async def from_store(
404404
zarr_format: ZarrFormat = 3,
405405
) -> AsyncGroup:
406406
store_path = await make_store_path(store)
407-
if not exists_ok:
407+
408+
if exists_ok:
409+
if store_path.store.supports_deletes:
410+
await store_path.delete_dir(recursive=True)
411+
else:
412+
await ensure_no_existing_node(store_path, zarr_format=zarr_format)
413+
else:
408414
await ensure_no_existing_node(store_path, zarr_format=zarr_format)
409415
attributes = attributes or {}
410416
group = cls(
@@ -710,19 +716,8 @@ def _getitem_consolidated(
710716

711717
async def delitem(self, key: str) -> None:
712718
store_path = self.store_path / key
713-
if self.metadata.zarr_format == 3:
714-
await (store_path / ZARR_JSON).delete()
715-
716-
elif self.metadata.zarr_format == 2:
717-
await asyncio.gather(
718-
(store_path / ZGROUP_JSON).delete(), # TODO: missing_ok=False
719-
(store_path / ZARRAY_JSON).delete(), # TODO: missing_ok=False
720-
(store_path / ZATTRS_JSON).delete(), # TODO: missing_ok=True
721-
)
722-
723-
else:
724-
raise ValueError(f"unexpected zarr_format: {self.metadata.zarr_format}")
725719

720+
await store_path.delete_dir(recursive=True)
726721
if self.metadata.consolidated_metadata:
727722
self.metadata.consolidated_metadata.metadata.pop(key, None)
728723
await self._save_metadata()

src/zarr/storage/common.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,18 @@ async def delete(self) -> None:
101101
"""
102102
await self.store.delete(self.path)
103103

104+
async def delete_dir(self, recursive: bool = False) -> None:
105+
"""
106+
Delete all keys with the given prefix from the store.
107+
"""
108+
await self.store.delete_dir(self.path, recursive=recursive)
109+
110+
async def delete_prefix(self) -> None:
111+
"""
112+
Delete all keys with the given prefix from the store.
113+
"""
114+
await self.store.delete_prefix(self.path)
115+
104116
async def set_if_not_exists(self, default: Buffer) -> None:
105117
"""
106118
Store a key to ``value`` if the key is not already present.

src/zarr/storage/local.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,9 @@ async def list(self) -> AsyncGenerator[str, None]:
226226

227227
async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]:
228228
# docstring inherited
229-
to_strip = os.path.join(str(self.root / prefix))
229+
to_strip = str(self.root)
230+
if prefix.endswith("/"):
231+
prefix = prefix[:-1]
230232
for p in (self.root / prefix).rglob("*"):
231233
if p.is_file():
232234
yield str(p.relative_to(to_strip))

src/zarr/storage/logging.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,3 +230,6 @@ def with_mode(self, mode: AccessModeLiteral) -> Self:
230230
log_level=self.log_level,
231231
log_handler=self.log_handler,
232232
)
233+
234+
235+
# TODO: wrap delete methods here

src/zarr/storage/memory.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
from logging import getLogger
34
from typing import TYPE_CHECKING, Self
45

56
from zarr.abc.store import ByteRangeRequest, Store
@@ -14,6 +15,9 @@
1415
from zarr.core.common import AccessModeLiteral
1516

1617

18+
logger = getLogger(__name__)
19+
20+
1721
class MemoryStore(Store):
1822
"""
1923
In-memory store for testing purposes.
@@ -137,7 +141,7 @@ async def delete(self, key: str) -> None:
137141
try:
138142
del self._store_dict[key]
139143
except KeyError:
140-
pass
144+
logger.debug("Key %s does not exist.", key)
141145

142146
async def set_partial_values(self, key_start_values: Iterable[tuple[str, int, bytes]]) -> None:
143147
# docstring inherited
@@ -150,9 +154,10 @@ async def list(self) -> AsyncGenerator[str, None]:
150154

151155
async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]:
152156
# docstring inherited
153-
for key in self._store_dict:
157+
# note: we materialize all dict keys into a list here so we can mutate the dict in-place (e.g. in delete_prefix)
158+
for key in list(self._store_dict):
154159
if key.startswith(prefix):
155-
yield key.removeprefix(prefix)
160+
yield key
156161

157162
async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]:
158163
# docstring inherited

src/zarr/storage/remote.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,7 @@ async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]:
298298

299299
async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]:
300300
# docstring inherited
301-
find_str = f"{self.path}/{prefix}"
302-
for onefile in await self.fs._find(find_str, detail=False, maxdepth=None, withdirs=False):
303-
yield onefile.removeprefix(find_str)
301+
for onefile in await self.fs._find(
302+
f"{self.path}/{prefix}", detail=False, maxdepth=None, withdirs=False
303+
):
304+
yield onefile.removeprefix(f"{self.path}/")

src/zarr/storage/zip.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,7 @@ async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]:
241241
# docstring inherited
242242
async for key in self.list():
243243
if key.startswith(prefix):
244-
yield key.removeprefix(prefix)
244+
yield key
245245

246246
async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]:
247247
# docstring inherited

src/zarr/testing/store.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -249,8 +249,7 @@ async def test_list(self, store: S) -> None:
249249
async def test_list_prefix(self, store: S) -> None:
250250
"""
251251
Test that the `list_prefix` method works as intended. Given a prefix, it should return
252-
all the keys in storage that start with this prefix. Keys should be returned with the shared
253-
prefix removed.
252+
all the keys in storage that start with this prefix.
254253
"""
255254
prefixes = ("", "a/", "a/b/", "a/b/c/")
256255
data = self.buffer_cls.from_bytes(b"")
@@ -264,7 +263,7 @@ async def test_list_prefix(self, store: S) -> None:
264263
expected: tuple[str, ...] = ()
265264
for key in store_dict:
266265
if key.startswith(prefix):
267-
expected += (key.removeprefix(prefix),)
266+
expected += (key,)
268267
expected = tuple(sorted(expected))
269268
assert observed == expected
270269

0 commit comments

Comments
 (0)