Skip to content

Commit 1ece962

Browse files
authored
Merge branch 'main' into fix/chunkkeyencodinglike
2 parents dd9d99f + 80aea2a commit 1ece962

File tree

15 files changed

+419
-58
lines changed

15 files changed

+419
-58
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ src/zarr/_version.py
8383
data/*
8484
src/fixture/
8585
fixture/
86+
junit.xml
8687

8788
.DS_Store
8889
tests/.hypothesis

changes/2693.bugfix.rst

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
Implement open() for LoggingStore
2+
LoggingStore is now a generic class.
3+
Use stdout rather than stderr as the default stream for LoggingStore
4+
Ensure that ZipStore is open before getting or setting any values
5+
Update equality for LoggingStore and WrapperStore such that 'other' must also be a LoggingStore or WrapperStore respectively, rather than only checking the types of the stores they wrap.
6+
Indicate StoreTest's `test_store_repr`, `test_store_supports_writes`, `test_store_supports_partial_writes`, and `test_store_supports_listing` need to be implemented using `@abstractmethod` rather than `NotImplementedError`.
7+
Separate instantiating and opening a store in StoreTests
8+
Test using Store as a context manager in StoreTests
9+
Match the errors raised by read only stores in StoreTests
10+
Test that a ValueError is raise for invalid byte range syntax in StoreTests
11+
Test getsize() and getsize_prefix() in StoreTests
12+
Test the error raised for invalid buffer arguments in StoreTests
13+
Test that data can be written to a store that's not yet open using the store.set method in StoreTests

changes/2762.bugfix.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fixed ZipStore to make sure the correct attributes are saved when instances are pickled.
2+
This fixes a previous bug that prevent using ZipStore with a ProcessPoolExecutor.

src/zarr/abc/store.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -176,10 +176,10 @@ async def get(
176176
Parameters
177177
----------
178178
key : str
179+
prototype : BufferPrototype
180+
The prototype of the output buffer. Stores may support a default buffer prototype.
179181
byte_range : ByteRequest, optional
180-
181182
ByteRequest may be one of the following. If not provided, all data associated with the key is retrieved.
182-
183183
- RangeByteRequest(int, int): Request a specific range of bytes in the form (start, end). The end is exclusive. If the given range is zero-length or starts after the end of the object, an error will be returned. Additionally, if the range ends after the end of the object, the entire remainder of the object will be returned. Otherwise, the exact requested range will be returned.
184184
- OffsetByteRequest(int): Request all bytes starting from a given byte offset. This is equivalent to bytes={int}- as an HTTP header.
185185
- SuffixByteRequest(int): Request the last int bytes. Note that here, int is the size of the request, not the byte offset. This is equivalent to bytes=-{int} as an HTTP header.
@@ -200,6 +200,8 @@ async def get_partial_values(
200200
201201
Parameters
202202
----------
203+
prototype : BufferPrototype
204+
The prototype of the output buffer. Stores may support a default buffer prototype.
203205
key_ranges : Iterable[tuple[str, tuple[int | None, int | None]]]
204206
Ordered set of key, range pairs, a key may occur multiple times with different ranges
205207

src/zarr/storage/_fsspec.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,15 @@
1010
Store,
1111
SuffixByteRequest,
1212
)
13+
from zarr.core.buffer import Buffer
1314
from zarr.storage._common import _dereference_path
1415

1516
if TYPE_CHECKING:
1617
from collections.abc import AsyncIterator, Iterable
1718

1819
from fsspec.asyn import AsyncFileSystem
1920

20-
from zarr.core.buffer import Buffer, BufferPrototype
21+
from zarr.core.buffer import BufferPrototype
2122
from zarr.core.common import BytesLike
2223

2324

@@ -264,6 +265,10 @@ async def set(
264265
if not self._is_open:
265266
await self._open()
266267
self._check_writable()
268+
if not isinstance(value, Buffer):
269+
raise TypeError(
270+
f"FsspecStore.set(): `value` must be a Buffer instance. Got an instance of {type(value)} instead."
271+
)
267272
path = _dereference_path(self.path, key)
268273
# write data
269274
if byte_range:

src/zarr/storage/_local.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ def __init__(self, root: Path | str, *, read_only: bool = False) -> None:
9696
root = Path(root)
9797
if not isinstance(root, Path):
9898
raise TypeError(
99-
f'"root" must be a string or Path instance. Got an object with type {type(root)} instead.'
99+
f"'root' must be a string or Path instance. Got an instance of {type(root)} instead."
100100
)
101101
self.root = root
102102

@@ -169,7 +169,9 @@ async def _set(self, key: str, value: Buffer, exclusive: bool = False) -> None:
169169
self._check_writable()
170170
assert isinstance(key, str)
171171
if not isinstance(value, Buffer):
172-
raise TypeError("LocalStore.set(): `value` must a Buffer instance")
172+
raise TypeError(
173+
f"LocalStore.set(): `value` must be a Buffer instance. Got an instance of {type(value)} instead."
174+
)
173175
path = self.root / key
174176
await asyncio.to_thread(_put, path, value, start=None, exclusive=exclusive)
175177

src/zarr/storage/_logging.py

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,11 @@
22

33
import inspect
44
import logging
5+
import sys
56
import time
67
from collections import defaultdict
78
from contextlib import contextmanager
8-
from typing import TYPE_CHECKING, Any
9+
from typing import TYPE_CHECKING, Any, Self, TypeVar
910

1011
from zarr.abc.store import Store
1112
from zarr.storage._wrapper import WrapperStore
@@ -18,8 +19,10 @@
1819

1920
counter: defaultdict[str, int]
2021

22+
T_Store = TypeVar("T_Store", bound=Store)
2123

22-
class LoggingStore(WrapperStore[Store]):
24+
25+
class LoggingStore(WrapperStore[T_Store]):
2326
"""
2427
Store wrapper that logs all calls to the wrapped store.
2528
@@ -42,7 +45,7 @@ class LoggingStore(WrapperStore[Store]):
4245

4346
def __init__(
4447
self,
45-
store: Store,
48+
store: T_Store,
4649
log_level: str = "DEBUG",
4750
log_handler: logging.Handler | None = None,
4851
) -> None:
@@ -67,7 +70,7 @@ def _configure_logger(
6770

6871
def _default_handler(self) -> logging.Handler:
6972
"""Define a default log handler"""
70-
handler = logging.StreamHandler()
73+
handler = logging.StreamHandler(stream=sys.stdout)
7174
handler.setLevel(self.log_level)
7275
handler.setFormatter(
7376
logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
@@ -94,6 +97,14 @@ def log(self, hint: Any = "") -> Generator[None, None, None]:
9497
end_time = time.time()
9598
self.logger.info("Finished %s [%.2f s]", op, end_time - start_time)
9699

100+
@classmethod
101+
async def open(cls: type[Self], store_cls: type[T_Store], *args: Any, **kwargs: Any) -> Self:
102+
log_level = kwargs.pop("log_level", "DEBUG")
103+
log_handler = kwargs.pop("log_handler", None)
104+
store = store_cls(*args, **kwargs)
105+
await store._open()
106+
return cls(store=store, log_level=log_level, log_handler=log_handler)
107+
97108
@property
98109
def supports_writes(self) -> bool:
99110
with self.log():
@@ -126,8 +137,7 @@ def _is_open(self) -> bool:
126137

127138
@_is_open.setter
128139
def _is_open(self, value: bool) -> None:
129-
with self.log(value):
130-
self._store._is_open = value
140+
raise NotImplementedError("LoggingStore must be opened via the `_open` method")
131141

132142
async def _open(self) -> None:
133143
with self.log():
@@ -151,11 +161,11 @@ def __str__(self) -> str:
151161
return f"logging-{self._store}"
152162

153163
def __repr__(self) -> str:
154-
return f"LoggingStore({repr(self._store)!r})"
164+
return f"LoggingStore({self._store.__class__.__name__}, '{self._store}')"
155165

156166
def __eq__(self, other: object) -> bool:
157167
with self.log(other):
158-
return self._store == other
168+
return type(self) is type(other) and self._store.__eq__(other._store) # type: ignore[attr-defined]
159169

160170
async def get(
161171
self,

src/zarr/storage/_memory.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,9 @@ async def set(self, key: str, value: Buffer, byte_range: tuple[int, int] | None
111111
await self._ensure_open()
112112
assert isinstance(key, str)
113113
if not isinstance(value, Buffer):
114-
raise TypeError(f"Expected Buffer. Got {type(value)}.")
114+
raise TypeError(
115+
f"MemoryStore.set(): `value` must be a Buffer instance. Got an instance of {type(value)} instead."
116+
)
115117

116118
if byte_range is not None:
117119
buf = self._store_dict[key]
@@ -231,8 +233,9 @@ async def set(self, key: str, value: Buffer, byte_range: tuple[int, int] | None
231233
self._check_writable()
232234
assert isinstance(key, str)
233235
if not isinstance(value, Buffer):
234-
raise TypeError(f"Expected Buffer. Got {type(value)}.")
235-
236+
raise TypeError(
237+
f"GpuMemoryStore.set(): `value` must be a Buffer instance. Got an instance of {type(value)} instead."
238+
)
236239
# Convert to gpu.Buffer
237240
gpu_value = value if isinstance(value, gpu.Buffer) else gpu.Buffer.from_buffer(value)
238241
await super().set(key, gpu_value, byte_range=byte_range)

src/zarr/storage/_wrapper.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,14 @@ async def _ensure_open(self) -> None:
5656
async def is_empty(self, prefix: str) -> bool:
5757
return await self._store.is_empty(prefix)
5858

59+
@property
60+
def _is_open(self) -> bool:
61+
return self._store._is_open
62+
63+
@_is_open.setter
64+
def _is_open(self, value: bool) -> None:
65+
raise NotImplementedError("WrapperStore must be opened via the `_open` method")
66+
5967
async def clear(self) -> None:
6068
return await self._store.clear()
6169

@@ -67,7 +75,13 @@ def _check_writable(self) -> None:
6775
return self._store._check_writable()
6876

6977
def __eq__(self, value: object) -> bool:
70-
return type(self) is type(value) and self._store.__eq__(value)
78+
return type(self) is type(value) and self._store.__eq__(value._store) # type: ignore[attr-defined]
79+
80+
def __str__(self) -> str:
81+
return f"wrapping-{self._store}"
82+
83+
def __repr__(self) -> str:
84+
return f"WrapperStore({self._store.__class__.__name__}, '{self._store}')"
7185

7286
async def get(
7387
self, key: str, prototype: BufferPrototype, byte_range: ByteRequest | None = None

src/zarr/storage/_zip.py

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -107,11 +107,14 @@ def _sync_open(self) -> None:
107107
async def _open(self) -> None:
108108
self._sync_open()
109109

110-
def __getstate__(self) -> tuple[Path, ZipStoreAccessModeLiteral, int, bool]:
111-
return self.path, self._zmode, self.compression, self.allowZip64
112-
113-
def __setstate__(self, state: Any) -> None:
114-
self.path, self._zmode, self.compression, self.allowZip64 = state
110+
def __getstate__(self) -> dict[str, Any]:
111+
state = self.__dict__
112+
for attr in ["_zf", "_lock"]:
113+
state.pop(attr, None)
114+
return state
115+
116+
def __setstate__(self, state: dict[str, Any]) -> None:
117+
self.__dict__ = state
115118
self._is_open = False
116119
self._sync_open()
117120

@@ -146,6 +149,8 @@ def _get(
146149
prototype: BufferPrototype,
147150
byte_range: ByteRequest | None = None,
148151
) -> Buffer | None:
152+
if not self._is_open:
153+
self._sync_open()
149154
# docstring inherited
150155
try:
151156
with self._zf.open(key) as f: # will raise KeyError
@@ -190,6 +195,8 @@ async def get_partial_values(
190195
return out
191196

192197
def _set(self, key: str, value: Buffer) -> None:
198+
if not self._is_open:
199+
self._sync_open()
193200
# generally, this should be called inside a lock
194201
keyinfo = zipfile.ZipInfo(filename=key, date_time=time.localtime(time.time())[:6])
195202
keyinfo.compress_type = self.compression
@@ -203,9 +210,13 @@ def _set(self, key: str, value: Buffer) -> None:
203210
async def set(self, key: str, value: Buffer) -> None:
204211
# docstring inherited
205212
self._check_writable()
213+
if not self._is_open:
214+
self._sync_open()
206215
assert isinstance(key, str)
207216
if not isinstance(value, Buffer):
208-
raise TypeError("ZipStore.set(): `value` must a Buffer instance")
217+
raise TypeError(
218+
f"ZipStore.set(): `value` must be a Buffer instance. Got an instance of {type(value)} instead."
219+
)
209220
with self._lock:
210221
self._set(key, value)
211222

0 commit comments

Comments
 (0)