Skip to content

Commit e06e7f1

Browse files
committed
fix(remotestore): raise error if path includes scheme
1 parent 3b787a4 commit e06e7f1

File tree

3 files changed

+80
-13
lines changed

3 files changed

+80
-13
lines changed

src/zarr/abc/store.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,12 +95,12 @@ def with_mode(self, mode: AccessModeLiteral) -> Self:
9595
9696
Parameters
9797
----------
98-
mode: AccessModeLiteral
98+
mode : AccessModeLiteral
9999
The new mode to use.
100100
101101
Returns
102102
-------
103-
store:
103+
store
104104
A new store of the same type with the new mode.
105105
106106
Examples

src/zarr/storage/remote.py

Lines changed: 51 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
import warnings
34
from typing import TYPE_CHECKING, Any, Self
45

56
import fsspec
@@ -42,15 +43,45 @@ def __init__(
4243
allowed_exceptions: tuple[type[Exception], ...] = ALLOWED_EXCEPTIONS,
4344
) -> None:
4445
"""
46+
A remote Store based on FSSpec
47+
4548
Parameters
4649
----------
47-
url: root of the datastore. In fsspec notation, this is usually like "protocol://path/to".
48-
Can also be a upath.UPath instance/
49-
allowed_exceptions: when fetching data, these cases will be deemed to correspond to missing
50-
keys, rather than some other IO failure
51-
storage_options: passed on to fsspec to make the filesystem instance. If url is a UPath,
52-
this must not be used.
53-
50+
fs : AsyncFileSystem
51+
The Async FSSpec filesystem to use with this store.
52+
mode : AccessModeLiteral
53+
The access mode to use.
54+
path : str
55+
The root path of the store. This should be a relative path and must not include the
56+
filesystem scheme.
57+
allowed_exceptions : tuple[type[Exception], ...]
58+
When fetching data, these cases will be deemed to correspond to missing keys.
59+
60+
Attributes
61+
----------
62+
fs
63+
allowed_exceptions
64+
supports_writes
65+
supports_deletes
66+
supports_partial_writes
67+
supports_listing
68+
69+
Raises
70+
------
71+
TypeError
72+
If the Filesystem does not support async operations.
73+
ValueError
74+
If the path argument includes a scheme.
75+
76+
Warns
77+
-----
78+
UserWarning
79+
If the file system (fs) was not created with `asynchronous=True`.
80+
81+
See Also
82+
--------
83+
RemoteStore.from_upath
84+
RemoteStore.from_url
5485
"""
5586
super().__init__(mode=mode)
5687
self.fs = fs
@@ -59,6 +90,14 @@ def __init__(
5990

6091
if not self.fs.async_impl:
6192
raise TypeError("Filesystem needs to support async operations.")
93+
if not self.fs.asynchronous:
94+
warnings.warn(
95+
f"fs ({fs}) was not created with `asynchronous=True`, this may lead to surprising behavior",
96+
stacklevel=2,
97+
)
98+
if "://" in path:
99+
scheme, _ = path.split("://", maxsplit=1)
100+
raise ValueError(f"path argument to RemoteStore must not include scheme ({scheme}://)")
62101

63102
@classmethod
64103
def from_upath(
@@ -82,7 +121,11 @@ def from_url(
82121
mode: AccessModeLiteral = "r",
83122
allowed_exceptions: tuple[type[Exception], ...] = ALLOWED_EXCEPTIONS,
84123
) -> RemoteStore:
85-
fs, path = fsspec.url_to_fs(url, **storage_options)
124+
opts = storage_options or {}
125+
opts = {"asynchronous": True, **opts}
126+
print("opts")
127+
128+
fs, path = fsspec.url_to_fs(url, **opts)
86129
return cls(fs=fs, path=path, mode=mode, allowed_exceptions=allowed_exceptions)
87130

88131
async def clear(self) -> None:

tests/v3/test_store/test_remote.py

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -86,10 +86,12 @@ def s3(s3_base: None) -> Generator[s3fs.S3FileSystem, None, None]:
8686

8787
async def test_basic() -> None:
8888
store = RemoteStore.from_url(
89-
f"s3://{test_bucket_name}",
89+
f"s3://{test_bucket_name}/foo/spam/",
9090
mode="w",
9191
storage_options={"endpoint_url": endpoint_url, "anon": False},
9292
)
93+
assert store.fs.asynchronous
94+
assert store.path == f"{test_bucket_name}/foo/spam"
9395
assert await _collect_aiterator(store.list()) == ()
9496
assert not await store.exists("foo")
9597
data = b"hello"
@@ -109,7 +111,7 @@ class TestRemoteStoreS3(StoreTests[RemoteStore, cpu.Buffer]):
109111
@pytest.fixture
110112
def store_kwargs(self, request) -> dict[str, str | bool]:
111113
fs, path = fsspec.url_to_fs(
112-
f"s3://{test_bucket_name}", endpoint_url=endpoint_url, anon=False
114+
f"s3://{test_bucket_name}", endpoint_url=endpoint_url, anon=False, asynchronous=True
113115
)
114116
return {"fs": fs, "path": path, "mode": "r+"}
115117

@@ -183,6 +185,28 @@ async def test_remote_store_from_uri(
183185
assert dict(group.attrs) == {"key": "value-3"}
184186

185187
def test_from_upath(self) -> None:
186-
path = UPath(f"s3://{test_bucket_name}", endpoint_url=endpoint_url, anon=False)
188+
path = UPath(
189+
f"s3://{test_bucket_name}/foo/bar/",
190+
endpoint_url=endpoint_url,
191+
anon=False,
192+
asynchronous=True,
193+
)
187194
result = RemoteStore.from_upath(path)
188195
assert result.fs.endpoint_url == endpoint_url
196+
assert result.fs.asynchronous
197+
assert result.path == f"{test_bucket_name}/foo/bar"
198+
199+
def test_init_raises_if_path_has_scheme(self, store_kwargs) -> None:
200+
store_kwargs["path"] = "s3://" + store_kwargs["path"]
201+
with pytest.raises(
202+
ValueError, match="path argument to RemoteStore must not include scheme .*"
203+
):
204+
self.store_cls(**store_kwargs)
205+
206+
def test_init_warns_if_fs_asynchronous_is_false(self) -> None:
207+
fs, path = fsspec.url_to_fs(
208+
f"s3://{test_bucket_name}", endpoint_url=endpoint_url, anon=False, asynchronous=False
209+
)
210+
store_kwargs = {"fs": fs, "path": path, "mode": "r+"}
211+
with pytest.warns(UserWarning, match=r".* was not created with `asynchronous=True`.*"):
212+
self.store_cls(**store_kwargs)

0 commit comments

Comments
 (0)