Skip to content

Commit 24e15d3

Browse files
authored
Add storage options and protocol (#135)
* tests: add storage-options and protocol tests
* upath: implement storage_options and protocol
* upath.implementations.webdav: fix protocol and storage_options
* upath: allow overriding .path in subclasses
* tests: fix azure protocol test
* upath.implementations.http: fix .path
* upath: update readme
* typing: fix errors
1 parent bbc3c3d commit 24e15d3

File tree

8 files changed

+135
-10
lines changed

8 files changed

+135
-10
lines changed

README.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,29 @@ If a local path is provided, `UPath` will return a `PosixUPath` or `WindowsUPath
125125
These two subclasses are 100% compatible with the `PosixPath` and `WindowsPath` classes of their
126126
specific Python version, and are tested against all relevant tests of the CPython pathlib test-suite.
127127

128+
### UPath public class API
129+
130+
`UPath`'s public class interface is identical to `pathlib.Path` with the addition of the following attributes:
131+
132+
- `UPath(...).protocol: str` the filesystem_spec protocol _(note: for `PosixUPath` and `WindowsUPath` it's an empty string)_
133+
- `UPath(...).storage_options: dict[str, Any]` the storage options for instantiating the filesystem_spec class
134+
- `UPath(...).path: str` the filesystem_spec compatible path for use with filesystem instances
135+
- `UPath(...).fs: AbstractFileSystem` convenience attribute to access an instantiated filesystem
136+
137+
The first three attributes (`protocol`, `storage_options` and `path`) provide a public interface for accessing a file via fsspec, as follows:
138+
139+
```python
140+
from upath import UPath
141+
from fsspec import filesystem
142+
143+
p = UPath("s3://bucket/file.txt", anon=True)
144+
145+
fs = filesystem(p.protocol, **p.storage_options) # equivalent to p.fs
146+
with fs.open(p.path) as f:
147+
data = f.read()
148+
```
149+
150+
128151
## Contributing
129152

130153
Contributions are very welcome.

upath/core.py

Lines changed: 41 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def __init__(self, parsed_url: SplitResult | None, **kwargs: Any) -> None:
4444
self._fs = cls(**url_kwargs)
4545

4646
def _format_path(self, path: UPath) -> str:
47-
return path.path
47+
return path._path
4848

4949
def open(self, path, mode="r", *args, **kwargs):
5050
return self._fs.open(self._format_path(path), mode, *args, **kwargs)
@@ -206,6 +206,44 @@ def __new__(cls: type[PT], *args: str | PathLike, **kwargs: Any) -> PT:
206206
args_list, url=parsed_url, **kwargs
207207
)
208208

209+
@property
210+
def protocol(self) -> str:
211+
"""The filesystem_spec protocol
212+
213+
For local paths protocol is either 'file' if the UPath instance
214+
is backed by fsspec or '' if it's backed by stdlib pathlib. For
215+
both `fsspec.get_filesystem_class` returns `LocalFileSystem`.
216+
"""
217+
if self._url is None:
218+
return ""
219+
return self._url.scheme
220+
221+
@property
222+
def storage_options(self) -> dict[str, Any]:
223+
"""The filesystem_spec storage options dictionary
224+
225+
Accessing `.storage_options` does not instantiate the
226+
corresponding fsspec filesystem class.
227+
"""
228+
return {
229+
key: value
230+
for key, value in self._kwargs.items()
231+
if key not in {"scheme", "netloc", "url"}
232+
}
233+
234+
@property
235+
def fs(self) -> AbstractFileSystem:
236+
"""The filesystem_spec filesystem instance"""
237+
return self._accessor._fs
238+
239+
@property
240+
def path(self) -> str:
241+
"""The filesystem_spec path for use with a filesystem instance
242+
243+
Note: for some file systems this can be prefixed by the protocol.
244+
"""
245+
return self._path
246+
209247
def __getattr__(self, item: str) -> Any:
210248
if item == "_accessor":
211249
# cache the _accessor attribute on first access
@@ -258,7 +296,7 @@ def _format_parsed_parts(
258296
return formatted
259297

260298
@property
261-
def path(self) -> str:
299+
def _path(self) -> str:
262300
if self._parts:
263301
join_parts = self._parts[1:] if self._parts[0] == "/" else self._parts
264302
path: str = self._flavour.join(join_parts)
@@ -349,7 +387,7 @@ def rglob(self: PT, pattern: str) -> Generator[PT, None, None]:
349387

350388
def _sub_path(self, name):
351389
# only want the path name with iterdir
352-
sp = self.path
390+
sp = self._path
353391
return re.sub(f"^({sp}|{sp[1:]})/", "", name)
354392

355393
def absolute(self: PT) -> PT:
@@ -631,10 +669,6 @@ def __str__(self) -> str:
631669
)
632670
return self._str
633671

634-
@property
635-
def fs(self) -> AbstractFileSystem:
636-
return self._accessor._fs
637-
638672
def __truediv__(self: PT, key: str | PathLike) -> PT:
639673
# Add `/` root if not present
640674
if len(self._parts) == 0:

upath/implementations/cloud.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ def _format_path(self, path):
1010
"""
1111
netloc has already been set to project via `CloudPath._from_parts`
1212
"""
13-
return f"{path._url.netloc}/{path.path.lstrip('/')}"
13+
return f"{path._url.netloc}/{path._path.lstrip('/')}"
1414

1515
def mkdir(self, path, create_parents=True, **kwargs):
1616
_path = self._format_path(path)
@@ -49,7 +49,7 @@ def _sub_path(self, name):
4949
`listdir` and `glob`. However, in `iterdir` and `glob` we only want the
5050
relative path to `self`.
5151
"""
52-
sp = re.escape(self.path)
52+
sp = re.escape(self._path)
5353
netloc = self._url.netloc
5454
return re.sub(
5555
f"^({netloc})?/?({sp}|{sp[1:]})/?",
@@ -71,6 +71,12 @@ def joinpath(self, *args):
7171
self._kwargs["bucket"] = bucket
7272
return super().joinpath(*tuple(args_list))
7373

74+
@property
75+
def path(self) -> str:
76+
if self._url is None:
77+
raise RuntimeError(str(self))
78+
return f"{self._url.netloc}{super()._path}"
79+
7480

7581
class GCSPath(CloudPath):
7682
pass

upath/implementations/http.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
from __future__ import annotations
22

3+
from urllib.parse import urlunsplit
4+
35
from fsspec.asyn import sync
46

57
import upath.core
@@ -43,7 +45,7 @@ def _sub_path(self, name):
4345
relative path to `self`.
4446
"""
4547
complete_address = self._format_parsed_parts(
46-
None, None, [self.path], url=self._url, **self._kwargs
48+
None, None, [self._path], url=self._url, **self._kwargs
4749
)
4850

4951
if name.startswith(complete_address):
@@ -83,3 +85,10 @@ def resolve(
8385
break
8486

8587
return resolved_path
88+
89+
@property
90+
def path(self) -> str:
91+
# http filesystems use the full url as path
92+
if self._url is None:
93+
raise RuntimeError(str(self))
94+
return urlunsplit(self._url)

upath/implementations/webdav.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
from typing import Any
34
from urllib.parse import ParseResult
45
from urllib.parse import urlunsplit
56

@@ -49,3 +50,20 @@ def _sub_path(self, name):
4950
name = name.strip("/")
5051

5152
return name
53+
54+
@property
55+
def protocol(self) -> str:
56+
if self._url is None:
57+
raise RuntimeError(str(self))
58+
return self._url.scheme.split("+")[0]
59+
60+
@property
61+
def storage_options(self) -> dict[str, Any]:
62+
if self._url is None:
63+
raise RuntimeError(str(self))
64+
sopts = super().storage_options
65+
http_protocol = self._url.scheme.split("+")[1]
66+
assert http_protocol in {"http", "https"}
67+
base_url = urlunsplit(self._url._replace(scheme=http_protocol, path=""))
68+
sopts["base_url"] = base_url
69+
return sopts

upath/tests/cases.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from pathlib import Path
55

66
import pytest
7+
from fsspec import filesystem
78

89
from upath import UPath
910

@@ -416,3 +417,24 @@ def test_as_uri(self):
416417
uri = p0.as_uri()
417418
p1 = UPath(uri, **p0.fs.storage_options)
418419
assert p0 == p1
420+
421+
def test_protocol(self):
422+
protocol = self.path.protocol
423+
protocols = [p] if isinstance((p := type(self.path.fs).protocol), str) else p
424+
print(protocol, protocols)
425+
assert protocol in protocols
426+
427+
def test_storage_options(self):
428+
storage_options = self.path.storage_options
429+
assert storage_options == self.path.fs.storage_options
430+
431+
def test_read_with_fsspec(self):
432+
p = self.path.joinpath("file2.txt")
433+
434+
protocol = p.protocol
435+
storage_options = p.storage_options
436+
path = p.path
437+
438+
fs = filesystem(protocol, **storage_options)
439+
with fs.open(path) as f:
440+
assert f.read() == b"hello world"

upath/tests/implementations/test_azure.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,3 +44,8 @@ def test_makedirs_exist_ok_false(self):
4444

4545
def test_rglob(self, pathlib_base):
4646
return super().test_rglob(pathlib_base)
47+
48+
def test_protocol(self):
49+
# test all valid protocols for azure...
50+
protocol = self.path.protocol
51+
assert protocol in ["abfs", "abfss", "adl", "az"]

upath/tests/implementations/test_webdav.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,11 @@ def path(self, webdav_fixture):
1212

1313
def test_fsspec_compat(self):
1414
pass
15+
16+
def test_storage_options(self):
17+
# we need to add base_url to storage options for webdav filesystems,
18+
# to be able to serialize the http protocol to string...
19+
storage_options = self.path.storage_options
20+
base_url = storage_options.pop("base_url")
21+
assert storage_options == self.path.fs.storage_options
22+
assert base_url == self.path.fs.client.base_url

0 commit comments

Comments
 (0)