Skip to content

Commit 674f1db

Browse files
authored
Implement file scheme (#404)
* Implement file scheme * changelog * living in the past * add absolute * windows support and tests
1 parent ddc3b94 commit 674f1db

File tree

5 files changed

+88
-0
lines changed

5 files changed

+88
-0
lines changed

HISTORY.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
- Implement sliced downloads in GSClient. (Issue [#387](https://github.com/drivendataorg/cloudpathlib/issues/387), PR [#389](https://github.com/drivendataorg/cloudpathlib/pull/389))
55
- Implement `as_url` with presigned parameter for all backends. (Issue [#235](https://github.com/drivendataorg/cloudpathlib/issues/235), PR [#236](https://github.com/drivendataorg/cloudpathlib/pull/236))
66
- Stream to and from Azure Blob Storage. (PR [#403](https://github.com/drivendataorg/cloudpathlib/pull/403))
7+
- Implement `file:` URI scheme support for `AnyPath`. (Issue [#401](https://github.com/drivendataorg/cloudpathlib/issues/401), PR [#404](https://github.com/drivendataorg/cloudpathlib/pull/404))
78

89
## 0.17.0 (2023-12-21)
910

cloudpathlib/anypath.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
from .cloudpath import InvalidPrefixError, CloudPath
77
from .exceptions import AnyPathTypeError
8+
from .url_utils import path_from_fileurl
89

910

1011
class AnyPath(ABC):
@@ -22,6 +23,12 @@ def __new__(cls, *args, **kwargs) -> Union[CloudPath, Path]: # type: ignore
2223
return CloudPath(*args, **kwargs) # type: ignore
2324
except InvalidPrefixError as cloudpath_exception:
2425
try:
26+
if isinstance(args[0], str) and args[0].lower().startswith("file:"):
27+
path = path_from_fileurl(args[0], **kwargs)
28+
for part in args[1:]:
29+
path /= part
30+
return path
31+
2532
return Path(*args, **kwargs)
2633
except TypeError as path_exception:
2734
raise AnyPathTypeError(

cloudpathlib/url_utils.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
from pathlib import PureWindowsPath, Path
2+
from urllib.request import url2pathname
3+
from urllib.parse import urlparse, unquote
4+
5+
6+
def path_from_fileurl(urlstr, **kwargs):
    """
    Take a file:// url and return a Path.

    The URL is split into its components *before* any percent-decoding, so
    escaped reserved characters in the path (e.g. ``%23`` for ``#`` or ``%3F``
    for ``?``) remain part of the path instead of being parsed as a fragment
    or query. Every component is percent-decoded exactly once, which keeps
    ``path_from_fileurl(p.as_uri()) == p`` true for names containing ``%``.

    Adapted from:
    https://github.com/AcademySoftwareFoundation/OpenTimelineIO/blob/4c17494dee2e515aedc8623741556fae3e4afe72/src/py-opentimelineio/opentimelineio/url_utils.py#L43-L72

    Args:
        urlstr: A ``file:`` URI string, e.g. ``file:///root/mydir/myfile.txt``.
        **kwargs: Passed through unchanged to the ``Path`` constructor.

    Returns:
        A ``Path`` for the location the URI refers to.
    """
    # Parse the provided URL while it is still encoded: unquoting the whole URL
    # first would expose escaped "#" / "?" and truncate the parsed path.
    parsed_result = urlparse(urlstr)

    # The drive colon may be url encoded in the network location ("c%3A")
    netloc = unquote(parsed_result.netloc)

    # If the network location is a Windows drive, reassemble the path
    if PureWindowsPath(netloc).drive:
        return Path(netloc + unquote(parsed_result.path), **kwargs)

    # `url2pathname` percent-decodes the path itself, so it is handed the
    # still-encoded path (decoding here too would decode twice, turning
    # "%2520" into " "). Only encoded drive markers are decoded up front so
    # that "/c%3a/..." and "/c%7C/..." are still recognized as Windows drives.
    urlpath = parsed_result.path
    for escaped, literal in (("%3A", ":"), ("%3a", ":"), ("%7C", "|"), ("%7c", "|")):
        urlpath = urlpath.replace(escaped, literal)

    # Convert the parsed URL path to a filesystem path
    filepath = Path(url2pathname(urlpath), **kwargs)

    # Check if the specified index is a Windows drive, then offset the path
    if len(filepath.parts) > 1 and PureWindowsPath(filepath.parts[1]).drive:
        # Remove leading "/" if/when `request.url2pathname` yields "/S:/path/file.ext"
        filepath = Path(*filepath.parts[1:], **kwargs)

    return filepath

docs/docs/anypath-polymorphism.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,31 @@ isinstance(cloud_path, AnyPath)
2323
#> True
2424
```
2525

26+
## `file:` URI Scheme
27+
28+
`AnyPath` also supports the [`file:` URI scheme](https://en.wikipedia.org/wiki/File_URI_scheme) _for paths that can be referenced with pathlib_ and returns a `Path` instance for those paths. If you need to roundtrip back to a `file:` URI, you can use the `Path.as_uri` method after any path manipulations that you do.
29+
30+
For example:
31+
32+
```python
33+
from cloudpathlib import AnyPath
34+
35+
# hostname omitted variant
36+
path = AnyPath("file:/root/mydir/myfile.txt")
37+
path
38+
#> PosixPath('/root/mydir/myfile.txt')
39+
40+
# explicit local path variant
41+
path = AnyPath("file:///root/mydir/myfile.txt")
42+
path
43+
#> PosixPath('/root/mydir/myfile.txt')
44+
45+
# manipulate the path and return the file:// URI
46+
parent_uri = path.parent.as_uri()
47+
parent_uri
48+
#> 'file:///root/mydir'
49+
```
50+
2651
## How It Works
2752

2853
The constructor for `AnyPath` will first attempt to run the input through the `CloudPath` base class' constructor, which will validate the input against registered concrete `CloudPath` implementations. This will accept inputs that are already a cloud path class or a string with the appropriate URI scheme prefix (e.g., `s3://`). If no implementation validates successfully and the input is a string that starts with `file:`, it is converted from a `file:` URI to a `Path`. Otherwise, it will then try to run the input through the `Path` constructor. If the `Path` constructor fails and raises a `TypeError`, then the `AnyPath` constructor will raise an `AnyPathTypeError` exception.

tests/test_anypath.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import os
12
from pathlib import Path, PosixPath, WindowsPath
23

34
import pytest
@@ -20,6 +21,29 @@ def test_anypath_path():
2021
assert issubclass(WindowsPath, AnyPath)
2122
assert not issubclass(str, AnyPath)
2223

24+
# test `file:` scheme (only works with absolute paths; needs .absolute() on Windows)
25+
assert AnyPath(path.absolute().resolve().as_uri()) == path.absolute().resolve()
26+
27+
# test file:// + multi arg
28+
assert AnyPath(*path.absolute().resolve().as_uri().rsplit("/", 2)) == path.absolute().resolve()
29+
30+
# test no hostname
31+
assert Path("/foo/bar") == AnyPath("file:/foo/bar")
32+
assert Path("/foo/bar") == AnyPath("file:///foo/bar")
33+
34+
# windows tests
35+
if os.name == "nt":
36+
assert Path("c:\\hello\\test.txt") == AnyPath("file:/c:/hello/test.txt")
37+
assert Path("c:\\hello\\test.txt") == AnyPath("file://c:/hello/test.txt")
38+
assert Path("c:\\hello\\test.txt") == AnyPath("file:///c:/hello/test.txt")
39+
assert Path("c:\\hello\\test.txt") == AnyPath("file://c%3A//hello/test.txt")
40+
assert Path("c:\\hello\\test.txt") == AnyPath("file://localhost/c%3a/hello/test.txt")
41+
assert Path("c:\\WINDOWS\\clock.avi") == AnyPath("file://localhost/c|/WINDOWS/clock.avi")
42+
assert Path("c:\\WINDOWS\\clock.avi") == AnyPath("file:///c|/WINDOWS/clock.avi")
43+
assert Path("c:\\hello\\test space.txt") == AnyPath(
44+
"file://localhost/c%3a/hello/test%20space.txt"
45+
)
46+
2347

2448
def test_anypath_cloudpath(rig):
2549
cloudpath = rig.create_cloud_path("a/b/c")

0 commit comments

Comments
 (0)