Skip to content

Commit 3277806

Browse files
joouha and normanrz authored
Implement UPath.resolve (#86)
* Stop parsing URI paths as posix paths * Add URI path normalization in `UPath.resolve` * Follow redirects in `HTTPPath.resolve` * Update test_http.py * linting * trigger ci --------- Co-authored-by: Norman Rzepka <[email protected]>
1 parent 739fa30 commit 3277806

File tree

4 files changed

+169
-3
lines changed

4 files changed

+169
-3
lines changed

upath/core.py

Lines changed: 61 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import re
55
import sys
66
from os import PathLike
7+
from pathlib import _PosixFlavour # type: ignore
78
from typing import Sequence
89
from typing import TypeVar
910
from typing import TYPE_CHECKING
@@ -89,17 +90,41 @@ def touch(self, path, **kwargs):
8990
return self._fs.touch(self._format_path(path), **kwargs)
9091

9192

93+
class _UriFlavour(_PosixFlavour):
94+
def parse_parts(self, parts):
95+
parsed = []
96+
sep = self.sep
97+
drv = root = ""
98+
it = reversed(parts)
99+
for part in it:
100+
if part:
101+
drv, root, rel = self.splitroot(part)
102+
if not root or root and rel:
103+
for x in reversed(rel.split(sep)):
104+
parsed.append(sys.intern(x))
105+
106+
if drv or root:
107+
parsed.append(drv + root)
108+
parsed.reverse()
109+
return drv, root, parsed
110+
111+
def splitroot(self, part, sep="/"):
112+
# Treat the first slash in the path as the root if it exists
113+
if part and part[0] == sep:
114+
return "", sep, part[1:]
115+
return "", "", part
116+
117+
92118
PT = TypeVar("PT", bound="UPath")
93119

94120

95121
class UPath(pathlib.Path):
96-
97122
__slots__ = (
98123
"_url",
99124
"_kwargs",
100125
"_accessor", # overwritten because of default in Python 3.10
101126
)
102-
_flavour = pathlib._posix_flavour # type: ignore
127+
_flavour = _UriFlavour()
103128
_default_accessor = _FSSpecAccessor
104129

105130
# typing
@@ -311,7 +336,40 @@ def absolute(self: PT) -> PT:
311336
return self
312337

313338
def resolve(self: PT, strict: bool = False) -> PT:
    """Return a path with ``.`` and ``..`` segments normalized away.

    The path is returned unchanged (the same object) when none of its
    parts is ``.`` or ``..``. Otherwise a new path is built through
    ``_from_parsed_parts`` with the same drive, root and ``_kwargs``, and
    with the ``path`` component of ``_url`` (when a URL is set) replaced
    by the normalized path.

    ``strict`` is accepted for signature compatibility; this
    implementation does not read it.
    """
    raw_parts = self._parts

    # Nothing to normalize when there are no dot segments at all.
    if not ("." in raw_parts or ".." in raw_parts):
        return self

    sep = self._flavour.sep

    # The first part is left out of the walk and re-attached afterwards.
    tail = raw_parts[1:]
    last = len(tail) - 1
    stack: list[str] = []
    for pos, segment in enumerate(tail):
        if segment == ".":
            continue
        if segment == "..":
            # A ".." with nothing left to remove is silently dropped.
            if stack:
                stack.pop()
            continue
        # Every segment but the final one carries its trailing separator,
        # so popping removes a segment together with its separator and
        # empty segments survive as bare separators.
        stack.append(segment + sep if pos < last else segment)

    path = "".join(stack)

    url = self._url
    if url is not None:
        url = url._replace(path=path)

    return self._from_parsed_parts(
        self._drv,
        self._root,
        raw_parts[:1] + path.split(sep),
        url=url,
        **self._kwargs,
    )
315373

316374
def exists(self) -> bool:
317375
"""

upath/implementations/http.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from fsspec.asyn import sync
2+
13
import upath.core
24

35

@@ -47,3 +49,35 @@ def _sub_path(self, name):
4749
name = name.strip("/")
4850

4951
return name
52+
53+
def resolve(
    self: "HTTPPath", strict: bool = False, follow_redirects: bool = True
) -> "HTTPPath":
    """Normalize the path and, optionally, resolve redirects.

    The path is first normalized by the parent class ``resolve``. When
    ``follow_redirects`` is true and the normalized path carries a URL,
    that URL is requested with redirects allowed -- first with HEAD, then
    with GET if the HEAD response has an error status -- and a new
    ``HTTPPath`` is built from the final response URL.

    Raises:
        FileNotFoundError: if the GET fallback also returns an error
            status; the status error is chained as the cause.
    """
    # Normalise the path
    resolved_path = super().resolve(strict=strict)

    if not follow_redirects:
        return resolved_path

    # Without a URL there is nothing to request.
    parsed_url = resolved_path._url
    if parsed_url is None:
        return resolved_path
    url = parsed_url.geturl()

    # Get the fsspec fs and make sure it has a session.
    fs = resolved_path._accessor._fs
    session = sync(fs.loop, fs.set_session)

    async def _request(method):
        # Issue the request on the filesystem's event loop and leave the
        # context before handing the response back, so the connection is
        # released even though the body is never read (a bare awaited GET
        # would keep it checked out). Assumes the session's request
        # methods return async context managers, as aiohttp's do --
        # TODO confirm for the session type fsspec provides.
        async with method(url, allow_redirects=True) as response:
            return response

    # Use HEAD requests if the server allows it, falling back to GETs
    for method in (session.head, session.get):
        r = sync(fs.loop, _request, method)
        try:
            r.raise_for_status()
        except Exception as exc:
            # An error on HEAD falls through to GET; an error on GET is final.
            if method == session.get:
                raise FileNotFoundError(self) from exc
        else:
            # NOTE(review): the redirected path is built from the URL only,
            # so ``_kwargs`` of the original path are not forwarded --
            # confirm this is intended.
            resolved_path = HTTPPath(str(r.url))
            break

    return resolved_path

upath/tests/implementations/test_http.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,3 +76,9 @@ def test_write_text(self, pathlib_base):
7676

7777
def test_fsspec_compat(self):
7878
pass
79+
80+
def test_resolve(self):
    """Resolving the test path yields a URL ending in a slash."""
    # This doubles as a redirect test: the test server answers
    # `http://127.0.0.1:8080/folder` with a 301 redirect to
    # `http://127.0.0.1:8080/folder/`
    resolved = self.path.resolve()
    assert str(resolved).endswith("/")

upath/tests/test_core.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,3 +251,71 @@ def test_relative_to():
251251
UPath("s3://test_bucket/file.txt", anon=True).relative_to(
252252
UPath("s3://test_bucket", anon=False)
253253
)
254+
255+
256+
def test_uri_parsing():
    """A URI with repeated and trailing slashes round-trips unchanged."""
    uri = "http://www.example.com//a//b/"
    assert str(UPath(uri)) == uri
261+
262+
263+
# (unnormalized, normalized) pairs for http:// URLs.
# Expected normalization results according to curl
_HTTP_NORMALIZATIONS = (
    ("http://example.com", "http://example.com/"),
    ("http://example.com/", "http://example.com/"),
    ("http://example.com/a", "http://example.com/a"),
    ("http://example.com//a", "http://example.com//a"),
    ("http://example.com///a", "http://example.com///a"),
    ("http://example.com////a", "http://example.com////a"),
    ("http://example.com/a/.", "http://example.com/a/"),
    ("http://example.com/a/./", "http://example.com/a/"),
    ("http://example.com/a/./b", "http://example.com/a/b"),
    ("http://example.com/a/.//", "http://example.com/a//"),
    ("http://example.com/a/.//b", "http://example.com/a//b"),
    ("http://example.com/a//.", "http://example.com/a//"),
    ("http://example.com/a//./", "http://example.com/a//"),
    ("http://example.com/a//./b", "http://example.com/a//b"),
    ("http://example.com/a//.//", "http://example.com/a///"),
    ("http://example.com/a//.//b", "http://example.com/a///b"),
    ("http://example.com/a/..", "http://example.com/"),
    ("http://example.com/a/../", "http://example.com/"),
    ("http://example.com/a/../.", "http://example.com/"),
    ("http://example.com/a/../..", "http://example.com/"),
    ("http://example.com/a/../../", "http://example.com/"),
    ("http://example.com/a/../..//", "http://example.com//"),
    ("http://example.com/a/..//", "http://example.com//"),
    ("http://example.com/a/..//.", "http://example.com//"),
    ("http://example.com/a/..//..", "http://example.com/"),
    ("http://example.com/a/../b", "http://example.com/b"),
    ("http://example.com/a/..//b", "http://example.com//b"),
    ("http://example.com/a//..", "http://example.com/a/"),
    ("http://example.com/a//../", "http://example.com/a/"),
    ("http://example.com/a//../.", "http://example.com/a/"),
    ("http://example.com/a//../..", "http://example.com/"),
    ("http://example.com/a//../../", "http://example.com/"),
    ("http://example.com/a//../..//", "http://example.com//"),
    ("http://example.com/a//..//..", "http://example.com/a/"),
    ("http://example.com/a//../b", "http://example.com/a/b"),
    ("http://example.com/a//..//", "http://example.com/a//"),
    ("http://example.com/a//..//.", "http://example.com/a//"),
    ("http://example.com/a//..//b", "http://example.com/a//b"),
)

# Normalization with and without an authority component
_MEMORY_NORMALIZATIONS = (
    ("memory:/a/b/..", "memory:/a/"),
    ("memory:/a/b/../..", "memory:/"),
    ("memory:/a/b/../../..", "memory:/"),
    ("memory://a/b/..", "memory://a/"),
    ("memory://a/b/../..", "memory://a/"),
    ("memory://a/b/../../..", "memory://a/"),
)

# Shaped as (argnames, argvalues) so it can be star-unpacked straight into
# `pytest.mark.parametrize`.
NORMALIZATIONS = (
    ("unnormalized", "normalized"),
    _HTTP_NORMALIZATIONS + _MEMORY_NORMALIZATIONS,
)
314+
315+
316+
@pytest.mark.parametrize(*NORMALIZATIONS)
def test_normalize(unnormalized, normalized):
    """Resolving *unnormalized* gives the same string as *normalized*."""
    # Call `UPath.resolve` on the class so that only normalisation runs;
    # do not attempt to follow redirects for http:// paths here
    resolved = UPath.resolve(UPath(unnormalized))
    assert str(resolved) == str(UPath(normalized))

0 commit comments

Comments
 (0)