Skip to content

Commit f6b184c

Browse files
authored
Strip slash before Windows drive letter in path (#10116)
Functions like urllib.parse.urlsplit() parse a file:// URL created from a non-UNC Windows absolute path with a leading slash in the path component: >>> from pathlib import Path >>> from urllib.parse import urlsplit >>> path = Path("C:/Users/VssAdministrator") >>> parsed = urlsplit(path.as_uri()) >>> parsed.path '/C:/Users/VssAdministrator' This value unfortunately does not play well with path functions like open(), so we perform some additional cleanup to strip that leading slash. This commit also contains some minor cleanup to unify how Windows is detected, and how a file:// URL is fetched.
1 parent 76cd70a commit f6b184c

File tree

4 files changed

+42
-11
lines changed

4 files changed

+42
-11
lines changed

news/10115.bugfix.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Strip leading slash from a ``file://`` URL built from a path with the Windows
2+
drive notation. This fixes bugs where the ``file://`` URL cannot be correctly
3+
used as a requirement, constraint, or index URL on Windows.

src/pip/_internal/req/req_file.py

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from pip._internal.network.session import PipSession
1717
from pip._internal.network.utils import raise_for_status
1818
from pip._internal.utils.encoding import auto_decode
19-
from pip._internal.utils.urls import get_url_scheme, url_to_path
19+
from pip._internal.utils.urls import get_url_scheme
2020

2121
if TYPE_CHECKING:
2222
# NoReturn introduced in 3.6.2; imported only for type checking to maintain
@@ -532,20 +532,16 @@ def get_file_content(url, session):
532532
"""
533533
scheme = get_url_scheme(url)
534534

535-
if scheme in ['http', 'https']:
536-
# FIXME: catch some errors
535+
# Pip has special support for file:// URLs (LocalFSAdapter).
536+
if scheme in ['http', 'https', 'file']:
537537
resp = session.get(url)
538538
raise_for_status(resp)
539539
return resp.url, resp.text
540540

541-
elif scheme == 'file':
542-
url = url_to_path(url)
543-
541+
# Assume this is a bare path.
544542
try:
545543
with open(url, 'rb') as f:
546544
content = auto_decode(f.read())
547545
except OSError as exc:
548-
raise InstallationError(
549-
f'Could not open requirements file: {exc}'
550-
)
546+
raise InstallationError(f'Could not open requirements file: {exc}')
551547
return url, content

src/pip/_internal/utils/urls.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
import os
2-
import sys
2+
import string
33
import urllib.parse
44
import urllib.request
55
from typing import Optional
66

7+
from .compat import WINDOWS
8+
79

810
def get_url_scheme(url):
911
# type: (str) -> Optional[str]
@@ -37,7 +39,7 @@ def url_to_path(url):
3739
if not netloc or netloc == "localhost":
3840
# According to RFC 8089, same as empty authority.
3941
netloc = ""
40-
elif sys.platform == "win32":
42+
elif WINDOWS:
4143
# If we have a UNC path, prepend UNC share notation.
4244
netloc = "\\\\" + netloc
4345
else:
@@ -46,4 +48,18 @@ def url_to_path(url):
4648
)
4749

4850
path = urllib.request.url2pathname(netloc + path)
51+
52+
# On Windows, urlsplit parses the path as something like "/C:/Users/foo".
53+
# This creates issues for path-related functions like io.open(), so we try
54+
# to detect and strip the leading slash.
55+
if (
56+
WINDOWS
57+
and not netloc # Not UNC.
58+
and len(path) >= 3
59+
and path[0] == "/" # Leading slash to strip.
60+
and path[1] in string.ascii_letters # Drive letter.
61+
and path[2:4] in (":", ":/") # Colon + end of string, or colon + absolute path.
62+
):
63+
path = path[1:]
64+
4965
return path

tests/unit/test_req_file.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,22 @@ def parse_reqfile(
6767
)
6868

6969

70+
def test_read_file_url(tmp_path):
71+
reqs = tmp_path.joinpath("requirements.txt")
72+
reqs.write_text("foo")
73+
result = list(parse_requirements(reqs.as_posix(), session))
74+
75+
assert len(result) == 1, result
76+
assert result[0].requirement == "foo"
77+
78+
# The comes_from value has three parts: -r or -c flag, path, and line.
79+
# The path value in the middle needs some special logic due to our path
80+
# normalization logic.
81+
assert result[0].comes_from[:3] == "-r "
82+
assert result[0].comes_from[-9:] == " (line 1)"
83+
assert os.path.samefile(result[0].comes_from[3:-9], str(reqs))
84+
85+
7086
class TestPreprocess:
7187
"""tests for `preprocess`"""
7288

0 commit comments

Comments
 (0)