Skip to content
Open
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ jobs:
- "3.11"
- "3.12"
- "3.13"
- "3.14"

steps:
- uses: actions/checkout@v4
Expand Down Expand Up @@ -181,6 +182,7 @@ jobs:
# - "3.11"
# - "3.12"
- "3.13"
- "3.14"
group:
- { number: 1, pytest-filter: "not test_install" }
- { number: 2, pytest-filter: "test_install" }
Expand Down
1 change: 1 addition & 0 deletions news/13501.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Make conversion of file URLs more consistent across Python versions.
2 changes: 1 addition & 1 deletion noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def should_update_common_wheels() -> bool:
# -----------------------------------------------------------------------------
# Development Commands
# -----------------------------------------------------------------------------
@nox.session(python=["3.9", "3.10", "3.11", "3.12", "3.13", "pypy3"])
@nox.session(python=["3.9", "3.10", "3.11", "3.12", "3.13", "3.14", "pypy3"])
def test(session: nox.Session) -> None:
# Get the common wheels.
if should_update_common_wheels():
Expand Down
37 changes: 23 additions & 14 deletions src/pip/_internal/models/link.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,39 +121,47 @@ def _clean_url_path_part(part: str) -> str:
return urllib.parse.quote(urllib.parse.unquote(part))


def _clean_file_url_path(part: str) -> str:
def _clean_file_url(url: str) -> str:
"""
Clean the first part of a URL path that corresponds to a local
Clean a URL that corresponds to a local
filesystem path (i.e. the first part after splitting on "@" characters).
"""
# Replace "@" characters to protect them from percent-encoding.
at_symbol_token = "---PIP_AT_SYMBOL---"
assert at_symbol_token not in url
url = url.replace("@", at_symbol_token)
parts = urllib.parse.urlsplit(url)

# We unquote prior to quoting to make sure nothing is double quoted.
# Also, on Windows the path part might contain a drive letter which
# should not be quoted. On Linux where drive letters do not
# exist, the colon should be quoted. We rely on urllib.request
# to do the right thing here.
return urllib.request.pathname2url(urllib.request.url2pathname(part))
# exist, the colon should be quoted.
tidy_url = path_to_url(url_to_path(url), normalize_path=False)
tidy_parts = urllib.parse.urlsplit(tidy_url)

# Restore the original scheme, query and fragment components.
url = urllib.parse.urlunsplit(tidy_parts[:3] + parts[3:])
url = url.replace(tidy_parts.scheme, parts.scheme, 1)

# Restore "@" characters that were replaced earlier.
return url.replace(at_symbol_token, "@")


# percent-encoded: /
_reserved_chars_re = re.compile("(@|%2F)", re.IGNORECASE)


def _clean_url_path(path: str, is_local_path: bool) -> str:
def _clean_url_path(path: str) -> str:
"""
Clean the path portion of a URL.
"""
if is_local_path:
clean_func = _clean_file_url_path
else:
clean_func = _clean_url_path_part

# Split on the reserved characters prior to cleaning so that
# revision strings in VCS URLs are properly preserved.
parts = _reserved_chars_re.split(path)

cleaned_parts = []
for to_clean, reserved in pairwise(itertools.chain(parts, [""])):
cleaned_parts.append(clean_func(to_clean))
cleaned_parts.append(_clean_url_path_part(to_clean))
# Normalize %xx escapes (e.g. %2f -> %2F)
cleaned_parts.append(reserved.upper())

Expand All @@ -170,8 +178,9 @@ def _ensure_quoted_url(url: str) -> str:
# `scheme://netloc/path?query#fragment`.
result = urllib.parse.urlsplit(url)
# If the netloc is empty, then the URL refers to a local filesystem path.
is_local_path = not result.netloc
path = _clean_url_path(result.path, is_local_path=is_local_path)
if not result.netloc:
return _clean_file_url(url)
path = _clean_url_path(result.path)
return urllib.parse.urlunsplit(result._replace(path=path))


Expand Down
83 changes: 49 additions & 34 deletions src/pip/_internal/utils/urls.py
Original file line number Diff line number Diff line change
@@ -1,55 +1,70 @@
import os
import string
import sys
import urllib.parse
import urllib.request

from .compat import WINDOWS


def path_to_url(path: str) -> str:
def path_to_url(path: str, normalize_path: bool = True) -> str:
"""
Convert a path to a file: URL. The path will be made absolute and have
quoted path parts.
Convert a path to a file: URL with quoted path parts. The path will be
normalized and made absolute if *normalize_path* is true (the default).
"""
path = os.path.normpath(os.path.abspath(path))
url = urllib.parse.urljoin("file:", urllib.request.pathname2url(path))
return url
if normalize_path:
path = os.path.abspath(path)
if WINDOWS:
path = path.replace("\\", "/")

drive, tail = os.path.splitdrive(path)
if drive:
if drive[:4] == "//?/":
drive = drive[4:]
if drive[:4].upper() == "UNC/":
drive = "//" + drive[4:]
if drive[1:] == ":":
drive = "///" + drive
elif tail.startswith("/"):
tail = "//" + tail

encoding = sys.getfilesystemencoding()
errors = sys.getfilesystemencodeerrors()
drive = urllib.parse.quote(drive, "/:", encoding, errors)
tail = urllib.parse.quote(tail, "/", encoding, errors)
return "file:" + drive + tail


def url_to_path(url: str) -> str:
"""
Convert a file: URL to a path.
"""
assert url.startswith(
"file:"
scheme, netloc, path = urllib.parse.urlsplit(url)[:3]
assert scheme == "file" or scheme.endswith(
"+file"
), f"You can only turn file: urls into filenames (not {url!r})"

_, netloc, path, _, _ = urllib.parse.urlsplit(url)
if WINDOWS:
# e.g. file://c:/foo
if netloc[1:2] == ":":
path = netloc + path

# e.g. file://server/share/foo
elif netloc and netloc != "localhost":
path = "//" + netloc + path

# e.g. file://///server/share/foo
elif path[:3] == "///":
path = path[1:]

# e.g. file:///c:/foo
elif path[:1] == "/" and path[2:3] == ":":
path = path[1:]

if not netloc or netloc == "localhost":
# According to RFC 8089, same as empty authority.
netloc = ""
elif WINDOWS:
# If we have a UNC path, prepend UNC share notation.
netloc = "\\\\" + netloc
else:
path = path.replace("/", "\\")
elif netloc and netloc != "localhost":
raise ValueError(
f"non-local file URIs are not supported on this platform: {url!r}"
)

path = urllib.request.url2pathname(netloc + path)

# On Windows, urlsplit parses the path as something like "/C:/Users/foo".
# This creates issues for path-related functions like io.open(), so we try
# to detect and strip the leading slash.
if (
WINDOWS
and not netloc # Not UNC.
and len(path) >= 3
and path[0] == "/" # Leading slash to strip.
and path[1] in string.ascii_letters # Drive letter.
and path[2:4] in (":", ":/") # Colon + end of string, or colon + absolute path.
):
path = path[1:]

return path
encoding = sys.getfilesystemencoding()
errors = sys.getfilesystemencodeerrors()
return urllib.parse.unquote(path, encoding, errors)
34 changes: 0 additions & 34 deletions tests/lib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@
from io import BytesIO, StringIO
from textwrap import dedent
from typing import Any, AnyStr, Callable, Literal, Protocol, Union, cast
from urllib.parse import urlparse, urlunparse
from urllib.request import pathname2url
from zipfile import ZipFile

import pytest
Expand Down Expand Up @@ -1365,35 +1363,3 @@ def __call__(


CertFactory = Callable[[], str]

# -------------------------------------------------------------------------
# Accommodations for Windows path and URL changes in recent Python releases
# -------------------------------------------------------------------------

# versions containing fix/backport from https://github.com/python/cpython/pull/113563
# which changed the behavior of `urllib.parse.urlun{parse,split}`
url = "////path/to/file"
has_new_urlun_behavior = url == urlunparse(urlparse(url))

# the above change seems to only impact tests on Windows, so just add skips for that
skip_needs_new_urlun_behavior_win = pytest.mark.skipif(
sys.platform != "win32" or not has_new_urlun_behavior,
reason="testing windows behavior for newer CPython",
)

skip_needs_old_urlun_behavior_win = pytest.mark.skipif(
sys.platform != "win32" or has_new_urlun_behavior,
reason="testing windows behavior for older CPython",
)

# Trailing slashes are now preserved on Windows, matching POSIX behaviour.
# BPO: https://github.com/python/cpython/issues/126212
does_pathname2url_preserve_trailing_slash = pathname2url("C:/foo/").endswith("/")
skip_needs_new_pathname2url_trailing_slash_behavior_win = pytest.mark.skipif(
sys.platform != "win32" or not does_pathname2url_preserve_trailing_slash,
reason="testing windows (pathname2url) behavior for newer CPython",
)
skip_needs_old_pathname2url_trailing_slash_behavior_win = pytest.mark.skipif(
sys.platform != "win32" or does_pathname2url_preserve_trailing_slash,
reason="testing windows (pathname2url) behavior for older CPython",
)
63 changes: 16 additions & 47 deletions tests/unit/test_collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,19 +33,13 @@
Link,
LinkHash,
MetadataFile,
_clean_file_url,
_clean_url_path,
_ensure_quoted_url,
)
from pip._internal.network.session import PipSession

from tests.lib import (
TestData,
make_test_link_collector,
skip_needs_new_pathname2url_trailing_slash_behavior_win,
skip_needs_new_urlun_behavior_win,
skip_needs_old_pathname2url_trailing_slash_behavior_win,
skip_needs_old_urlun_behavior_win,
)
from tests.lib import TestData, make_test_link_collector

ACCEPT = ", ".join(
[
Expand Down Expand Up @@ -298,31 +292,30 @@ def test_get_simple_response_dont_log_clear_text_password(
("a %2f b", "a%20%2F%20b"),
],
)
@pytest.mark.parametrize("is_local_path", [True, False])
def test_clean_url_path(path: str, expected: str, is_local_path: bool) -> None:
assert _clean_url_path(path, is_local_path=is_local_path) == expected
def test_clean_url_path(path: str, expected: str) -> None:
assert _clean_url_path(path) == expected


@pytest.mark.parametrize(
"path, expected",
"url, expected",
[
# Test a VCS path with a Windows drive letter and revision.
pytest.param(
"/T:/with space/repo.git@1.0",
"///T:/with%20space/repo.git@1.0",
"file:/T:/with space/repo.git@1.0",
"file:///T:/with%20space/repo.git@1.0",
marks=pytest.mark.skipif("sys.platform != 'win32'"),
),
# Test a VCS path with a Windows drive letter and revision,
# running on non-windows platform.
pytest.param(
"/T:/with space/repo.git@1.0",
"/T%3A/with%20space/repo.git@1.0",
"file:/T:/with space/repo.git@1.0",
"file:///T%3A/with%20space/repo.git@1.0",
marks=pytest.mark.skipif("sys.platform == 'win32'"),
),
],
)
def test_clean_url_path_with_local_path(path: str, expected: str) -> None:
actual = _clean_url_path(path, is_local_path=True)
def test_clean_file_url(url: str, expected: str) -> None:
actual = _clean_file_url(url)
assert actual == expected


Expand Down Expand Up @@ -389,30 +382,11 @@ def test_clean_url_path_with_local_path(path: str, expected: str) -> None:
),
# URL with Windows drive letter. The `:` after the drive
# letter should not be quoted. The trailing `/` should be
# removed.
pytest.param(
"file:///T:/path/with spaces/",
"file:///T:/path/with%20spaces",
marks=[
skip_needs_old_urlun_behavior_win,
skip_needs_old_pathname2url_trailing_slash_behavior_win,
],
),
pytest.param(
"file:///T:/path/with spaces/",
"file://///T:/path/with%20spaces",
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test expectation was also incorrect - we expect a URL with three leading slashes, not five.

(This problem also occurs a couple more times further down.)

marks=[
skip_needs_new_urlun_behavior_win,
skip_needs_old_pathname2url_trailing_slash_behavior_win,
],
),
# retained.
pytest.param(
"file:///T:/path/with spaces/",
"file://///T:/path/with%20spaces/",
marks=[
skip_needs_new_urlun_behavior_win,
skip_needs_new_pathname2url_trailing_slash_behavior_win,
],
"file:///T:/path/with%20spaces/",
marks=pytest.mark.skipif("sys.platform != 'win32'"),
),
# URL with Windows drive letter, running on non-windows
# platform. The `:` after the drive should be quoted.
Expand All @@ -425,18 +399,13 @@ def test_clean_url_path_with_local_path(path: str, expected: str) -> None:
pytest.param(
"git+file:///T:/with space/repo.git@1.0#egg=my-package-1.0",
"git+file:///T:/with%20space/repo.git@1.0#egg=my-package-1.0",
marks=skip_needs_old_urlun_behavior_win,
),
pytest.param(
"git+file:///T:/with space/repo.git@1.0#egg=my-package-1.0",
"git+file://///T:/with%20space/repo.git@1.0#egg=my-package-1.0",
marks=skip_needs_new_urlun_behavior_win,
marks=pytest.mark.skipif("sys.platform != 'win32'"),
),
# Test a VCS URL with a Windows drive letter and revision,
# running on non-windows platform.
pytest.param(
"git+file:///T:/with space/repo.git@1.0#egg=my-package-1.0",
"git+file:/T%3A/with%20space/repo.git@1.0#egg=my-package-1.0",
"git+file:///T%3A/with%20space/repo.git@1.0#egg=my-package-1.0",
"git+file:///T%3A/with%20space/[email protected]#egg=my-package-1.0",
marks=pytest.mark.skipif("sys.platform == 'win32'"),
),
],
Expand Down
Loading