Skip to content

Make conversion of file URLs more consistent #13501

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 20 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ jobs:
- "3.11"
- "3.12"
- "3.13"
- "3.14"

steps:
- uses: actions/checkout@v4
Expand Down Expand Up @@ -181,6 +182,7 @@ jobs:
# - "3.11"
# - "3.12"
- "3.13"
- "3.14"
group:
- { number: 1, pytest-filter: "not test_install" }
- { number: 2, pytest-filter: "test_install" }
Expand Down
1 change: 1 addition & 0 deletions news/13501.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Make conversion of file URLs more consistent across Python versions.
2 changes: 1 addition & 1 deletion noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def should_update_common_wheels() -> bool:
# -----------------------------------------------------------------------------
# Development Commands
# -----------------------------------------------------------------------------
@nox.session(python=["3.9", "3.10", "3.11", "3.12", "3.13", "pypy3"])
@nox.session(python=["3.9", "3.10", "3.11", "3.12", "3.13", "3.14", "pypy3"])
def test(session: nox.Session) -> None:
# Get the common wheels.
if should_update_common_wheels():
Expand Down
29 changes: 6 additions & 23 deletions src/pip/_internal/models/link.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
split_auth_from_netloc,
splitext,
)
from pip._internal.utils.urls import path_to_url, url_to_path
from pip._internal.utils.urls import clean_file_url, path_to_url, url_to_path

if TYPE_CHECKING:
from pip._internal.index.collector import IndexContent
Expand Down Expand Up @@ -121,39 +121,21 @@ def _clean_url_path_part(part: str) -> str:
return urllib.parse.quote(urllib.parse.unquote(part))


def _clean_file_url_path(part: str) -> str:
"""
Clean the first part of a URL path that corresponds to a local
filesystem path (i.e. the first part after splitting on "@" characters).
"""
# We unquote prior to quoting to make sure nothing is double quoted.
# Also, on Windows the path part might contain a drive letter which
# should not be quoted. On Linux where drive letters do not
# exist, the colon should be quoted. We rely on urllib.request
# to do the right thing here.
return urllib.request.pathname2url(urllib.request.url2pathname(part))


# percent-encoded: /
_reserved_chars_re = re.compile("(@|%2F)", re.IGNORECASE)


def _clean_url_path(path: str, is_local_path: bool) -> str:
def _clean_url_path(path: str) -> str:
"""
Clean the path portion of a URL.
"""
if is_local_path:
clean_func = _clean_file_url_path
else:
clean_func = _clean_url_path_part

# Split on the reserved characters prior to cleaning so that
# revision strings in VCS URLs are properly preserved.
parts = _reserved_chars_re.split(path)

cleaned_parts = []
for to_clean, reserved in pairwise(itertools.chain(parts, [""])):
cleaned_parts.append(clean_func(to_clean))
cleaned_parts.append(_clean_url_path_part(to_clean))
# Normalize %xx escapes (e.g. %2f -> %2F)
cleaned_parts.append(reserved.upper())

Expand All @@ -170,8 +152,9 @@ def _ensure_quoted_url(url: str) -> str:
# `scheme://netloc/path?query#fragment`.
result = urllib.parse.urlsplit(url)
# If the netloc is empty, then the URL refers to a local filesystem path.
is_local_path = not result.netloc
path = _clean_url_path(result.path, is_local_path=is_local_path)
if not result.netloc:
return clean_file_url(url)
path = _clean_url_path(result.path)
return urllib.parse.urlunsplit(result._replace(path=path))


Expand Down
108 changes: 74 additions & 34 deletions src/pip/_internal/utils/urls.py
Original file line number Diff line number Diff line change
@@ -1,55 +1,95 @@
import os
import string
import sys
import urllib.parse
import urllib.request

from .compat import WINDOWS


def path_to_url(path: str) -> str:
def path_to_url(path: str, normalize_path: bool = True) -> str:
"""
Convert a path to a file: URL. The path will be made absolute and have
quoted path parts.
Convert a path to a file: URL with quoted path parts. The path will be
normalized and made absolute if *normalize_path* is true (the default).
"""
path = os.path.normpath(os.path.abspath(path))
url = urllib.parse.urljoin("file:", urllib.request.pathname2url(path))
return url
if normalize_path:
path = os.path.abspath(path)
if WINDOWS:
path = path.replace("\\", "/")

drive, tail = os.path.splitdrive(path)
if drive:
if drive[:4] == "//?/":
drive = drive[4:]
if drive[:4].upper() == "UNC/":
drive = "//" + drive[4:]
if drive[1:] == ":":
drive = "///" + drive
elif tail.startswith("/"):
tail = "//" + tail

encoding = sys.getfilesystemencoding()
errors = sys.getfilesystemencodeerrors()
drive = urllib.parse.quote(drive, "/:", encoding, errors)
tail = urllib.parse.quote(tail, "/", encoding, errors)
return "file:" + drive + tail


def url_to_path(url: str) -> str:
"""
Convert a file: URL to a path.
"""
assert url.startswith(
"file:"
scheme, netloc, path = urllib.parse.urlsplit(url)[:3]
assert scheme == "file" or scheme.endswith(
"+file"
), f"You can only turn file: urls into filenames (not {url!r})"

_, netloc, path, _, _ = urllib.parse.urlsplit(url)
if WINDOWS:
# e.g. file://c:/foo
if netloc[1:2] == ":":
path = netloc + path

# e.g. file://server/share/foo
elif netloc and netloc != "localhost":
path = "//" + netloc + path

# e.g. file://///server/share/foo
elif path[:3] == "///":
path = path[1:]

if not netloc or netloc == "localhost":
# According to RFC 8089, same as empty authority.
netloc = ""
elif WINDOWS:
# If we have a UNC path, prepend UNC share notation.
netloc = "\\\\" + netloc
else:
# e.g. file:///c:/foo
elif path[:1] == "/" and path[2:3] == ":":
path = path[1:]

path = path.replace("/", "\\")
elif netloc and netloc != "localhost":
raise ValueError(
f"non-local file URIs are not supported on this platform: {url!r}"
)

path = urllib.request.url2pathname(netloc + path)

# On Windows, urlsplit parses the path as something like "/C:/Users/foo".
# This creates issues for path-related functions like io.open(), so we try
# to detect and strip the leading slash.
if (
WINDOWS
and not netloc # Not UNC.
and len(path) >= 3
and path[0] == "/" # Leading slash to strip.
and path[1] in string.ascii_letters # Drive letter.
and path[2:4] in (":", ":/") # Colon + end of string, or colon + absolute path.
):
path = path[1:]

return path
encoding = sys.getfilesystemencoding()
errors = sys.getfilesystemencodeerrors()
return urllib.parse.unquote(path, encoding, errors)


def clean_file_url(url: str) -> str:
    """
    Normalize the quoting and leading slashes of a file: URL.

    The netloc and path are rebuilt by converting the URL to a filesystem
    path and back again. The scheme, query and fragment of the input are
    carried over unchanged, and "@" characters are never percent-encoded.

    e.g. on Windows, 'file:/c:/foo bar@1.0' --> 'file:///c:/foo%20bar@1.0'.
    """
    # Hide "@" behind a placeholder made only of characters that quoting
    # leaves alone, so the round trip below cannot percent-encode it.
    placeholder = "---PIP_AT_SYMBOL---"
    assert placeholder not in url
    masked_url = url.replace("@", placeholder)
    original = urllib.parse.urlsplit(masked_url)

    # Round-trip through a filesystem path. This tidies the netloc and path
    # but discards the scheme prefix, query and fragment.
    rebuilt_url = path_to_url(url_to_path(masked_url), normalize_path=False)
    rebuilt = urllib.parse.urlsplit(rebuilt_url)

    # Combine the tidied scheme/netloc/path with the original query and
    # fragment, then put the original scheme (e.g. "git+file") back in place
    # of the plain one produced by the round trip.
    merged = urllib.parse.urlunsplit(
        (
            rebuilt.scheme,
            rebuilt.netloc,
            rebuilt.path,
            original.query,
            original.fragment,
        )
    )
    merged = merged.replace(rebuilt.scheme, original.scheme, 1)

    # Finally, bring back the "@" characters hidden at the start.
    return merged.replace(placeholder, "@")
19 changes: 3 additions & 16 deletions src/pip/_internal/vcs/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,13 @@
import os.path
import pathlib
import re
import urllib.parse
import urllib.request
from dataclasses import replace
from typing import Any

from pip._internal.exceptions import BadCommand, InstallationError
from pip._internal.utils.misc import HiddenText, display_path, hide_url
from pip._internal.utils.subprocess import make_command
from pip._internal.utils.urls import clean_file_url
from pip._internal.vcs.versioncontrol import (
AuthInfo,
RemoteNotFoundError,
Expand All @@ -22,10 +21,6 @@
vcs,
)

urlsplit = urllib.parse.urlsplit
urlunsplit = urllib.parse.urlunsplit


logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -502,16 +497,8 @@ def get_url_rev_and_auth(cls, url: str) -> tuple[str, str | None, AuthInfo]:
"""
# Works around an apparent Git bug
# (see https://article.gmane.org/gmane.comp.version-control.git/146500)
scheme, netloc, path, query, fragment = urlsplit(url)
if scheme.endswith("file"):
initial_slashes = path[: -len(path.lstrip("/"))]
newpath = initial_slashes + urllib.request.url2pathname(path).replace(
"\\", "/"
).lstrip("/")
after_plus = scheme.find("+") + 1
url = scheme[:after_plus] + urlunsplit(
(scheme[after_plus:], netloc, newpath, query, fragment),
)
if url.startswith("git+file:"):
url = clean_file_url(url)

if "://" not in url:
assert "file:" not in url
Expand Down
34 changes: 0 additions & 34 deletions tests/lib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@
from io import BytesIO, StringIO
from textwrap import dedent
from typing import Any, AnyStr, Callable, Literal, Protocol, Union, cast
from urllib.parse import urlparse, urlunparse
from urllib.request import pathname2url
from zipfile import ZipFile

import pytest
Expand Down Expand Up @@ -1365,35 +1363,3 @@ def __call__(


CertFactory = Callable[[], str]

# -------------------------------------------------------------------------
# Accommodations for Windows path and URL changes in recent Python releases
# -------------------------------------------------------------------------

# versions containing fix/backport from https://github.com/python/cpython/pull/113563
# which changed the behavior of `urllib.parse.urlun{parse,split}`
url = "////path/to/file"
has_new_urlun_behavior = url == urlunparse(urlparse(url))

# the above change seems to only impact tests on Windows, so just add skips for that
skip_needs_new_urlun_behavior_win = pytest.mark.skipif(
sys.platform != "win32" or not has_new_urlun_behavior,
reason="testing windows behavior for newer CPython",
)

skip_needs_old_urlun_behavior_win = pytest.mark.skipif(
sys.platform != "win32" or has_new_urlun_behavior,
reason="testing windows behavior for older CPython",
)

# Trailing slashes are now preserved on Windows, matching POSIX behaviour.
# BPO: https://github.com/python/cpython/issues/126212
does_pathname2url_preserve_trailing_slash = pathname2url("C:/foo/").endswith("/")
skip_needs_new_pathname2url_trailing_slash_behavior_win = pytest.mark.skipif(
sys.platform != "win32" or not does_pathname2url_preserve_trailing_slash,
reason="testing windows (pathname2url) behavior for newer CPython",
)
skip_needs_old_pathname2url_trailing_slash_behavior_win = pytest.mark.skipif(
sys.platform != "win32" or does_pathname2url_preserve_trailing_slash,
reason="testing windows (pathname2url) behavior for older CPython",
)
Loading
Loading