pypa · barneygale · Jul 21, 2025 · Jul 21, 2025 · Jul 23, 2025 · Jul 23, 2025
diff --git a/news/921e9395-3424-4731-a909-2ce5e029d613.trivial.rst b/news/921e9395-3424-4731-a909-2ce5e029d613.trivial.rst
diff --git a/src/pip/_internal/models/link.py b/src/pip/_internal/models/link.py
@@ -1,7 +1,6 @@
 from __future__ import annotations
 
 import functools
-import itertools
 import logging
 import os
 import posixpath
@@ -19,12 +18,11 @@
 from pip._internal.utils.filetypes import WHEEL_EXTENSION
 from pip._internal.utils.hashes import Hashes
 from pip._internal.utils.misc import (
-    pairwise,
     redact_auth_from_url,
     split_auth_from_netloc,
     splitext,
 )
-from pip._internal.utils.urls import path_to_url, url_to_path
+from pip._internal.utils.urls import clean_url, path_to_url, url_to_path
 
 if TYPE_CHECKING:
     from pip._internal.index.collector import IndexContent
@@ -113,68 +111,6 @@ def supported_hashes(hashes: dict[str, str] | None) -> dict[str, str] | None:
     return hashes
 
 
-def _clean_url_path_part(part: str) -> str:
-    """
-    Clean a "part" of a URL path (i.e. after splitting on "@" characters).
-    """
-    # We unquote prior to quoting to make sure nothing is double quoted.
-    return urllib.parse.quote(urllib.parse.unquote(part))
-
-
-def _clean_file_url_path(part: str) -> str:
-    """
-    Clean the first part of a URL path that corresponds to a local
-    filesystem path (i.e. the first part after splitting on "@" characters).
-    """
-    # We unquote prior to quoting to make sure nothing is double quoted.
-    # Also, on Windows the path part might contain a drive letter which
-    # should not be quoted. On Linux where drive letters do not
-    # exist, the colon should be quoted. We rely on urllib.request
-    # to do the right thing here.
-    return urllib.request.pathname2url(urllib.request.url2pathname(part))
-
-
-# percent-encoded:                   /
-_reserved_chars_re = re.compile("(@|%2F)", re.IGNORECASE)
-
-
-def _clean_url_path(path: str, is_local_path: bool) -> str:
-    """
-    Clean the path portion of a URL.
-    """
-    if is_local_path:
-        clean_func = _clean_file_url_path
-    else:
-        clean_func = _clean_url_path_part
-
-    # Split on the reserved characters prior to cleaning so that
-    # revision strings in VCS URLs are properly preserved.
-    parts = _reserved_chars_re.split(path)
-
-    cleaned_parts = []
-    for to_clean, reserved in pairwise(itertools.chain(parts, [""])):
-        cleaned_parts.append(clean_func(to_clean))
-        # Normalize %xx escapes (e.g. %2f -> %2F)
-        cleaned_parts.append(reserved.upper())
-
-    return "".join(cleaned_parts)
-
-
-def _ensure_quoted_url(url: str) -> str:
-    """
-    Make sure a link is fully quoted.
-    For example, if ' ' occurs in the URL, it will be replaced with "%20",
-    and without double-quoting other characters.
-    """
-    # Split the URL into parts according to the general structure
-    # `scheme://netloc/path?query#fragment`.
-    result = urllib.parse.urlsplit(url)
-    # If the netloc is empty, then the URL refers to a local filesystem path.
-    is_local_path = not result.netloc
-    path = _clean_url_path(result.path, is_local_path=is_local_path)
-    return urllib.parse.urlunsplit(result._replace(path=path))
-
-
 def _absolute_link_url(base_url: str, url: str) -> str:
     """
     A faster implementation of urllib.parse.urljoin with a shortcut
@@ -281,7 +217,7 @@ def from_json(
         if file_url is None:
             return None
 
-        url = _ensure_quoted_url(_absolute_link_url(page_url, file_url))
+        url = clean_url(_absolute_link_url(page_url, file_url))
         pyrequire = file_data.get("requires-python")
         yanked_reason = file_data.get("yanked")
         hashes = file_data.get("hashes", {})
@@ -333,7 +269,7 @@ def from_element(
         if not href:
             return None
 
-        url = _ensure_quoted_url(_absolute_link_url(base_url, href))
+        url = clean_url(_absolute_link_url(base_url, href))
         pyrequire = anchor_attribs.get("data-requires-python")
         yanked_reason = anchor_attribs.get("data-yanked")
 

diff --git a/src/pip/_internal/utils/urls.py b/src/pip/_internal/utils/urls.py
@@ -1,8 +1,12 @@
+import itertools
 import os
+import re
 import string
 import urllib.parse
 import urllib.request
 
+from pip._internal.utils.misc import pairwise
+
 from .compat import WINDOWS
 
 
@@ -53,3 +57,65 @@ def url_to_path(url: str) -> str:
         path = path[1:]
 
     return path
+
+
+def _clean_url_path_part(part: str) -> str:
+    """
+    Clean a "part" of a URL path (i.e. after splitting on "@" or "%2F").
+    """
+    # We unquote prior to quoting to make sure nothing is double quoted.
+    return urllib.parse.quote(urllib.parse.unquote(part))
+
+
+def _clean_file_url_path(part: str) -> str:
+    """
+    Clean a part of a URL path that corresponds to a local filesystem
+    path (i.e. a part obtained by splitting on "@" or "%2F").
+    """
+    # We unquote prior to quoting to make sure nothing is double quoted.
+    # Also, on Windows the path part might contain a drive letter which
+    # should not be quoted. On Linux where drive letters do not
+    # exist, the colon should be quoted. We rely on urllib.request
+    # to do the right thing here.
+    return urllib.request.pathname2url(urllib.request.url2pathname(part))
+
+
+# Reserved separators: "@" and a percent-encoded "/" (in either case).
+_reserved_chars_re = re.compile("(@|%2F)", re.IGNORECASE)
+
+
+def _clean_url_path(path: str, is_local_path: bool) -> str:
+    """
+    Clean the path portion of a URL without re-quoting "@" or "%2F".
+    """
+    if is_local_path:
+        clean_func = _clean_file_url_path
+    else:
+        clean_func = _clean_url_path_part
+
+    # Split on the reserved characters prior to cleaning so that
+    # revision strings in VCS URLs are properly preserved.
+    parts = _reserved_chars_re.split(path)
+
+    cleaned_parts = []
+    for to_clean, reserved in pairwise(itertools.chain(parts, [""])):
+        cleaned_parts.append(clean_func(to_clean))
+        # Normalize %xx escapes (e.g. %2f -> %2F)
+        cleaned_parts.append(reserved.upper())
+
+    return "".join(cleaned_parts)
+
+
+def clean_url(url: str) -> str:
+    """
+    Make sure the path component of a URL is fully quoted.
+    For example, if ' ' occurs in the path, it is replaced with "%20",
+    without double-quoting characters that are already quoted.
+    """
+    # Split the URL into parts according to the general structure
+    # `scheme://netloc/path?query#fragment`.
+    result = urllib.parse.urlsplit(url)
+    # If the netloc is empty, the URL is treated as a local filesystem path.
+    is_local_path = not result.netloc
+    path = _clean_url_path(result.path, is_local_path=is_local_path)
+    return urllib.parse.urlunsplit(result._replace(path=path))
diff --git a/src/pip/_internal/vcs/git.py b/src/pip/_internal/vcs/git.py
@@ -4,14 +4,13 @@
 import os.path
 import pathlib
 import re
-import urllib.parse
-import urllib.request
 from dataclasses import replace
 from typing import Any
 
 from pip._internal.exceptions import BadCommand, InstallationError
 from pip._internal.utils.misc import HiddenText, display_path, hide_url
 from pip._internal.utils.subprocess import make_command
+from pip._internal.utils.urls import clean_url
 from pip._internal.vcs.versioncontrol import (
     AuthInfo,
     RemoteNotFoundError,
@@ -22,10 +21,6 @@
     vcs,
 )
 
-urlsplit = urllib.parse.urlsplit
-urlunsplit = urllib.parse.urlunsplit
-
-
 logger = logging.getLogger(__name__)
 
 
@@ -502,16 +497,8 @@ def get_url_rev_and_auth(cls, url: str) -> tuple[str, str | None, AuthInfo]:
         """
         # Works around an apparent Git bug
         # (see https://article.gmane.org/gmane.comp.version-control.git/146500)
-        scheme, netloc, path, query, fragment = urlsplit(url)
-        if scheme.endswith("file"):
-            initial_slashes = path[: -len(path.lstrip("/"))]
-            newpath = initial_slashes + urllib.request.url2pathname(path).replace(
-                "\\", "/"
-            ).lstrip("/")
-            after_plus = scheme.find("+") + 1
-            url = scheme[:after_plus] + urlunsplit(
-                (scheme[after_plus:], netloc, newpath, query, fragment),
-            )
+        if url.startswith("git+file:"):
+            url = "git+" + clean_url(url[4:])
 
         if "://" not in url:
             assert "file:" not in url