Skip to content

Commit f7cfbc3

Browse files
committed
Improve use of url2pathname()
Call `_clean_file_url_path()` (and, through it, `url2pathname()`) with the complete URL path, rather than with the separate parts of the path obtained by splitting on `/@|%2f/`. This lays the groundwork for using pip's own URL utilities from `link.py`.
1 parent 4fe68d7 commit f7cfbc3

File tree

2 files changed

+9
-14
lines changed

2 files changed

+9
-14
lines changed

news/d8d59afa-40e6-43fc-a9c4-b619c55edfc2.trivial.rst

Whitespace-only changes.

src/pip/_internal/models/link.py

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
from __future__ import annotations
22

33
import functools
4-
import itertools
54
import logging
65
import os
76
import posixpath
@@ -19,7 +18,6 @@
1918
from pip._internal.utils.filetypes import WHEEL_EXTENSION
2019
from pip._internal.utils.hashes import Hashes
2120
from pip._internal.utils.misc import (
22-
pairwise,
2321
redact_auth_from_url,
2422
split_auth_from_netloc,
2523
splitext,
@@ -113,12 +111,12 @@ def supported_hashes(hashes: dict[str, str] | None) -> dict[str, str] | None:
113111
return hashes
114112

115113

116-
def _clean_url_path_part(part: str) -> str:
114+
def _clean_url_path_part(part: str, safe: str = "/") -> str:
117115
"""
118116
Clean a "part" of a URL path (i.e. after splitting on "@" characters).
119117
"""
120118
# We unquote prior to quoting to make sure nothing is double quoted.
121-
return urllib.parse.quote(urllib.parse.unquote(part))
119+
return urllib.parse.quote(urllib.parse.unquote(part), safe)
122120

123121

124122
def _clean_file_url_path(part: str) -> str:
@@ -140,6 +138,7 @@ def _clean_file_url_path(part: str) -> str:
140138

141139
# percent-encoded: /
142140
_reserved_chars_re = re.compile("(@|%2F)", re.IGNORECASE)
141+
_escaped_chars_re = re.compile("---PIP-(%40|/)-PIP---")
143142

144143

145144
def _clean_url_path(path: str, is_local_path: bool) -> str:
@@ -151,17 +150,13 @@ def _clean_url_path(path: str, is_local_path: bool) -> str:
151150
else:
152151
clean_func = _clean_url_path_part
153152

154-
# Split on the reserved characters prior to cleaning so that
153+
# Tag the reserved characters prior to cleaning so that
155154
# revision strings in VCS URLs are properly preserved.
156-
parts = _reserved_chars_re.split(path)
157-
158-
cleaned_parts = []
159-
for to_clean, reserved in pairwise(itertools.chain(parts, [""])):
160-
cleaned_parts.append(clean_func(to_clean))
161-
# Normalize %xx escapes (e.g. %2f -> %2F)
162-
cleaned_parts.append(reserved.upper())
163-
164-
return "".join(cleaned_parts)
155+
path = _reserved_chars_re.sub(r"---PIP-\1-PIP---", path)
156+
path = clean_func(path)
157+
# Untag and restore the reserved characters.
158+
path = _escaped_chars_re.sub(lambda m: _clean_url_path_part(m[1], safe="@"), path)
159+
return path
165160

166161

167162
def _ensure_quoted_url(url: str) -> str:

0 commit comments

Comments
 (0)