Skip to content

Commit 42b19ab

Browse files
authored
Merge pull request #11936 from sbidoul/fix-link-hash-parsing
Various fixes to the link hash parser
2 parents aebc0c5 + 89e7208 commit 42b19ab

File tree

3 files changed

+55
-15
lines changed

3 files changed

+55
-15
lines changed

news/11936.bugfix.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix and improve the parsing of hashes embedded in URL fragments.

src/pip/_internal/models/link.py

Lines changed: 21 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -55,25 +55,37 @@ class LinkHash:
5555
name: str
5656
value: str
5757

58-
_hash_re = re.compile(
58+
_hash_url_fragment_re = re.compile(
5959
# NB: we do not validate that the second group (.*) is a valid hex
6060
# digest. Instead, we simply keep that string in this class, and then check it
6161
# against Hashes when hash-checking is needed. This is easier to debug than
6262
# proactively discarding an invalid hex digest, as we handle incorrect hashes
6363
# and malformed hashes in the same place.
64-
r"({choices})=(.*)".format(
64+
r"[#&]({choices})=([^&]*)".format(
6565
choices="|".join(re.escape(hash_name) for hash_name in _SUPPORTED_HASHES)
6666
),
6767
)
6868

6969
def __post_init__(self) -> None:
70-
assert self._hash_re.match(f"{self.name}={self.value}")
70+
assert self.name in _SUPPORTED_HASHES
71+
72+
@classmethod
73+
def parse_pep658_hash(cls, dist_info_metadata: str) -> Optional["LinkHash"]:
74+
"""Parse a PEP 658 data-dist-info-metadata hash."""
75+
if dist_info_metadata == "true":
76+
return None
77+
name, sep, value = dist_info_metadata.partition("=")
78+
if not sep:
79+
return None
80+
if name not in _SUPPORTED_HASHES:
81+
return None
82+
return cls(name=name, value=value)
7183

7284
@classmethod
7385
@functools.lru_cache(maxsize=None)
74-
def split_hash_name_and_value(cls, url: str) -> Optional["LinkHash"]:
86+
def find_hash_url_fragment(cls, url: str) -> Optional["LinkHash"]:
7587
"""Search a string for a checksum algorithm name and encoded output value."""
76-
match = cls._hash_re.search(url)
88+
match = cls._hash_url_fragment_re.search(url)
7789
if match is None:
7890
return None
7991
name, value = match.groups()
@@ -217,7 +229,7 @@ def __init__(
217229
# trying to set a new value.
218230
self._url = url
219231

220-
link_hash = LinkHash.split_hash_name_and_value(url)
232+
link_hash = LinkHash.find_hash_url_fragment(url)
221233
hashes_from_link = {} if link_hash is None else link_hash.as_dict()
222234
if hashes is None:
223235
self._hashes = hashes_from_link
@@ -402,15 +414,10 @@ def metadata_link(self) -> Optional["Link"]:
402414
if self.dist_info_metadata is None:
403415
return None
404416
metadata_url = f"{self.url_without_fragment}.metadata"
405-
# If data-dist-info-metadata="true" is set, then the metadata file exists,
406-
# but there is no information about its checksum or anything else.
407-
if self.dist_info_metadata != "true":
408-
link_hash = LinkHash.split_hash_name_and_value(self.dist_info_metadata)
409-
else:
410-
link_hash = None
411-
if link_hash is None:
417+
metadata_link_hash = LinkHash.parse_pep658_hash(self.dist_info_metadata)
418+
if metadata_link_hash is None:
412419
return Link(metadata_url)
413-
return Link(metadata_url, hashes=link_hash.as_dict())
420+
return Link(metadata_url, hashes=metadata_link_hash.as_dict())
414421

415422
def as_hashes(self) -> Hashes:
416423
return Hashes({k: [v] for k, v in self._hashes.items()})

tests/unit/test_collector.py

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1051,6 +1051,21 @@ def expand_path(path: str) -> str:
10511051
"https://pypi.org/pip-18.0.tar.gz#sha256=aa113592bbe",
10521052
LinkHash("sha256", "aa113592bbe"),
10531053
),
1054+
(
1055+
"https://pypi.org/pip-18.0.tar.gz#sha256=aa113592bbe&subdirectory=setup",
1056+
LinkHash("sha256", "aa113592bbe"),
1057+
),
1058+
(
1059+
"https://pypi.org/pip-18.0.tar.gz#subdirectory=setup&sha256=aa113592bbe",
1060+
LinkHash("sha256", "aa113592bbe"),
1061+
),
1062+
# "xsha256" is not a valid algorithm, so we discard it.
1063+
("https://pypi.org/pip-18.0.tar.gz#xsha256=aa113592bbe", None),
1064+
# Empty hash.
1065+
(
1066+
"https://pypi.org/pip-18.0.tar.gz#sha256=",
1067+
LinkHash("sha256", ""),
1068+
),
10541069
(
10551070
"https://pypi.org/pip-18.0.tar.gz#md5=aa113592bbe",
10561071
LinkHash("md5", "aa113592bbe"),
@@ -1061,4 +1076,21 @@ def expand_path(path: str) -> str:
10611076
],
10621077
)
10631078
def test_link_hash_parsing(url: str, result: Optional[LinkHash]) -> None:
1064-
assert LinkHash.split_hash_name_and_value(url) == result
1079+
assert LinkHash.find_hash_url_fragment(url) == result
1080+
1081+
1082+
@pytest.mark.parametrize(
1083+
"dist_info_metadata, result",
1084+
[
1085+
("sha256=aa113592bbe", LinkHash("sha256", "aa113592bbe")),
1086+
("sha256=", LinkHash("sha256", "")),
1087+
("sha500=aa113592bbe", None),
1088+
("true", None),
1089+
("", None),
1090+
("aa113592bbe", None),
1091+
],
1092+
)
1093+
def test_pep658_hash_parsing(
1094+
dist_info_metadata: str, result: Optional[LinkHash]
1095+
) -> None:
1096+
assert LinkHash.parse_pep658_hash(dist_info_metadata) == result

0 commit comments

Comments
 (0)