Skip to content

Commit 6c3db09

Browse files
committed
Fix parsing of JSON index dist-info-metadata values
1 parent 72a32e9 commit 6c3db09

File tree

2 files changed

+89
-45
lines changed

2 files changed

+89
-45
lines changed

src/pip/_internal/models/link.py

Lines changed: 65 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -69,18 +69,6 @@ class LinkHash:
6969
def __post_init__(self) -> None:
7070
assert self.name in _SUPPORTED_HASHES
7171

72-
@classmethod
73-
def parse_pep658_hash(cls, dist_info_metadata: str) -> Optional["LinkHash"]:
74-
"""Parse a PEP 658 data-dist-info-metadata hash."""
75-
if dist_info_metadata == "true":
76-
return None
77-
name, sep, value = dist_info_metadata.partition("=")
78-
if not sep:
79-
return None
80-
if name not in _SUPPORTED_HASHES:
81-
return None
82-
return cls(name=name, value=value)
83-
8472
@classmethod
8573
@functools.lru_cache(maxsize=None)
8674
def find_hash_url_fragment(cls, url: str) -> Optional["LinkHash"]:
@@ -107,6 +95,20 @@ def is_hash_allowed(self, hashes: Optional[Hashes]) -> bool:
10795
return hashes.is_hash_allowed(self.name, hex_digest=self.value)
10896

10997

98+
@dataclass(frozen=True)
99+
class MetadataFile:
100+
"""Information about a core metadata file associated with a distribution."""
101+
102+
hashes: Optional[dict[str, str]]
103+
104+
# TODO: Do we care about stripping out unsupported hash methods?
105+
def __init__(self, hashes: Optional[dict[str, str]]):
106+
if hashes:
107+
hashes = {n: v for n, v in hashes.items() if n in _SUPPORTED_HASHES}
108+
# We need to use this as this is a frozen dataclass
109+
object.__setattr__(self, "hashes", hashes)
110+
111+
110112
def _clean_url_path_part(part: str) -> str:
111113
"""
112114
Clean a "part" of a URL path (i.e. after splitting on "@" characters).
@@ -179,7 +181,7 @@ class Link(KeyBasedCompareMixin):
179181
"comes_from",
180182
"requires_python",
181183
"yanked_reason",
182-
"dist_info_metadata",
184+
"metadata_file_data",
183185
"cache_link_parsing",
184186
"egg_fragment",
185187
]
@@ -190,7 +192,7 @@ def __init__(
190192
comes_from: Optional[Union[str, "IndexContent"]] = None,
191193
requires_python: Optional[str] = None,
192194
yanked_reason: Optional[str] = None,
193-
dist_info_metadata: Optional[str] = None,
195+
metadata_file_data: Optional[MetadataFile] = None,
194196
cache_link_parsing: bool = True,
195197
hashes: Optional[Mapping[str, str]] = None,
196198
) -> None:
@@ -208,18 +210,21 @@ def __init__(
208210
a simple repository HTML link. If the file has been yanked but
209211
no reason was provided, this should be the empty string. See
210212
PEP 592 for more information and the specification.
211-
:param dist_info_metadata: the metadata attached to the file, or None if no such
212-
metadata is provided. This is the value of the "data-dist-info-metadata"
213-
attribute, if present, in a simple repository HTML link. This may be parsed
214-
into its own `Link` by `self.metadata_link()`. See PEP 658 for more
215-
information and the specification.
213+
:param metadata_file_data: the metadata attached to the file, or None if
214+
no such metadata is provided. This argument, if not None, indicates
215+
that a separate metadata file exists, and also optionally supplies
216+
hashes for that file.
216217
:param cache_link_parsing: A flag that is used elsewhere to determine
217218
whether resources retrieved from this link should be cached. PyPI
218219
URLs should generally have this set to False, for example.
219220
:param hashes: A mapping of hash names to digests to allow us to
220221
determine the validity of a download.
221222
"""
222223

224+
# The comes_from, requires_python, and metadata_file_data arguments are
225+
# only used by classmethods of this class, and are not used in client
226+
# code directly.
227+
223228
# url can be a UNC windows share
224229
if url.startswith("\\\\"):
225230
url = path_to_url(url)
@@ -239,7 +244,7 @@ def __init__(
239244
self.comes_from = comes_from
240245
self.requires_python = requires_python if requires_python else None
241246
self.yanked_reason = yanked_reason
242-
self.dist_info_metadata = dist_info_metadata
247+
self.metadata_file_data = metadata_file_data
243248

244249
super().__init__(key=url, defining_class=Link)
245250

@@ -262,9 +267,20 @@ def from_json(
262267
url = _ensure_quoted_url(urllib.parse.urljoin(page_url, file_url))
263268
pyrequire = file_data.get("requires-python")
264269
yanked_reason = file_data.get("yanked")
265-
dist_info_metadata = file_data.get("dist-info-metadata")
266270
hashes = file_data.get("hashes", {})
267271

272+
# The dist-info-metadata value may be a boolean, or a dict of hashes.
273+
metadata_info = file_data.get("dist-info-metadata", False)
274+
if isinstance(metadata_info, dict):
275+
# The file exists, and hashes have been supplied
276+
metadata_file_data = MetadataFile(metadata_info)
277+
elif metadata_info:
278+
# The file exists, but there are no hashes
279+
metadata_file_data = MetadataFile(None)
280+
else:
281+
# The file does not exist
282+
metadata_file_data = None
283+
268284
# The Link.yanked_reason expects an empty string instead of a boolean.
269285
if yanked_reason and not isinstance(yanked_reason, str):
270286
yanked_reason = ""
@@ -278,7 +294,7 @@ def from_json(
278294
requires_python=pyrequire,
279295
yanked_reason=yanked_reason,
280296
hashes=hashes,
281-
dist_info_metadata=dist_info_metadata,
297+
metadata_file_data=metadata_file_data,
282298
)
283299

284300
@classmethod
@@ -298,14 +314,35 @@ def from_element(
298314
url = _ensure_quoted_url(urllib.parse.urljoin(base_url, href))
299315
pyrequire = anchor_attribs.get("data-requires-python")
300316
yanked_reason = anchor_attribs.get("data-yanked")
301-
dist_info_metadata = anchor_attribs.get("data-dist-info-metadata")
317+
318+
# The dist-info-metadata value may be the string "true", or a string of
319+
# the form "hashname=hashval"
320+
metadata_info = anchor_attribs.get("data-dist-info-metadata")
321+
if metadata_info == "true":
322+
# The file exists, but there are no hashes
323+
metadata_file_data = MetadataFile(None)
324+
elif metadata_info is None:
325+
# The file does not exist
326+
metadata_file_data = None
327+
else:
328+
# The file exists, and hashes have been supplied
329+
hashname, sep, hashval = metadata_info.partition("=")
330+
if sep == "=":
331+
metadata_file_data = MetadataFile({hashname: hashval})
332+
else:
333+
# Error - data is wrong. Treat as no hashes supplied.
334+
logger.debug(
335+
"Index returned invalid data-dist-info-metadata value: %s",
336+
metadata_info,
337+
)
338+
metadata_file_data = MetadataFile(None)
302339

303340
return cls(
304341
url,
305342
comes_from=page_url,
306343
requires_python=pyrequire,
307344
yanked_reason=yanked_reason,
308-
dist_info_metadata=dist_info_metadata,
345+
metadata_file_data=metadata_file_data,
309346
)
310347

311348
def __str__(self) -> str:
@@ -407,17 +444,13 @@ def subdirectory_fragment(self) -> Optional[str]:
407444
return match.group(1)
408445

409446
def metadata_link(self) -> Optional["Link"]:
410-
"""Implementation of PEP 658 parsing."""
411-
# Note that Link.from_element() parsing the "data-dist-info-metadata" attribute
412-
# from an HTML anchor tag is typically how the Link.dist_info_metadata attribute
413-
# gets set.
414-
if self.dist_info_metadata is None:
447+
"""Return a link to the associated core metadata file (if any)."""
448+
if self.metadata_file_data is None:
415449
return None
416450
metadata_url = f"{self.url_without_fragment}.metadata"
417-
metadata_link_hash = LinkHash.parse_pep658_hash(self.dist_info_metadata)
418-
if metadata_link_hash is None:
451+
if self.metadata_file_data.hashes is None:
419452
return Link(metadata_url)
420-
return Link(metadata_url, hashes=metadata_link_hash.as_dict())
453+
return Link(metadata_url, hashes=self.metadata_file_data.hashes)
421454

422455
def as_hashes(self) -> Hashes:
423456
return Hashes({k: [v] for k, v in self._hashes.items()})

tests/unit/test_collector.py

Lines changed: 24 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
from pip._internal.models.link import (
3131
Link,
3232
LinkHash,
33+
MetadataFile,
3334
_clean_url_path,
3435
_ensure_quoted_url,
3536
)
@@ -527,7 +528,7 @@ def test_parse_links_json() -> None:
527528
requires_python=">=3.7",
528529
yanked_reason=None,
529530
hashes={"sha256": "sha256 hash", "blake2b": "blake2b hash"},
530-
dist_info_metadata="sha512=aabdd41",
531+
metadata_file_data=MetadataFile({"sha512": "aabdd41"}),
531532
),
532533
]
533534

@@ -603,12 +604,12 @@ def test_parse_links__yanked_reason(anchor_html: str, expected: Optional[str]) -
603604
),
604605
],
605606
)
606-
def test_parse_links__dist_info_metadata(
607+
def test_parse_links__metadata_file_data(
607608
anchor_html: str,
608609
expected: Optional[str],
609610
hashes: Dict[str, str],
610611
) -> None:
611-
link = _test_parse_links_data_attribute(anchor_html, "dist_info_metadata", expected)
612+
link = _test_parse_links_data_attribute(anchor_html, "metadata_file_data", expected)
612613
assert link._hashes == hashes
613614

614615

@@ -1080,17 +1081,27 @@ def test_link_hash_parsing(url: str, result: Optional[LinkHash]) -> None:
10801081

10811082

10821083
@pytest.mark.parametrize(
1083-
"dist_info_metadata, result",
1084+
"metadata_attrib, expected",
10841085
[
1085-
("sha256=aa113592bbe", LinkHash("sha256", "aa113592bbe")),
1086-
("sha256=", LinkHash("sha256", "")),
1087-
("sha500=aa113592bbe", None),
1088-
("true", None),
1089-
("", None),
1090-
("aa113592bbe", None),
1086+
("sha256=aa113592bbe", MetadataFile({"sha256": "aa113592bbe"})),
1087+
("sha256=", MetadataFile({"sha256": ""})),
1088+
("sha500=aa113592bbe", MetadataFile({})),
1089+
("true", MetadataFile(None)),
1090+
(None, None),
1091+
# TODO: Are these correct?
1092+
("", MetadataFile(None)),
1093+
("aa113592bbe", MetadataFile(None)),
10911094
],
10921095
)
1093-
def test_pep658_hash_parsing(
1094-
dist_info_metadata: str, result: Optional[LinkHash]
1096+
def test_metadata_file_info_parsing_html(
1097+
metadata_attrib: str, expected: Optional[MetadataFile]
10951098
) -> None:
1096-
assert LinkHash.parse_pep658_hash(dist_info_metadata) == result
1099+
attribs: Dict[str, Optional[str]] = {
1100+
"href": "something",
1101+
"data-dist-info-metadata": metadata_attrib,
1102+
}
1103+
page_url = "dummy_for_comes_from"
1104+
base_url = "https://index.url/simple"
1105+
link = Link.from_element(attribs, page_url, base_url)
1106+
assert link is not None and link.metadata_file_data == expected
1107+
# TODO: Do we need to do something for the JSON data?

0 commit comments

Comments
 (0)