@@ -69,18 +69,6 @@ class LinkHash:
69
69
def __post_init__(self) -> None:
    """Assert that ``self.name`` is one of the names in ``_SUPPORTED_HASHES``."""
    assert self.name in _SUPPORTED_HASHES
71
71
72
- @classmethod
73
- def parse_pep658_hash (cls , dist_info_metadata : str ) -> Optional ["LinkHash" ]:
74
- """Parse a PEP 658 data-dist-info-metadata hash."""
75
- if dist_info_metadata == "true" :
76
- return None
77
- name , sep , value = dist_info_metadata .partition ("=" )
78
- if not sep :
79
- return None
80
- if name not in _SUPPORTED_HASHES :
81
- return None
82
- return cls (name = name , value = value )
83
-
84
72
@classmethod
85
73
@functools .lru_cache (maxsize = None )
86
74
def find_hash_url_fragment (cls , url : str ) -> Optional ["LinkHash" ]:
@@ -107,6 +95,20 @@ def is_hash_allowed(self, hashes: Optional[Hashes]) -> bool:
107
95
return hashes .is_hash_allowed (self .name , hex_digest = self .value )
108
96
109
97
98
@dataclass(frozen=True)
class MetadataFile:
    """Describe the core metadata file associated with a distribution.

    ``hashes`` maps hash names to digests for that file, or is None when no
    hashes were supplied for it.
    """

    hashes: Optional[dict[str, str]]

    # TODO: Do we care about stripping out unsupported hash methods?
    def __init__(self, hashes: Optional[dict[str, str]]):
        supported = hashes
        if supported:
            # Keep only the entries whose hash name is in _SUPPORTED_HASHES.
            supported = {
                algorithm: digest
                for algorithm, digest in supported.items()
                if algorithm in _SUPPORTED_HASHES
            }
        # A frozen dataclass rejects ordinary attribute assignment, so the
        # value has to be stored through object.__setattr__.
        object.__setattr__(self, "hashes", supported)
110
+
111
+
110
112
def _clean_url_path_part (part : str ) -> str :
111
113
"""
112
114
Clean a "part" of a URL path (i.e. after splitting on "@" characters).
@@ -179,7 +181,7 @@ class Link(KeyBasedCompareMixin):
179
181
"comes_from" ,
180
182
"requires_python" ,
181
183
"yanked_reason" ,
182
- "dist_info_metadata " ,
184
+ "metadata_file_data " ,
183
185
"cache_link_parsing" ,
184
186
"egg_fragment" ,
185
187
]
@@ -190,7 +192,7 @@ def __init__(
190
192
comes_from : Optional [Union [str , "IndexContent" ]] = None ,
191
193
requires_python : Optional [str ] = None ,
192
194
yanked_reason : Optional [str ] = None ,
193
- dist_info_metadata : Optional [str ] = None ,
195
+ metadata_file_data : Optional [MetadataFile ] = None ,
194
196
cache_link_parsing : bool = True ,
195
197
hashes : Optional [Mapping [str , str ]] = None ,
196
198
) -> None :
@@ -208,18 +210,21 @@ def __init__(
208
210
a simple repository HTML link. If the file has been yanked but
209
211
no reason was provided, this should be the empty string. See
210
212
PEP 592 for more information and the specification.
211
- :param dist_info_metadata: the metadata attached to the file, or None if no such
212
- metadata is provided. This is the value of the "data-dist-info-metadata"
213
- attribute, if present, in a simple repository HTML link. This may be parsed
214
- into its own `Link` by `self.metadata_link()`. See PEP 658 for more
215
- information and the specification.
213
+ :param metadata_file_data: the metadata attached to the file, or None if
214
+ no such metadata is provided. This argument, if not None, indicates
215
+ that a separate metadata file exists, and also optionally supplies
216
+ hashes for that file.
216
217
:param cache_link_parsing: A flag that is used elsewhere to determine
217
218
whether resources retrieved from this link should be cached. PyPI
218
219
URLs should generally have this set to False, for example.
219
220
:param hashes: A mapping of hash names to digests to allow us to
220
221
determine the validity of a download.
221
222
"""
222
223
224
+ # The comes_from, requires_python, and metadata_file_data arguments are
225
+ # only used by classmethods of this class, and are not used in client
226
+ # code directly.
227
+
223
228
# url can be a UNC windows share
224
229
if url .startswith ("\\ \\ " ):
225
230
url = path_to_url (url )
@@ -239,7 +244,7 @@ def __init__(
239
244
self .comes_from = comes_from
240
245
self .requires_python = requires_python if requires_python else None
241
246
self .yanked_reason = yanked_reason
242
- self .dist_info_metadata = dist_info_metadata
247
+ self .metadata_file_data = metadata_file_data
243
248
244
249
super ().__init__ (key = url , defining_class = Link )
245
250
@@ -262,9 +267,20 @@ def from_json(
262
267
url = _ensure_quoted_url (urllib .parse .urljoin (page_url , file_url ))
263
268
pyrequire = file_data .get ("requires-python" )
264
269
yanked_reason = file_data .get ("yanked" )
265
- dist_info_metadata = file_data .get ("dist-info-metadata" )
266
270
hashes = file_data .get ("hashes" , {})
267
271
272
+ # The dist-info-metadata value may be a boolean, or a dict of hashes.
273
+ metadata_info = file_data .get ("dist-info-metadata" , False )
274
+ if isinstance (metadata_info , dict ):
275
+ # The file exists, and hashes have been supplied
276
+ metadata_file_data = MetadataFile (metadata_info )
277
+ elif metadata_info :
278
+ # The file exists, but there are no hashes
279
+ metadata_file_data = MetadataFile (None )
280
+ else :
281
+ # The file does not exist
282
+ metadata_file_data = None
283
+
268
284
# The Link.yanked_reason expects an empty string instead of a boolean.
269
285
if yanked_reason and not isinstance (yanked_reason , str ):
270
286
yanked_reason = ""
@@ -278,7 +294,7 @@ def from_json(
278
294
requires_python = pyrequire ,
279
295
yanked_reason = yanked_reason ,
280
296
hashes = hashes ,
281
- dist_info_metadata = dist_info_metadata ,
297
+ metadata_file_data = metadata_file_data ,
282
298
)
283
299
284
300
@classmethod
@@ -298,14 +314,35 @@ def from_element(
298
314
url = _ensure_quoted_url (urllib .parse .urljoin (base_url , href ))
299
315
pyrequire = anchor_attribs .get ("data-requires-python" )
300
316
yanked_reason = anchor_attribs .get ("data-yanked" )
301
- dist_info_metadata = anchor_attribs .get ("data-dist-info-metadata" )
317
+
318
+ # The dist-info-metadata value may be the string "true", or a string of
319
+ # the form "hashname=hashval"
320
+ metadata_info = anchor_attribs .get ("data-dist-info-metadata" )
321
+ if metadata_info == "true" :
322
+ # The file exists, but there are no hashes
323
+ metadata_file_data = MetadataFile (None )
324
+ elif metadata_info is None :
325
+ # The file does not exist
326
+ metadata_file_data = None
327
+ else :
328
+ # The file exists, and hashes have been supplied
329
+ hashname , sep , hashval = metadata_info .partition ("=" )
330
+ if sep == "=" :
331
+ metadata_file_data = MetadataFile ({hashname : hashval })
332
+ else :
333
+ # Error - data is wrong. Treat as no hashes supplied.
334
+ logger .debug (
335
+ "Index returned invalid data-dist-info-metadata value: %s" ,
336
+ metadata_info ,
337
+ )
338
+ metadata_file_data = MetadataFile (None )
302
339
303
340
return cls (
304
341
url ,
305
342
comes_from = page_url ,
306
343
requires_python = pyrequire ,
307
344
yanked_reason = yanked_reason ,
308
- dist_info_metadata = dist_info_metadata ,
345
+ metadata_file_data = metadata_file_data ,
309
346
)
310
347
311
348
def __str__ (self ) -> str :
@@ -407,17 +444,13 @@ def subdirectory_fragment(self) -> Optional[str]:
407
444
return match .group (1 )
408
445
409
446
def metadata_link(self) -> Optional["Link"]:
    """Build the link to this file's separate core metadata file.

    Returns None when ``self.metadata_file_data`` is None. Otherwise the
    result points at this link's fragment-less URL with ``.metadata``
    appended, and carries the metadata file's hashes when they are not None.
    """
    file_data = self.metadata_file_data
    if file_data is None:
        return None
    metadata_url = f"{self.url_without_fragment}.metadata"
    known_hashes = file_data.hashes
    if known_hashes is None:
        return Link(metadata_url)
    return Link(metadata_url, hashes=known_hashes)
421
454
422
455
def as_hashes(self) -> Hashes:
    """Return a ``Hashes`` built from ``self._hashes``.

    Each digest is wrapped in a one-element list under its hash name.
    """
    digests_by_name = {name: [digest] for name, digest in self._hashes.items()}
    return Hashes(digests_by_name)
0 commit comments