@@ -411,6 +411,45 @@ def test_clean_link(url, clean_url):
411
411
assert _clean_link (url ) == clean_url
412
412
413
413
414
def _test_parse_links_data_attribute(anchor_html, attr, expected):
    """Render a one-anchor page, run parse_links(), and check a Link attribute.

    :param anchor_html: the ``<a>`` element to embed in the page body.
    :param attr: name of the ``Link`` attribute to inspect (e.g.
        ``"requires_python"`` or ``"yanked_reason"``).
    :param expected: the value that attribute should have.
    """
    markup = f'<html><head><meta charset="utf-8"><head><body>{anchor_html}</body></html>'
    # parse_links() is cached by url, so inject a random uuid to make sure
    # each call sees fresh page content rather than a cached result.
    unique_url = f"https://example.com/simple-{uuid.uuid4()}/"
    page = HTMLPage(
        markup.encode("utf-8"),
        encoding=None,
        url=unique_url,
    )
    # Exactly one link is expected; tuple-unpacking asserts that for us.
    (link,) = parse_links(page)
    assert getattr(link, attr) == expected
@pytest.mark.parametrize(
    "anchor_html, expected",
    [
        # Attribute absent entirely.
        ('<a href="/pkg-1.0.tar.gz"></a>', None),
        # Attribute present but carrying no value.
        ('<a href="/pkg-1.0.tar.gz" data-requires-python></a>', None),
        # A value containing an escaped character.
        (
            '<a href="/pkg-1.0.tar.gz" data-requires-python=">=3.6"></a>',
            ">=3.6",
        ),
        # The requires-python value must be unescaped exactly once.
        (
            '<a href="/pkg-1.0.tar.gz" data-requires-python="&gt;=3.6"></a>',
            ">=3.6",
        ),
    ],
)
def test_parse_links__requires_python(anchor_html, expected):
    """Link.requires_python should mirror the data-requires-python attribute."""
    _test_parse_links_data_attribute(anchor_html, "requires_python", expected)
@pytest .mark .parametrize (
415
454
"anchor_html, expected" ,
416
455
[
@@ -429,27 +468,15 @@ def test_clean_link(url, clean_url):
429
468
'<a href="/pkg-1.0.tar.gz" data-yanked="curlyquote \u2018 "></a>' ,
430
469
"curlyquote \u2018 " ,
431
470
),
471
+ # Test yanked reason is unescaped once.
472
+ (
473
+ '<a href="/pkg-1.0.tar.gz" data-yanked="version &lt; 1"></a>' ,
474
+ "version < 1" ,
475
+ ),
432
476
],
433
477
)
434
478
def test_parse_links__yanked_reason(anchor_html, expected):
    """Link.yanked_reason should mirror the anchor's data-yanked attribute."""
    _test_parse_links_data_attribute(anchor_html, "yanked_reason", expected)
def test_parse_links_caches_same_page_by_url ():
0 commit comments