@@ -52,7 +52,7 @@ def _match_vcs_scheme(url: str) -> Optional[str]:
    Returns the matched VCS scheme, or None if there's no match.
    """
    for scheme in vcs.schemes:
-        if url.lower().startswith(scheme) and url[len(scheme)] in '+:':
+        if url.lower().startswith(scheme) and url[len(scheme)] in "+:":
            return scheme
    return None

@@ -85,7 +85,7 @@ def _ensure_html_response(url: str, session: PipSession) -> None:
    `_NotHTML` if the content type is not text/html.
    """
    scheme, netloc, path, query, fragment = urllib.parse.urlsplit(url)
-    if scheme not in {'http', 'https'}:
+    if scheme not in {"http", "https"}:
        raise _NotHTTP()

    resp = session.head(url, allow_redirects=True)
@@ -110,7 +110,7 @@ def _get_html_response(url: str, session: PipSession) -> Response:
    if is_archive_file(Link(url).filename):
        _ensure_html_response(url, session=session)

-    logger.debug('Getting page %s', redact_auth_from_url(url))
+    logger.debug("Getting page %s", redact_auth_from_url(url))

    resp = session.get(
        url,
@@ -145,12 +145,11 @@ def _get_html_response(url: str, session: PipSession) -> Response:


def _get_encoding_from_headers(headers: ResponseHeaders) -> Optional[str]:
-    """Determine if we have any encoding information in our headers.
-    """
+    """Determine if we have any encoding information in our headers."""
    if headers and "Content-Type" in headers:
        content_type, params = cgi.parse_header(headers["Content-Type"])
        if "charset" in params:
-            return params['charset']
+            return params["charset"]
    return None

@@ -195,7 +194,7 @@ def _clean_file_url_path(part: str) -> str:


# percent-encoded: /
-_reserved_chars_re = re.compile('(@|%2F)', re.IGNORECASE)
+_reserved_chars_re = re.compile("(@|%2F)", re.IGNORECASE)


def _clean_url_path(path: str, is_local_path: bool) -> str:
@@ -212,12 +211,12 @@ def _clean_url_path(path: str, is_local_path: bool) -> str:
    parts = _reserved_chars_re.split(path)

    cleaned_parts = []
-    for to_clean, reserved in pairwise(itertools.chain(parts, [''])):
+    for to_clean, reserved in pairwise(itertools.chain(parts, [""])):
        cleaned_parts.append(clean_func(to_clean))
        # Normalize %xx escapes (e.g. %2f -> %2F)
        cleaned_parts.append(reserved.upper())

-    return ''.join(cleaned_parts)
+    return "".join(cleaned_parts)


def _clean_link(url: str) -> str:
@@ -248,10 +247,10 @@ def _create_link_from_element(
        return None

    url = _clean_link(urllib.parse.urljoin(base_url, href))
-    pyrequire = anchor.get('data-requires-python')
+    pyrequire = anchor.get("data-requires-python")
    pyrequire = html.unescape(pyrequire) if pyrequire else None

-    yanked_reason = anchor.get('data-yanked')
+    yanked_reason = anchor.get("data-yanked")
    if yanked_reason:
        yanked_reason = html.unescape(yanked_reason)

@@ -271,8 +270,7 @@ def __init__(self, page: "HTMLPage") -> None:
        self.page = page

    def __eq__(self, other: object) -> bool:
-        return (isinstance(other, type(self)) and
-                self.page.url == other.page.url)
+        return isinstance(other, type(self)) and self.page.url == other.page.url

    def __hash__(self) -> int:
        return hash(self.page.url)
@@ -353,7 +351,7 @@ def __str__(self) -> str:
def _handle_get_page_fail(
    link: Link,
    reason: Union[str, Exception],
-    meth: Optional[Callable[..., None]] = None
+    meth: Optional[Callable[..., None]] = None,
) -> None:
    if meth is None:
        meth = logger.debug
@@ -366,7 +364,8 @@ def _make_html_page(response: Response, cache_link_parsing: bool = True) -> HTMLPage:
        response.content,
        encoding=encoding,
        url=response.url,
-        cache_link_parsing=cache_link_parsing)
+        cache_link_parsing=cache_link_parsing,
+    )


def _get_html_page(
@@ -377,37 +376,43 @@ def _get_html_page(
            "_get_html_page() missing 1 required keyword argument: 'session'"
        )

-    url = link.url.split('#', 1)[0]
+    url = link.url.split("#", 1)[0]

    # Check for VCS schemes that do not support lookup as web pages.
    vcs_scheme = _match_vcs_scheme(url)
    if vcs_scheme:
-        logger.warning('Cannot look at %s URL %s because it does not support '
-                       'lookup as web pages.', vcs_scheme, link)
+        logger.warning(
+            "Cannot look at %s URL %s because it does not support lookup as web pages.",
+            vcs_scheme,
+            link,
+        )
        return None

    # Tack index.html onto file:// URLs that point to directories
    scheme, _, path, _, _, _ = urllib.parse.urlparse(url)
-    if (scheme == 'file' and os.path.isdir(urllib.request.url2pathname(path))):
+    if scheme == "file" and os.path.isdir(urllib.request.url2pathname(path)):
        # add trailing slash if not present so urljoin doesn't trim
        # final segment
-        if not url.endswith('/'):
-            url += '/'
-        url = urllib.parse.urljoin(url, 'index.html')
-        logger.debug(' file: URL is directory, getting %s', url)
+        if not url.endswith("/"):
+            url += "/"
+        url = urllib.parse.urljoin(url, "index.html")
+        logger.debug(" file: URL is directory, getting %s", url)

    try:
        resp = _get_html_response(url, session=session)
    except _NotHTTP:
        logger.warning(
-            'Skipping page %s because it looks like an archive, and cannot '
-            'be checked by a HTTP HEAD request.', link,
+            "Skipping page %s because it looks like an archive, and cannot "
+            "be checked by a HTTP HEAD request.",
+            link,
        )
    except _NotHTML as exc:
        logger.warning(
-            'Skipping page %s because the %s request got Content-Type: %s.'
-            'The only supported Content-Type is text/html',
-            link, exc.request_desc, exc.content_type,
+            "Skipping page %s because the %s request got Content-Type: %s."
+            "The only supported Content-Type is text/html",
+            link,
+            exc.request_desc,
+            exc.content_type,
        )
    except NetworkConnectionError as exc:
        _handle_get_page_fail(link, exc)
@@ -422,8 +427,7 @@ def _get_html_page(
    except requests.Timeout:
        _handle_get_page_fail(link, "timed out")
    else:
-        return _make_html_page(resp,
-                               cache_link_parsing=link.cache_link_parsing)
+        return _make_html_page(resp, cache_link_parsing=link.cache_link_parsing)

    return None

@@ -451,9 +455,10 @@ def __init__(

    @classmethod
    def create(
-        cls, session: PipSession,
+        cls,
+        session: PipSession,
        options: Values,
-        suppress_no_index: bool = False
+        suppress_no_index: bool = False,
    ) -> "LinkCollector":
        """
        :param session: The Session to use to make requests.
@@ -463,19 +468,21 @@ def create(
        index_urls = [options.index_url] + options.extra_index_urls
        if options.no_index and not suppress_no_index:
            logger.debug(
-                'Ignoring indexes: %s',
-                ','.join(redact_auth_from_url(url) for url in index_urls),
+                "Ignoring indexes: %s",
+                ",".join(redact_auth_from_url(url) for url in index_urls),
            )
            index_urls = []

        # Make sure find_links is a list before passing to create().
        find_links = options.find_links or []

        search_scope = SearchScope.create(
-            find_links=find_links, index_urls=index_urls,
+            find_links=find_links,
+            index_urls=index_urls,
        )
        link_collector = LinkCollector(
-            session=session, search_scope=search_scope,
+            session=session,
+            search_scope=search_scope,
        )
        return link_collector