11import os
22import unittest
3+ from inspect import isclass
4+ from typing import Optional , Union , Type , Callable , Tuple , List
35from urllib .parse import urlparse
46
57import pytest
1012 _ASCII_TAB_OR_NEWLINE ,
1113 _C0_CONTROL_OR_SPACE ,
1214)
15+ from w3lib ._types import StrOrBytes
1316from w3lib ._url import _SPECIAL_SCHEMES
1417from w3lib .url import (
1518 add_or_replace_parameter ,
2730 url_query_cleaner ,
2831)
2932
30-
31- UNSET = object ()
32-
3333# Test cases for URL-to-safe-URL conversions with a URL and an encoding as
3434# input parameters.
3535#
3636# (encoding, input URL, output URL or exception)
37- SAFE_URL_ENCODING_CASES = (
38- (UNSET , "" , ValueError ),
39- (UNSET , "https://example.com" , "https://example.com" ),
40- (UNSET , "https://example.com/©" , "https://example.com/%C2%A9" ),
37+ SAFE_URL_ENCODING_CASES : List [
38+ Tuple [Optional [str ], StrOrBytes , Union [str , Type [Exception ]]]
39+ ] = [
40+ (None , "" , ValueError ),
41+ (None , "https://example.com" , "https://example.com" ),
42+ (None , "https://example.com/©" , "https://example.com/%C2%A9" ),
4143 # Paths are always UTF-8-encoded.
4244 ("iso-8859-1" , "https://example.com/©" , "https://example.com/%C2%A9" ),
4345 # Queries are UTF-8-encoded if the scheme is not special, ws or wss.
5355 ),
5456 # Fragments are always UTF-8-encoded.
5557 ("iso-8859-1" , "https://example.com#©" , "https://example.com#%C2%A9" ),
56- )
58+ ]
5759
5860INVALID_SCHEME_FOLLOW_UPS = "" .join (
5961 chr (value )
315317)
316318
317319
318- def _test_safe_url_func (url , * , encoding = UNSET , output , func ):
320+ def _test_safe_url_func (
321+ url : StrOrBytes ,
322+ * ,
323+ encoding : Optional [str ] = None ,
324+ output : Union [str , Type [Exception ]],
325+ func : Callable [..., str ],
326+ ) -> None :
319327 kwargs = {}
320- if encoding is not UNSET :
328+ if encoding is not None :
321329 kwargs ["encoding" ] = encoding
322- try :
323- is_exception = issubclass (output , Exception )
324- except TypeError :
325- is_exception = False
326- if is_exception :
330+ if isclass (output ) and issubclass (output , Exception ):
327331 with pytest .raises (output ):
328332 func (url , ** kwargs )
329333 return
@@ -332,7 +336,12 @@ def _test_safe_url_func(url, *, encoding=UNSET, output, func):
332336 assert func (actual , ** kwargs ) == output # Idempotency
333337
334338
335- def _test_safe_url_string (url , * , encoding = UNSET , output ):
339+ def _test_safe_url_string (
340+ url : StrOrBytes ,
341+ * ,
342+ encoding : Optional [str ] = None ,
343+ output : Union [str , Type [Exception ]],
344+ ) -> None :
336345 return _test_safe_url_func (
337346 url ,
338347 encoding = encoding ,
@@ -342,7 +351,7 @@ def _test_safe_url_string(url, *, encoding=UNSET, output):
342351
343352
344353KNOWN_SAFE_URL_STRING_ENCODING_ISSUES = {
345- (UNSET , "" ), # Invalid URL
354+ (None , "" ), # Invalid URL
346355 # UTF-8 encoding is not enforced in non-special URLs, or in URLs with the
347356 # ws or wss schemes.
348357 ("iso-8859-1" , "a://example.com?\xa9 " ),
@@ -362,7 +371,9 @@ def _test_safe_url_string(url, *, encoding=UNSET, output):
362371 for case in SAFE_URL_ENCODING_CASES
363372 ),
364373)
def test_safe_url_string_encoding(
    encoding: Optional[str], url: StrOrBytes, output: Union[str, Type[Exception]]
) -> None:
    """Check one (encoding, input URL, expected output) case.

    ``output`` is either the expected safe URL or an exception class; the
    actual verification is delegated to ``_test_safe_url_string``.
    """
    _test_safe_url_string(url, encoding=encoding, output=output)
367378
368379
@@ -421,7 +432,9 @@ def test_safe_url_string_encoding(encoding, url, output):
421432 for case in SAFE_URL_URL_CASES
422433 ),
423434)
def test_safe_url_string_url(
    url: StrOrBytes, output: Union[str, Type[Exception]]
) -> None:
    """Check one (input URL, expected output) case with no explicit encoding.

    ``output`` is either the expected safe URL or an exception class; the
    actual verification is delegated to ``_test_safe_url_string``.
    """
    _test_safe_url_string(url, output=output)
426439
427440
0 commit comments