@@ -36,15 +36,18 @@ def _quote_byte(error):
3636
3737_ascii_tab_newline_re = re .compile (r'[\t\n\r]' ) # see https://infra.spec.whatwg.org/#ascii-tab-or-newline
3838
39- def safe_url_string (url , encoding = 'utf8' , path_encoding = 'utf8' ):
39+ def safe_url_string (url , encoding = 'utf8' , path_encoding = 'utf8' , quote_path = True ):
4040 """Convert the given URL into a legal URL by escaping unsafe characters
4141 according to RFC-3986. Also, ASCII tabs and newlines are removed
4242 as per https://url.spec.whatwg.org/#url-parsing.
4343
4444 If a bytes URL is given, it is first converted to `str` using the given
45- encoding (which defaults to 'utf-8'). 'utf-8' encoding is used for
46- URL path component (unless overriden by path_encoding), and given
47- encoding is used for query string or form data.
45+ encoding (which defaults to 'utf-8'). If quote_path is True (default),
46+ path_encoding ('utf-8' by default) is used to encode URL path component
47+ which is then quoted. Otherwise, if quote_path is False, path component
48+ is not encoded or quoted. Given encoding is used for query string
49+ or form data.
50+
4851 When passing an encoding, you should use the encoding of the
4952 original page (the page from which the URL was extracted).
5053
@@ -69,15 +72,18 @@ def safe_url_string(url, encoding='utf8', path_encoding='utf8'):
6972 except UnicodeError :
7073 netloc = parts .netloc
7174
75+ # default encoding for path component SHOULD be UTF-8
76+ if quote_path :
77+ path = quote (to_bytes (parts .path , path_encoding ), _safe_chars )
78+ else :
79+ path = to_native_str (parts .path )
80+
7281 # in Python 2, the return type of quote() follows its input type;
7382 # in Python 3, quote() always returns Unicode (native str)
7483 return urlunsplit ((
7584 to_native_str (parts .scheme ),
7685 to_native_str (netloc ).rstrip (':' ),
77-
78- # default encoding for path component SHOULD be UTF-8
79- quote (to_bytes (parts .path , path_encoding ), _safe_chars ),
80-
86+ path ,
8187 # encoding of query and fragment follows page encoding
8288 # or form-charset (if known and passed)
8389 quote (to_bytes (parts .query , encoding ), _safe_chars ),
0 commit comments