Skip to content

Commit bad299a

Browse files
committed
Remove # only from path-specific safe characters
1 parent b8c753f commit bad299a

File tree

1 file changed

+3
-5
lines changed

1 file changed

+3
-5
lines changed

w3lib/url.py

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -33,9 +33,7 @@ def _quote_byte(error):
3333
EXTRA_SAFE_CHARS = b'|' # see https://github.com/scrapy/w3lib/pull/25
3434

3535
_safe_chars = RFC3986_RESERVED + RFC3986_UNRESERVED + EXTRA_SAFE_CHARS + b'%'
36-
37-
# see https://github.com/scrapy/w3lib/issues/91
38-
_safe_chars = _safe_chars.replace(b'#', b'')
36+
_path_safe_chars = _safe_chars.replace(b'#', b'')
3937

4038
_ascii_tab_newline_re = re.compile(r'[\t\n\r]') # see https://infra.spec.whatwg.org/#ascii-tab-or-newline
4139

@@ -417,7 +415,7 @@ def _safe_ParseResult(parts, encoding='utf8', path_encoding='utf8'):
417415
to_native_str(netloc),
418416

419417
# default encoding for path component SHOULD be UTF-8
420-
quote(to_bytes(parts.path, path_encoding), _safe_chars),
418+
quote(to_bytes(parts.path, path_encoding), _path_safe_chars),
421419
quote(to_bytes(parts.params, path_encoding), _safe_chars),
422420

423421
# encoding of query and fragment follows page encoding
@@ -505,7 +503,7 @@ def canonicalize_url(url, keep_blank_values=True, keep_fragments=False,
505503
# 2. decode percent-encoded sequences in path as UTF-8 (or keep raw bytes)
506504
# and percent-encode path again (this normalizes to upper-case %XX)
507505
uqp = _unquotepath(path)
508-
path = quote(uqp, _safe_chars) or '/'
506+
path = quote(uqp, _path_safe_chars) or '/'
509507

510508
fragment = '' if not keep_fragments else fragment
511509

0 commit comments

Comments
 (0)