Skip to content

Commit 1dddddb

Browse files
authored
Merge pull request #136 from scrapy/strip-spaces-in-canonicalize-url
Strip spaces in canonicalize_url
2 parents 4ba3539 + 849bae1 commit 1dddddb

File tree

2 files changed

+13
-0
lines changed

2 files changed

+13
-0
lines changed

tests/test_url.py

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1085,6 +1085,17 @@ def test_preserve_nonfragment_hash(self):
1085 1085
"http://www.example.com/path/to/%23/foo/bar?url=http%3A%2F%2Fwww.example.com%2F%2Fpath%2Fto%2F%23%2Fbar%2Ffoo#frag",
1086 1086
)
1087 1087

1088+
def test_strip_spaces(self):
1089+
self.assertEqual(
1090+
canonicalize_url(" https://example.com"), "https://example.com/"
1091+
)
1092+
self.assertEqual(
1093+
canonicalize_url("https://example.com "), "https://example.com/"
1094+
)
1095+
self.assertEqual(
1096+
canonicalize_url(" https://example.com "), "https://example.com/"
1097+
)
1098+
1088 1099

1089 1100
class DataURITests(unittest.TestCase):
1090 1101
def test_default_mediatype_charset(self):

w3lib/url.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -538,6 +538,8 @@ def canonicalize_url(
538 538
# UTF-8 can handle all Unicode characters,
539 539
# so we should be covered regarding URL normalization,
540 540
# if not for proper URL expected by remote website.
541+
if isinstance(url, str):
542+
url = url.strip()
541 543
try:
542 544
scheme, netloc, path, params, query, fragment = _safe_ParseResult(
543 545
parse_url(url), encoding=encoding or "utf8"

0 commit comments

Comments
 (0)