Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions Lib/test/test_urlparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -1227,6 +1227,13 @@ def test_parse_qs_encoding(self):
errors="ignore")
self.assertEqual(result, {'key': ['\u0141-']})

def test_qsl_strict_parsing_raises(self):
# parse_qsl() with strict_parsing=True must raise ValueError for a field
# that has no "=" separator; checked for both str and bytes input.
# NOTE(review): "foo" is already rejected by the existing "bad query field"
# check in parse_qsl, so this test does not reach the new RFC 3986 character
# validation -- consider adding an input that has "=" but contains a
# character outside the allowed set (e.g. a space).
with self.assertRaises(ValueError):
urllib.parse.parse_qsl("foo", strict_parsing=True)

# Same check with a bytes query string.
with self.assertRaises(ValueError):
urllib.parse.parse_qsl(b"foo", strict_parsing=True)

def test_parse_qsl_encoding(self):
result = urllib.parse.parse_qsl("key=\u0141%E9", encoding="latin-1")
self.assertEqual(result, [('key', '\u0141\xE9')])
Expand Down
8 changes: 8 additions & 0 deletions Lib/urllib/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,9 @@
# Unsafe bytes to be removed per WHATWG spec
_UNSAFE_URL_BYTES_TO_REMOVE = ['\t', '\r', '\n']

# Characters accepted in a query field when parse_qsl() validates its input
# under strict_parsing: the RFC 3986 "unreserved" and "sub-delims" sets plus
# ":", "@", "/", "?" and "%".  The percent sign is accepted on its own; the
# two hex digits of an escape are not checked by this pattern.
# The pattern is anchored with \Z rather than $ because $ also matches just
# before a trailing newline, which would let a value such as "a=b\n" pass.
_VALID_QUERY_CHARS = re.compile(r"^[A-Za-z0-9\-._~!$&'()*+,;=:@/?%]*\Z")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could be replaced with str.isascii, str.isdecimal, and a string containing the other allowed characters; that should be faster.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, I will do that and add a new commit.


def clear_cache():
"""Clear internal performance caches. Undocumented; some tests want it."""
urlsplit.cache_clear()
Expand Down Expand Up @@ -854,6 +857,11 @@ def _unquote(s):
name, has_eq, value = name_value.partition(eq)
if not has_eq and strict_parsing:
raise ValueError("bad query field: %r" % (name_value,))
if strict_parsing:
# Validate RFC3986 characters
to_check = (name_value.decode() if isinstance(name_value, bytes) else name_value)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use _unquote as this handles the %-encoded values and takes care of the encoding parameter as well.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if strict_parsing:
# Validate RFC3986 characters
to_check = _unquote(name_value)
if isinstance(to_check, (bytes, bytearray)):
to_check = to_check.decode(encoding, errors)
if not _is_valid_rfc3986_query(to_check):

Is it okay to do it like this? We need to decode the result, since _unquote returns bytes and _is_valid_rfc3986_query accepts a string.

if not _VALID_QUERY_CHARS.match(to_check):
raise ValueError(f"Invalid characters in query string per RFC 3986: {name_value!r}")
if value or keep_blank_values:
name = _unquote(name)
value = _unquote(value)
Expand Down
Loading