Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions Lib/test/test_urlparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -1227,6 +1227,13 @@ def test_parse_qs_encoding(self):
errors="ignore")
self.assertEqual(result, {'key': ['\u0141-']})

def test_qsl_strict_parsing_raises(self):
with self.assertRaises(ValueError):
urllib.parse.parse_qsl("foo", strict_parsing=True)

with self.assertRaises(ValueError):
urllib.parse.parse_qsl(b"foo", strict_parsing=True)

def test_parse_qsl_encoding(self):
result = urllib.parse.parse_qsl("key=\u0141%E9", encoding="latin-1")
self.assertEqual(result, [('key', '\u0141\xE9')])
Expand Down
17 changes: 17 additions & 0 deletions Lib/urllib/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,9 @@
# Unsafe bytes to be removed per WHATWG spec
_UNSAFE_URL_BYTES_TO_REMOVE = ['\t', '\r', '\n']

# Non-alphanumeric characters that _is_valid_query() accepts in a query
# field: the RFC 3986 unreserved marks "-._~", the sub-delims
# "!$&'()*+,;=", plus ":", "@", "/" and "?", and "%" as the introducer of
# a percent-encoded octet.
_VALID_QUERY_CHARS = "-._~!$&'()*+,;=:@/?%"

def clear_cache():
"""Clear internal performance caches. Undocumented; some tests want it."""
urlsplit.cache_clear()
Expand Down Expand Up @@ -778,6 +781,15 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
parsed_result[name] = [value]
return parsed_result

def _is_valid_query(to_check: str) -> bool:
    """Tell whether *to_check* uses only the RFC 3986 query character set.

    A character is accepted when it is an ASCII letter or digit, or when it
    appears in _VALID_QUERY_CHARS.  Characters are examined one at a time,
    so a '%' is accepted even if it is not followed by two hex digits.  An
    empty string is considered valid.
    """
    return all(
        ch.isascii() and (ch.isalnum() or ch in _VALID_QUERY_CHARS)
        for ch in to_check
    )

def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
encoding='utf-8', errors='replace', max_num_fields=None, separator='&', *, _stacklevel=1):
Expand Down Expand Up @@ -854,6 +866,11 @@ def _unquote(s):
name, has_eq, value = name_value.partition(eq)
if not has_eq and strict_parsing:
raise ValueError("bad query field: %r" % (name_value,))
if strict_parsing:
# Validate RFC3986 characters
to_check = (name_value.decode() if isinstance(name_value, bytes) else name_value)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use _unquote as this handles the %-encoded values and takes care of the encoding parameter as well.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if strict_parsing:
# Validate RFC3986 characters
to_check = _unquote(name_value)
if isinstance(to_check, (bytes, bytearray)):
to_check = to_check.decode(encoding, errors)
if not _is_valid_rfc3986_query(to_check):
Is it OK to use it like this? We need to decode the result again, because _unquote returns bytes while _is_valid_rfc3986_query accepts a string.

if not _is_valid_query(to_check):
raise ValueError(f"Invalid characters in query string per RFC 3986: {name_value!r}")
if value or keep_blank_values:
name = _unquote(name)
value = _unquote(value)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Previously, urllib.parse.parse_qsl accepted illegal characters such as '^' and '`', which RFC 3986 does not permit in a query string. A check has been added, and parse_qsl now raises ValueError when strict_parsing is true and a query field contains any character outside the allowed set. A test for this has also been added.
Loading