python · Davda-James · Aug 31, 2025 · Aug 31, 2025 · Aug 31, 2025 · Aug 31, 2025
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
@@ -1227,6 +1227,13 @@ def test_parse_qs_encoding(self):
                                                           errors="ignore")
         self.assertEqual(result, {'key': ['\u0141-']})
 
+    def test_qsl_strict_parsing_raises(self):
+        with self.assertRaises(ValueError):
+            urllib.parse.parse_qsl("foo", strict_parsing=True)
+
+        with self.assertRaises(ValueError):
+            urllib.parse.parse_qsl(b"foo", strict_parsing=True)
+
     def test_parse_qsl_encoding(self):
         result = urllib.parse.parse_qsl("key=\u0141%E9", encoding="latin-1")
         self.assertEqual(result, [('key', '\u0141\xE9')])

diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
@@ -91,6 +91,9 @@
 # Unsafe bytes to be removed per WHATWG spec
 _UNSAFE_URL_BYTES_TO_REMOVE = ['\t', '\r', '\n']
 
+# Non-alphanumeric ASCII characters that _is_valid_query() accepts in a query
+_VALID_QUERY_CHARS = "-._~!$&'()*+,;=:@/?%"
+
 def clear_cache():
     """Clear internal performance caches. Undocumented; some tests want it."""
     urlsplit.cache_clear()
@@ -778,6 +781,15 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
             parsed_result[name] = [value]
     return parsed_result
 
+def _is_valid_query(to_check: str) -> bool:
+    """Return True if all characters are valid per RFC 3986."""
+    for ch in to_check:
+        if not ch.isascii():
+            return False
+        if ch.isalnum() or ch in _VALID_QUERY_CHARS:
+            continue
+        return False
+    return True
 
 def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
               encoding='utf-8', errors='replace', max_num_fields=None, separator='&', *, _stacklevel=1):
@@ -854,6 +866,11 @@ def _unquote(s):
             name, has_eq, value = name_value.partition(eq)
             if not has_eq and strict_parsing:
                 raise ValueError("bad query field: %r" % (name_value,))
+            if strict_parsing:
+                # Validate RFC3986 characters
+                to_check = (name_value.decode() if isinstance(name_value, bytes) else name_value)
+                if not _is_valid_query(to_check):
+                    raise ValueError(f"Invalid characters in query string per RFC 3986: {name_value!r}")
             if value or keep_blank_values:
                 name = _unquote(name)
                 value = _unquote(value)

diff --git a/Misc/NEWS.d/next/Library/2025-08-31-13-00-22.gh-issue-138284.6MOp4k.rst b/Misc/NEWS.d/next/Library/2025-08-31-13-00-22.gh-issue-138284.6MOp4k.rst
@@ -0,0 +1 @@
+:func:`urllib.parse.parse_qsl` now raises :exc:`ValueError` when *strict_parsing* is true and the query string contains characters that :rfc:`3986` does not allow, such as ``^`` or a backtick. Previously such characters were accepted silently. Tests for the new check were added.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		:func:`urllib.parse.parse_qsl` now raises :exc:`ValueError` when *strict_parsing* is true and the query string contains characters that :rfc:`3986` does not allow, such as ``^`` or a backtick. Previously such characters were accepted silently. Tests for the new check were added.
Davda-James marked this conversation as resolved. Outdated Show resolved Hide resolved