Skip to content

Commit 4934ff2

Browse files
committed
Replaced the regex with char.isascii(), char.isalnum(), and a manual membership check for performance
1 parent 345e86b commit 4934ff2

File tree

1 file changed

+11
-2
lines changed

1 file changed

+11
-2
lines changed

Lib/urllib/parse.py

Lines changed: 11 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -92,7 +92,7 @@
9292
_UNSAFE_URL_BYTES_TO_REMOVE = ['\t', '\r', '\n']
9393

9494
# Allowed valid characters in parse_qsl
95-
_VALID_QUERY_CHARS = re.compile(r"^[A-Za-z0-9\-._~!$&'()*+,;=:@/?%]*$")
95+
_VALID_QUERY_CHARS = "-._~!$&'()*+,;=:@/?%"
9696

9797
def clear_cache():
9898
"""Clear internal performance caches. Undocumented; some tests want it."""
@@ -781,6 +781,15 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
781781
parsed_result[name] = [value]
782782
return parsed_result
783783

784+
def _is_valid_query(to_check: str) -> bool:
    """Report whether *to_check* contains only permitted query characters.

    A character is permitted when it is ASCII and is either alphanumeric
    or one of the punctuation characters listed in ``_VALID_QUERY_CHARS``
    (the set this module treats as valid per RFC 3986).  An empty string
    is considered valid.

    Returns True if every character passes, False as soon as one does not.
    """
    # Bind the module-level constant once so the loop uses a local lookup.
    extra_allowed = _VALID_QUERY_CHARS
    for char in to_check:
        # The isascii() guard comes first: str.isalnum() alone would also
        # accept non-ASCII letters and digits.
        if not (char.isascii() and (char.isalnum() or char in extra_allowed)):
            return False
    return True
784793

785794
def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
786795
encoding='utf-8', errors='replace', max_num_fields=None, separator='&', *, _stacklevel=1):
@@ -860,7 +869,7 @@ def _unquote(s):
860869
if strict_parsing:
861870
# Validate RFC3986 characters
862871
to_check = (name_value.decode() if isinstance(name_value, bytes) else name_value)
863-
if not _VALID_QUERY_CHARS.match(to_check):
872+
if not _is_valid_query(to_check):
864873
raise ValueError(f"Invalid characters in query string per RFC 3986: {name_value!r}")
865874
if value or keep_blank_values:
866875
name = _unquote(name)

0 commit comments

Comments
 (0)