- 
          
- 
                Notifications
    You must be signed in to change notification settings 
- Fork 33.3k
gh-138284 : urllib.parse.parse_qsl now raises ValueError if illegal characters is passed, according to RFC 3986 #138291
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 4 commits
79f25b9
              345e86b
              4934ff2
              cf763db
              5cea514
              f42744b
              File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
|  | @@ -91,6 +91,9 @@ | |
| # Unsafe bytes to be removed per WHATWG spec | ||
| _UNSAFE_URL_BYTES_TO_REMOVE = ['\t', '\r', '\n'] | ||
|  | ||
| # Allowed valid characters in parse_qsl | ||
|         
                  Davda-James marked this conversation as resolved.
              Outdated
          
            Show resolved
            Hide resolved | ||
| _VALID_QUERY_CHARS = "-._~!$&'()*+,;=:@/?%" | ||
|         
                  Davda-James marked this conversation as resolved.
              Outdated
          
            Show resolved
            Hide resolved | ||
|  | ||
| def clear_cache(): | ||
| """Clear internal performance caches. Undocumented; some tests want it.""" | ||
| urlsplit.cache_clear() | ||
|  | @@ -778,6 +781,15 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False, | |
| parsed_result[name] = [value] | ||
| return parsed_result | ||
|  | ||
| def _is_valid_query(to_check: str) -> bool: | ||
|         
                  Davda-James marked this conversation as resolved.
              Outdated
          
            Show resolved
            Hide resolved | ||
| """Return True if all characters are valid per RFC 3986.""" | ||
| for ch in to_check: | ||
| if not ch.isascii(): | ||
| return False | ||
| if ch.isalnum() or ch in _VALID_QUERY_CHARS: | ||
| continue | ||
| return False | ||
| return True | ||
|  | ||
| def parse_qsl(qs, keep_blank_values=False, strict_parsing=False, | ||
| encoding='utf-8', errors='replace', max_num_fields=None, separator='&', *, _stacklevel=1): | ||
|  | @@ -854,6 +866,11 @@ def _unquote(s): | |
| name, has_eq, value = name_value.partition(eq) | ||
| if not has_eq and strict_parsing: | ||
| raise ValueError("bad query field: %r" % (name_value,)) | ||
| if strict_parsing: | ||
| # Validate RFC3986 characters | ||
| to_check = (name_value.decode() if isinstance(name_value, bytes) else name_value) | ||
|          | ||
| if not _is_valid_query(to_check): | ||
| raise ValueError(f"Invalid characters in query string per RFC 3986: {name_value!r}") | ||
| if value or keep_blank_values: | ||
| name = _unquote(name) | ||
| value = _unquote(value) | ||
|  | ||
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
| @@ -0,0 +1 @@ | ||
| Earlier urllib.parse.parse_qsl was taking illegal characters like '^' , ' ` ' etc. which should not be the case according to RFC 3986. Hence added the check and now will throw ValueError in case of any illegal characters other than allowed ones. Also written test for it. | ||
|         
                  Davda-James marked this conversation as resolved.
              Outdated
          
            Show resolved
            Hide resolved | ||
Uh oh!
There was an error while loading. Please reload this page.