Skip to content

Commit a1994d8

Browse files
committed
httputil: Make parse_request_start_line stricter
The method is now restricted to being valid token characters as defined in RFC 9110, allowing us to correctly issue status code 400 or 405 as appropriate (this can make a difference with some caching proxies). The request-target no longer allows control characters. This is less strict than the RFC (which does not allow non-ascii characters), but prioritizes backwards compatibility. Fixes tornadoweb#3415 Closes tornadoweb#3338
1 parent 5a271f2 commit a1994d8

File tree

1 file changed

+16
-11
lines changed

1 file changed

+16
-11
lines changed

tornado/httputil.py

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -86,13 +86,22 @@ class _ABNF:
8686

8787
# RFC 9110 (HTTP Semantics)
8888
obs_text = re.compile(r"[\x80-\xFF]")
89+
field_vchar = re.compile(rf"(?:{VCHAR.pattern}|{obs_text.pattern})")
8990
tchar = re.compile(r"[!#$%&'*+\-.^_`|~0-9A-Za-z]")
9091
token = re.compile(rf"{tchar.pattern}+")
9192
field_name = token
93+
method = token
9294

9395
# RFC 9112 (HTTP/1.1)
9496
HTTP_version = re.compile(r"HTTP/[0-9]\.[0-9]")
9597
reason_phrase = re.compile(rf"(?:[\t ]|{VCHAR.pattern}|{obs_text.pattern})+")
98+
# request_target delegates to the URI RFC 3986, which is complex and may be
99+
# too restrictive (for example, the WHATWG version of the URL spec allows non-ASCII
100+
# characters). Instead, we allow everything but control chars and whitespace.
101+
request_target = re.compile(rf"{field_vchar.pattern}+")
102+
request_line = re.compile(
103+
rf"({method.pattern}) ({request_target.pattern}) ({HTTP_version.pattern})"
104+
)
96105
status_code = re.compile(r"[0-9]{3}")
97106
status_line = re.compile(
98107
rf"({HTTP_version.pattern}) ({status_code.pattern}) ({reason_phrase.pattern})?"
@@ -170,7 +179,7 @@ def __init__(self, *args: typing.Any, **kwargs: str) -> None: # noqa: F811
170179

171180
def add(self, name: str, value: str) -> None:
172181
"""Adds a new value for the given key."""
173-
if not _ABNF.token.fullmatch(name):
182+
if not _ABNF.field_name.fullmatch(name):
174183
raise HTTPInputError("Invalid header name %r" % name)
175184
norm_name = _normalize_header(name)
176185
self._last_key = norm_name
@@ -925,22 +934,18 @@ def parse_request_start_line(line: str) -> RequestStartLine:
925934
>>> parse_request_start_line("GET /foo HTTP/1.1")
926935
RequestStartLine(method='GET', path='/foo', version='HTTP/1.1')
927936
"""
928-
try:
929-
method, path, version = line.split(" ")
930-
except ValueError:
937+
match = _ABNF.request_line.fullmatch(line)
938+
if not match:
931939
# https://tools.ietf.org/html/rfc7230#section-3.1.1
932940
# invalid request-line SHOULD respond with a 400 (Bad Request)
933941
raise HTTPInputError("Malformed HTTP request line")
934-
if not _ABNF.HTTP_version.fullmatch(version):
935-
raise HTTPInputError(
936-
"Malformed HTTP version in HTTP Request-Line: %r" % version
937-
)
938-
if not version.startswith("HTTP/1"):
942+
r = RequestStartLine(match.group(1), match.group(2), match.group(3))
943+
if not r.version.startswith("HTTP/1"):
939944
# HTTP/2 and above doesn't use parse_request_start_line.
940945
# This could be folded into the regex but we don't want to deviate
941946
# from the ABNF in the RFCs.
942-
raise HTTPInputError("Unexpected HTTP version %r" % version)
943-
return RequestStartLine(method, path, version)
947+
raise HTTPInputError("Unexpected HTTP version %r" % r.version)
948+
return r
944949

945950

946951
class ResponseStartLine(typing.NamedTuple):

0 commit comments

Comments
 (0)