Skip to content

Commit aa66a67

Browse files
authored
Merge pull request #3473 from bdarnell/http-abnf
httputil: Make parse_request_start_line stricter
2 parents cf4975b + 34d5ffc commit aa66a67

File tree

2 files changed

+84
-20
lines changed

2 files changed

+84
-20
lines changed

tornado/httputil.py

Lines changed: 51 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,41 @@
7171
# To be used with str.strip() and related methods.
7272
HTTP_WHITESPACE = " \t"
7373

74-
HTTP_TOKEN_RE = re.compile(r"^[!#$%&'*+\-.^_`|~0-9A-Za-z]+$")
74+
75+
class _ABNF:
76+
"""Class that holds a subset of ABNF rules from RFC 9110 and friends.
77+
78+
Class attributes are re.Pattern objects, with the same name as in the RFC
79+
(with hyphens changed to underscores). Currently contains only the subset
80+
we use (which is why this class is not public). Unfortunately the fields
81+
cannot be alphabetized as they are in the RFCs because of dependencies.
82+
"""
83+
84+
# RFC 5234 (ABNF)
85+
VCHAR = re.compile(r"[\x21-\x7E]")
86+
87+
# RFC 9110 (HTTP Semantics)
88+
obs_text = re.compile(r"[\x80-\xFF]")
89+
field_vchar = re.compile(rf"(?:{VCHAR.pattern}|{obs_text.pattern})")
90+
tchar = re.compile(r"[!#$%&'*+\-.^_`|~0-9A-Za-z]")
91+
token = re.compile(rf"{tchar.pattern}+")
92+
field_name = token
93+
method = token
94+
95+
# RFC 9112 (HTTP/1.1)
96+
HTTP_version = re.compile(r"HTTP/[0-9]\.[0-9]")
97+
reason_phrase = re.compile(rf"(?:[\t ]|{VCHAR.pattern}|{obs_text.pattern})+")
98+
# request_target delegates to the URI RFC 3986, which is complex and may be
99+
# too restrictive (for example, the WHATWG version of the URL spec allows non-ASCII
100+
# characters). Instead, we allow everything but control chars and whitespace.
101+
request_target = re.compile(rf"{field_vchar.pattern}+")
102+
request_line = re.compile(
103+
rf"({method.pattern}) ({request_target.pattern}) ({HTTP_version.pattern})"
104+
)
105+
status_code = re.compile(r"[0-9]{3}")
106+
status_line = re.compile(
107+
rf"({HTTP_version.pattern}) ({status_code.pattern}) ({reason_phrase.pattern})?"
108+
)
75109

76110

77111
@lru_cache(1000)
@@ -145,7 +179,7 @@ def __init__(self, *args: typing.Any, **kwargs: str) -> None: # noqa: F811
145179

146180
def add(self, name: str, value: str) -> None:
147181
"""Adds a new value for the given key."""
148-
if not HTTP_TOKEN_RE.match(name):
182+
if not _ABNF.field_name.fullmatch(name):
149183
raise HTTPInputError("Invalid header name %r" % name)
150184
norm_name = _normalize_header(name)
151185
self._last_key = norm_name
@@ -892,9 +926,6 @@ class RequestStartLine(typing.NamedTuple):
892926
version: str
893927

894928

895-
_http_version_re = re.compile(r"^HTTP/1\.[0-9]$")
896-
897-
898929
def parse_request_start_line(line: str) -> RequestStartLine:
899930
"""Returns a (method, path, version) tuple for an HTTP 1.x request line.
900931
@@ -903,17 +934,18 @@ def parse_request_start_line(line: str) -> RequestStartLine:
903934
>>> parse_request_start_line("GET /foo HTTP/1.1")
904935
RequestStartLine(method='GET', path='/foo', version='HTTP/1.1')
905936
"""
906-
try:
907-
method, path, version = line.split(" ")
908-
except ValueError:
937+
match = _ABNF.request_line.fullmatch(line)
938+
if not match:
909939
# https://tools.ietf.org/html/rfc7230#section-3.1.1
910940
# invalid request-line SHOULD respond with a 400 (Bad Request)
911941
raise HTTPInputError("Malformed HTTP request line")
912-
if not _http_version_re.match(version):
913-
raise HTTPInputError(
914-
"Malformed HTTP version in HTTP Request-Line: %r" % version
915-
)
916-
return RequestStartLine(method, path, version)
942+
r = RequestStartLine(match.group(1), match.group(2), match.group(3))
943+
if not r.version.startswith("HTTP/1"):
944+
# HTTP/2 and above doesn't use parse_request_start_line.
945+
# This could be folded into the regex but we don't want to deviate
946+
# from the ABNF in the RFCs.
947+
raise HTTPInputError("Unexpected HTTP version %r" % r.version)
948+
return r
917949

918950

919951
class ResponseStartLine(typing.NamedTuple):
@@ -922,9 +954,6 @@ class ResponseStartLine(typing.NamedTuple):
922954
reason: str
923955

924956

925-
_http_response_line_re = re.compile(r"(HTTP/1.[0-9]) ([0-9]+) ([^\r]*)")
926-
927-
928957
def parse_response_start_line(line: str) -> ResponseStartLine:
929958
"""Returns a (version, code, reason) tuple for an HTTP 1.x response line.
930959
@@ -933,11 +962,14 @@ def parse_response_start_line(line: str) -> ResponseStartLine:
933962
>>> parse_response_start_line("HTTP/1.1 200 OK")
934963
ResponseStartLine(version='HTTP/1.1', code=200, reason='OK')
935964
"""
936-
line = native_str(line)
937-
match = _http_response_line_re.match(line)
965+
match = _ABNF.status_line.fullmatch(line)
938966
if not match:
939967
raise HTTPInputError("Error parsing response start line")
940-
return ResponseStartLine(match.group(1), int(match.group(2)), match.group(3))
968+
r = ResponseStartLine(match.group(1), int(match.group(2)), match.group(3))
969+
if not r.version.startswith("HTTP/1"):
970+
# HTTP/2 and above doesn't use parse_response_start_line.
971+
raise HTTPInputError("Unexpected HTTP version %r" % r.version)
972+
return r
941973

942974

943975
# _parseparam and _parse_header are copied and modified from python2.7's cgi.py

tornado/test/httpserver_test.py

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
from tornado.test.util import abstract_base_test
3131
from tornado.web import Application, RequestHandler, stream_request_body
3232

33-
from contextlib import closing
33+
from contextlib import closing, contextmanager
3434
import datetime
3535
import gzip
3636
import logging
@@ -634,6 +634,38 @@ def test_invalid_content_length(self):
634634
)
635635
yield stream.read_until_close()
636636

637+
@gen_test
638+
def test_invalid_methods(self):
639+
# RFC 9110 distinguishes between syntactically invalid methods and those that are
640+
# valid but unknown. The former must give a 400 status code, while the latter should
641+
# give a 405.
642+
test_cases = [
643+
("FOO", 405, None),
644+
("FOO,BAR", 400, ".*Malformed HTTP request line"),
645+
]
646+
for method, code, log_msg in test_cases:
647+
if log_msg is not None:
648+
expect_log = ExpectLog(gen_log, log_msg, level=logging.INFO)
649+
else:
650+
651+
@contextmanager
652+
def noop_context():
653+
yield
654+
655+
expect_log = noop_context() # type: ignore
656+
with (
657+
self.subTest(method=method),
658+
closing(IOStream(socket.socket())) as stream,
659+
expect_log,
660+
):
661+
yield stream.connect(("127.0.0.1", self.get_http_port()))
662+
stream.write(utf8(f"{method} /echo HTTP/1.1\r\n\r\n"))
663+
resp = yield stream.read_until(b"\r\n\r\n")
664+
self.assertTrue(
665+
resp.startswith(b"HTTP/1.1 %d" % code),
666+
f"expected status code {code} in {resp!r}",
667+
)
668+
637669

638670
class XHeaderTest(HandlerBaseTestCase):
639671
class Handler(RequestHandler):

0 commit comments

Comments
 (0)