Skip to content

Commit 4ce700a

Browse files
committed
httputil: Process the Host header more strictly
- It is now an error to have multiple Host headers - The Host header is now mandatory except in HTTP/1.0 mode - Host headers containing characters that are disallowed by RFC 3986 are now rejected Fixes #3468
1 parent 1161f9b commit 4ce700a

File tree

3 files changed

+74
-15
lines changed

3 files changed

+74
-15
lines changed

tornado/httputil.py

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,19 @@ class _ABNF:
8181
cannot be alphabetized as they are in the RFCs because of dependencies.
8282
"""
8383

84+
# RFC 3986 (URI)
85+
# The URI hostname ABNF is both complex (including detailed vaildation of IPv4 and IPv6
86+
# literals) and not strict enough (a lot of punctuation is allowed by the ABNF even though
87+
# it is not allowed by DNS). We simplify it by allowing square brackets and colons in any
88+
# position, not only for their use in IPv6 literals.
89+
uri_unreserved = re.compile(r"[A-Za-z0-9\-._~]")
90+
uri_sub_delims = re.compile(r"[!$&'()*+,;=]")
91+
uri_pct_encoded = re.compile(r"%[0-9A-Fa-f]{2}")
92+
uri_host = re.compile(
93+
rf"(?:[\[\]:]|{uri_unreserved.pattern}|{uri_sub_delims.pattern}|{uri_pct_encoded.pattern})*"
94+
)
95+
uri_port = re.compile(r"[0-9]*")
96+
8497
# RFC 5234 (ABNF)
8598
VCHAR = re.compile(r"[\x21-\x7E]")
8699

@@ -91,6 +104,7 @@ class _ABNF:
91104
token = re.compile(rf"{tchar.pattern}+")
92105
field_name = token
93106
method = token
107+
host = re.compile(rf"(?:{uri_host.pattern})(?::{uri_port.pattern})?")
94108

95109
# RFC 9112 (HTTP/1.1)
96110
HTTP_version = re.compile(r"HTTP/[0-9]\.[0-9]")
@@ -421,7 +435,7 @@ def __init__(
421435
version: str = "HTTP/1.0",
422436
headers: Optional[HTTPHeaders] = None,
423437
body: Optional[bytes] = None,
424-
host: Optional[str] = None,
438+
# host: Optional[str] = None,
425439
files: Optional[Dict[str, List["HTTPFile"]]] = None,
426440
connection: Optional["HTTPConnection"] = None,
427441
start_line: Optional["RequestStartLine"] = None,
@@ -440,7 +454,35 @@ def __init__(
440454
self.remote_ip = getattr(context, "remote_ip", None)
441455
self.protocol = getattr(context, "protocol", "http")
442456

443-
self.host = host or self.headers.get("Host") or "127.0.0.1"
457+
try:
458+
self.host = self.headers["Host"]
459+
except KeyError:
460+
if version == "HTTP/1.0":
461+
# HTTP/1.0 does not require the Host header.
462+
self.host = "127.0.0.1"
463+
else:
464+
raise HTTPInputError("Missing Host header")
465+
if not _ABNF.host.fullmatch(self.host):
466+
print(_ABNF.host.pattern)
467+
raise HTTPInputError("Invalid Host header: %r" % self.host)
468+
if "," in self.host:
469+
# https://www.rfc-editor.org/rfc/rfc9112.html#name-request-target
470+
# Server MUST respond with 400 Bad Request if multiple
471+
# Host headers are present.
472+
#
473+
# We test for the presence of a comma instead of the number of
474+
# headers received because a proxy may have converted
475+
# multiple headers into a single comma-separated value
476+
# (per RFC 9110 section 5.3).
477+
#
478+
# This is technically a departure from the RFC since the ABNF
479+
# does not forbid commas in the host header. However, since
480+
# commas are not allowed in DNS names, it is appropriate to
481+
# disallow them. (The same argument could be made for other special
482+
# characters, but commas are the most problematic since they could
483+
# be used to exploit differences between proxies when multiple headers
484+
# are supplied).
485+
raise HTTPInputError("Multiple host headers not allowed: %r" % self.host)
444486
self.host_name = split_host_and_port(self.host.lower())[0]
445487
self.files = files or {}
446488
self.connection = connection

tornado/test/httpserver_test.py

Lines changed: 29 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,7 @@ def test_100_continue(self):
267267
b"\r\n".join(
268268
[
269269
b"POST /hello HTTP/1.1",
270+
b"Host: 127.0.0.1",
270271
b"Content-Length: 1024",
271272
b"Expect: 100-continue",
272273
b"Connection: close",
@@ -467,6 +468,7 @@ def test_chunked_request_body(self):
467468
self.stream.write(
468469
b"""\
469470
POST /echo HTTP/1.1
471+
Host: 127.0.0.1
470472
Transfer-Encoding: chunked
471473
Content-Type: application/x-www-form-urlencoded
472474
@@ -491,6 +493,7 @@ def test_chunked_request_uppercase(self):
491493
self.stream.write(
492494
b"""\
493495
POST /echo HTTP/1.1
496+
Host: 127.0.0.1
494497
Transfer-Encoding: Chunked
495498
Content-Type: application/x-www-form-urlencoded
496499
@@ -515,6 +518,7 @@ def test_chunked_request_body_invalid_size(self):
515518
self.stream.write(
516519
b"""\
517520
POST /echo HTTP/1.1
521+
Host: 127.0.0.1
518522
Transfer-Encoding: chunked
519523
520524
1_a
@@ -537,6 +541,7 @@ def test_chunked_request_body_duplicate_header(self):
537541
self.stream.write(
538542
b"""\
539543
POST /echo HTTP/1.1
544+
Host: 127.0.0.1
540545
Transfer-Encoding: chunked
541546
Transfer-encoding: chunked
542547
@@ -561,6 +566,7 @@ def test_chunked_request_body_unsupported_transfer_encoding(self):
561566
self.stream.write(
562567
b"""\
563568
POST /echo HTTP/1.1
569+
Host: 127.0.0.1
564570
Transfer-Encoding: gzip, chunked
565571
566572
2
@@ -582,6 +588,7 @@ def test_chunked_request_body_transfer_encoding_and_content_length(self):
582588
self.stream.write(
583589
b"""\
584590
POST /echo HTTP/1.1
591+
Host: 127.0.0.1
585592
Transfer-Encoding: chunked
586593
Content-Length: 2
587594
@@ -624,6 +631,7 @@ def test_invalid_content_length(self):
624631
textwrap.dedent(
625632
f"""\
626633
POST /echo HTTP/1.1
634+
Host: 127.0.0.1
627635
Content-Length: {value}
628636
Connection: close
629637
@@ -659,7 +667,7 @@ def noop_context():
659667
expect_log,
660668
):
661669
yield stream.connect(("127.0.0.1", self.get_http_port()))
662-
stream.write(utf8(f"{method} /echo HTTP/1.1\r\n\r\n"))
670+
stream.write(utf8(f"{method} /echo HTTP/1.1\r\nHost:127.0.0.1\r\n\r\n"))
663671
resp = yield stream.read_until(b"\r\n\r\n")
664672
self.assertTrue(
665673
resp.startswith(b"HTTP/1.1 %d" % code),
@@ -968,16 +976,18 @@ def close(self):
968976
@gen_test
969977
def test_two_requests(self):
970978
yield self.connect()
971-
self.stream.write(b"GET / HTTP/1.1\r\n\r\n")
979+
self.stream.write(b"GET / HTTP/1.1\r\nHost:127.0.0.1\r\n\r\n")
972980
yield self.read_response()
973-
self.stream.write(b"GET / HTTP/1.1\r\n\r\n")
981+
self.stream.write(b"GET / HTTP/1.1\r\nHost:127.0.0.1\r\n\r\n")
974982
yield self.read_response()
975983
self.close()
976984

977985
@gen_test
978986
def test_request_close(self):
979987
yield self.connect()
980-
self.stream.write(b"GET / HTTP/1.1\r\nConnection: close\r\n\r\n")
988+
self.stream.write(
989+
b"GET / HTTP/1.1\r\nHost:127.0.0.1\r\nConnection: close\r\n\r\n"
990+
)
981991
yield self.read_response()
982992
data = yield self.stream.read_until_close()
983993
self.assertTrue(not data)
@@ -1023,31 +1033,35 @@ def test_http10_keepalive_extra_crlf(self):
10231033
@gen_test
10241034
def test_pipelined_requests(self):
10251035
yield self.connect()
1026-
self.stream.write(b"GET / HTTP/1.1\r\n\r\nGET / HTTP/1.1\r\n\r\n")
1036+
self.stream.write(
1037+
b"GET / HTTP/1.1\r\nHost:127.0.0.1\r\n\r\nGET / HTTP/1.1\r\nHost:127.0.0.1\r\n\r\n"
1038+
)
10271039
yield self.read_response()
10281040
yield self.read_response()
10291041
self.close()
10301042

10311043
@gen_test
10321044
def test_pipelined_cancel(self):
10331045
yield self.connect()
1034-
self.stream.write(b"GET / HTTP/1.1\r\n\r\nGET / HTTP/1.1\r\n\r\n")
1046+
self.stream.write(
1047+
b"GET / HTTP/1.1\r\nHost:127.0.0.1\r\n\r\nGET / HTTP/1.1\r\nHost:127.0.0.1\r\n\r\n"
1048+
)
10351049
# only read once
10361050
yield self.read_response()
10371051
self.close()
10381052

10391053
@gen_test
10401054
def test_cancel_during_download(self):
10411055
yield self.connect()
1042-
self.stream.write(b"GET /large HTTP/1.1\r\n\r\n")
1056+
self.stream.write(b"GET /large HTTP/1.1\r\nHost:127.0.0.1\r\n\r\n")
10431057
yield self.read_headers()
10441058
yield self.stream.read_bytes(1024)
10451059
self.close()
10461060

10471061
@gen_test
10481062
def test_finish_while_closed(self):
10491063
yield self.connect()
1050-
self.stream.write(b"GET /finish_on_close HTTP/1.1\r\n\r\n")
1064+
self.stream.write(b"GET /finish_on_close HTTP/1.1\r\nHost:127.0.0.1\r\n\r\n")
10511065
yield self.read_headers()
10521066
self.close()
10531067
# Let the hanging coroutine clean up after itself
@@ -1075,10 +1089,10 @@ def test_keepalive_chunked(self):
10751089
@gen_test
10761090
def test_keepalive_chunked_head_no_body(self):
10771091
yield self.connect()
1078-
self.stream.write(b"HEAD /chunked HTTP/1.1\r\n\r\n")
1092+
self.stream.write(b"HEAD /chunked HTTP/1.1\r\nHost:127.0.0.1\r\n\r\n")
10791093
yield self.read_headers()
10801094

1081-
self.stream.write(b"HEAD /chunked HTTP/1.1\r\n\r\n")
1095+
self.stream.write(b"HEAD /chunked HTTP/1.1\r\nHost:127.0.0.1\r\n\r\n")
10821096
yield self.read_headers()
10831097
self.close()
10841098

@@ -1337,7 +1351,7 @@ def test_idle_after_use(self):
13371351

13381352
# Use the connection twice to make sure keep-alives are working
13391353
for i in range(2):
1340-
stream.write(b"GET / HTTP/1.1\r\n\r\n")
1354+
stream.write(b"GET / HTTP/1.1\r\nHost: 127.0.0.1\r\n\r\n")
13411355
yield stream.read_until(b"\r\n\r\n")
13421356
data = yield stream.read_bytes(11)
13431357
self.assertEqual(data, b"Hello world")
@@ -1459,14 +1473,17 @@ def test_body_size_override_reset(self):
14591473
# Use a raw stream so we can make sure it's all on one connection.
14601474
stream.write(
14611475
b"PUT /streaming?expected_size=10240 HTTP/1.1\r\n"
1476+
b"Host: 127.0.0.1\r\n"
14621477
b"Content-Length: 10240\r\n\r\n"
14631478
)
14641479
stream.write(b"a" * 10240)
14651480
start_line, headers, response = yield read_stream_body(stream)
14661481
self.assertEqual(response, b"10240")
14671482
# Without the ?expected_size parameter, we get the old default value
14681483
stream.write(
1469-
b"PUT /streaming HTTP/1.1\r\n" b"Content-Length: 10240\r\n\r\n"
1484+
b"PUT /streaming HTTP/1.1\r\n"
1485+
b"Host: 127.0.0.1\r\n"
1486+
b"Content-Length: 10240\r\n\r\n"
14701487
)
14711488
with ExpectLog(gen_log, ".*Content-Length too long", level=logging.INFO):
14721489
data = yield stream.read_until_close()

tornado/test/web_test.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2362,7 +2362,7 @@ def connect(self, url, connection_close):
23622362
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0)
23632363
s.connect(("127.0.0.1", self.get_http_port()))
23642364
stream = IOStream(s)
2365-
stream.write(b"GET " + url + b" HTTP/1.1\r\n")
2365+
stream.write(b"GET " + url + b" HTTP/1.1\r\nHost: 127.0.0.1\r\n")
23662366
if connection_close:
23672367
stream.write(b"Connection: close\r\n")
23682368
stream.write(b"Transfer-Encoding: chunked\r\n\r\n")

0 commit comments

Comments
 (0)