Skip to content

Commit 72386ad

Browse files
committed
Merge branch 'master' into free-threading
2 parents bad4486 + 5976db7 commit 72386ad

File tree

9 files changed

+347
-84
lines changed

9 files changed

+347
-84
lines changed

.github/zizmor.yml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
rules:
2+
unpinned-uses:
3+
config:
4+
policies:
5+
# Allow trusted repositories to use ref-pinning instead of hash-pinning.
6+
#
7+
# Defaults, from
8+
# https://github.com/woodruffw/zizmor/blob/7b4e76e94be2f4d7b455664ba5252b2b4458b91d/src/audit/unpinned_uses.rs#L172-L193
9+
actions/*: ref-pin
10+
github/*: ref-pin
11+
dependabot/*: ref-pin
12+
# Additional trusted repositories
13+
pypa/*: ref-pin
14+
astral-sh/setup-uv: ref-pin

docs/guide/templates.rst

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -118,8 +118,25 @@ Consequently, if you write random stuff inside of your template
118118
expressions, you will get random Python errors when you execute the
119119
template.
120120

121-
All template output is escaped by default, using the
122-
`tornado.escape.xhtml_escape` function. This behavior can be changed
121+
Security
122+
~~~~~~~~
123+
124+
Inserting untrusted content into a web page can lead to security vulnerabilities such as cross-site
125+
scripting (XSS). All data that is passed to a template should be *escaped* to prevent these
126+
vulnerabilities. The correct form of escaping is context-dependent; Tornado's templates are not
127+
aware of the syntax of HTML, JavaScript, etc., and so the template developer must sometimes
128+
explicitly apply the correct escaping function.
129+
130+
The default escaping function is `tornado.escape.xhtml_escape`, which is appropriate for HTML body
131+
content (but not attribute values). In other cases, other functions should be used. In JavaScript,
132+
use the `.json_encode` function, e.g. ``<script>var x = {{ json_encode(some_object) }};</script>``.
133+
`.json_encode` can be used to escape strings, numbers, lists, and dicts. In this example, the
134+
JavaScript variable ``x`` will be the corresponding JavaScript type (string, number, array, or
135+
object), and not the JSON-encoded string representation. Note that it is unsafe to use
136+
`.json_encode` in the context of a JavaScript string literal (including template strings), only in
137+
the top-level syntactic context.
138+
139+
The automatic escaping behavior can be disabled
123140
globally by passing ``autoescape=None`` to the `.Application` or
124141
`.tornado.template.Loader` constructors, for a template file with the
125142
``{% autoescape None %}`` directive, or for a single expression by

tornado/httputil.py

Lines changed: 78 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,19 @@ class _ABNF:
8181
cannot be alphabetized as they are in the RFCs because of dependencies.
8282
"""
8383

84+
# RFC 3986 (URI)
85+
# The URI hostname ABNF is both complex (including detailed validation of IPv4 and IPv6
86+
# literals) and not strict enough (a lot of punctuation is allowed by the ABNF even though
87+
# it is not allowed by DNS). We simplify it by allowing square brackets and colons in any
88+
# position, not only for their use in IPv6 literals.
89+
uri_unreserved = re.compile(r"[A-Za-z0-9\-._~]")
90+
uri_sub_delims = re.compile(r"[!$&'()*+,;=]")
91+
uri_pct_encoded = re.compile(r"%[0-9A-Fa-f]{2}")
92+
uri_host = re.compile(
93+
rf"(?:[\[\]:]|{uri_unreserved.pattern}|{uri_sub_delims.pattern}|{uri_pct_encoded.pattern})*"
94+
)
95+
uri_port = re.compile(r"[0-9]*")
96+
8497
# RFC 5234 (ABNF)
8598
VCHAR = re.compile(r"[\x21-\x7E]")
8699

@@ -91,6 +104,7 @@ class _ABNF:
91104
token = re.compile(rf"{tchar.pattern}+")
92105
field_name = token
93106
method = token
107+
host = re.compile(rf"(?:{uri_host.pattern})(?::{uri_port.pattern})?")
94108

95109
# RFC 9112 (HTTP/1.1)
96110
HTTP_version = re.compile(r"HTTP/[0-9]\.[0-9]")
@@ -207,18 +221,39 @@ def get_all(self) -> Iterable[Tuple[str, str]]:
207221
yield (name, value)
208222

209223
def parse_line(self, line: str) -> None:
210-
"""Updates the dictionary with a single header line.
224+
r"""Updates the dictionary with a single header line.
211225
212226
>>> h = HTTPHeaders()
213227
>>> h.parse_line("Content-Type: text/html")
214228
>>> h.get('content-type')
215229
'text/html'
230+
>>> h.parse_line("Content-Length: 42\r\n")
231+
>>> h.get('content-type')
232+
'text/html'
233+
234+
.. versionchanged:: 6.5
235+
Now supports lines with or without the trailing CRLF, making it possible
236+
to pass lines from AsyncHTTPClient's header_callback directly to this method.
237+
238+
.. deprecated:: 6.5
239+
In Tornado 7.0, certain deprecated features of HTTP will become errors.
240+
Specifically, line folding and the use of LF (with CR) as a line separator
241+
will be removed.
216242
"""
217-
if line[0].isspace():
243+
if m := re.search(r"\r?\n$", line):
244+
# RFC 9112 section 2.2: a recipient MAY recognize a single LF as a line
245+
# terminator and ignore any preceding CR.
246+
# TODO(7.0): Remove this support for LF-only line endings.
247+
line = line[: m.start()]
248+
if not line:
249+
# Empty line, or the final CRLF of a header block.
250+
return
251+
if line[0] in HTTP_WHITESPACE:
218252
# continuation of a multi-line header
253+
# TODO(7.0): Remove support for line folding.
219254
if self._last_key is None:
220255
raise HTTPInputError("first header line cannot start with whitespace")
221-
new_part = " " + line.lstrip(HTTP_WHITESPACE)
256+
new_part = " " + line.strip(HTTP_WHITESPACE)
222257
self._as_list[self._last_key][-1] += new_part
223258
self._dict[self._last_key] += new_part
224259
else:
@@ -243,13 +278,16 @@ def parse(cls, headers: str) -> "HTTPHeaders":
243278
244279
"""
245280
h = cls()
246-
# RFC 7230 section 3.5: a recipient MAY recognize a single LF as a line
247-
# terminator and ignore any preceding CR.
248-
for line in headers.split("\n"):
249-
if line.endswith("\r"):
250-
line = line[:-1]
251-
if line:
252-
h.parse_line(line)
281+
282+
start = 0
283+
while True:
284+
lf = headers.find("\n", start)
285+
if lf == -1:
286+
h.parse_line(headers[start:])
287+
break
288+
line = headers[start : lf + 1]
289+
start = lf + 1
290+
h.parse_line(line)
253291
return h
254292

255293
# MutableMapping abstract method implementations.
@@ -397,7 +435,7 @@ def __init__(
397435
version: str = "HTTP/1.0",
398436
headers: Optional[HTTPHeaders] = None,
399437
body: Optional[bytes] = None,
400-
host: Optional[str] = None,
438+
# host: Optional[str] = None,
401439
files: Optional[Dict[str, List["HTTPFile"]]] = None,
402440
connection: Optional["HTTPConnection"] = None,
403441
start_line: Optional["RequestStartLine"] = None,
@@ -416,7 +454,35 @@ def __init__(
416454
self.remote_ip = getattr(context, "remote_ip", None)
417455
self.protocol = getattr(context, "protocol", "http")
418456

419-
self.host = host or self.headers.get("Host") or "127.0.0.1"
457+
try:
458+
self.host = self.headers["Host"]
459+
except KeyError:
460+
if version == "HTTP/1.0":
461+
# HTTP/1.0 does not require the Host header.
462+
self.host = "127.0.0.1"
463+
else:
464+
raise HTTPInputError("Missing Host header")
465+
if not _ABNF.host.fullmatch(self.host):
466+
print(_ABNF.host.pattern)
467+
raise HTTPInputError("Invalid Host header: %r" % self.host)
468+
if "," in self.host:
469+
# https://www.rfc-editor.org/rfc/rfc9112.html#name-request-target
470+
# Server MUST respond with 400 Bad Request if multiple
471+
# Host headers are present.
472+
#
473+
# We test for the presence of a comma instead of the number of
474+
# headers received because a proxy may have converted
475+
# multiple headers into a single comma-separated value
476+
# (per RFC 9110 section 5.3).
477+
#
478+
# This is technically a departure from the RFC since the ABNF
479+
# does not forbid commas in the host header. However, since
480+
# commas are not allowed in DNS names, it is appropriate to
481+
# disallow them. (The same argument could be made for other special
482+
# characters, but commas are the most problematic since they could
483+
# be used to exploit differences between proxies when multiple headers
484+
# are supplied).
485+
raise HTTPInputError("Multiple host headers not allowed: %r" % self.host)
420486
self.host_name = split_host_and_port(self.host.lower())[0]
421487
self.files = files or {}
422488
self.connection = connection

tornado/test/httpclient_test.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -482,6 +482,23 @@ def streaming_callback(chunk):
482482
self.assertRegex(first_line[0], "HTTP/[0-9]\\.[0-9] 200.*\r\n")
483483
self.assertEqual(chunks, [b"asdf", b"qwer"])
484484

485+
def test_header_callback_to_parse_line(self):
486+
# Make a request with header_callback and feed the headers to HTTPHeaders.parse_line.
487+
# (Instead of HTTPHeaders.parse which is used in normal cases). Ensure that the resulting
488+
# headers are as expected, and in particular do not have trailing whitespace added
489+
# due to the final CRLF line.
490+
headers = HTTPHeaders()
491+
492+
def header_callback(line):
493+
if line.startswith("HTTP/"):
494+
# Ignore the first status line
495+
return
496+
headers.parse_line(line)
497+
498+
self.fetch("/hello", header_callback=header_callback)
499+
for k, v in headers.get_all():
500+
self.assertTrue(v == v.strip(), (k, v))
501+
485502
@gen_test
486503
def test_configure_defaults(self):
487504
defaults = dict(user_agent="TestDefaultUserAgent", allow_ipv6=False)

tornado/test/httpserver_test.py

Lines changed: 29 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,7 @@ def test_100_continue(self):
267267
b"\r\n".join(
268268
[
269269
b"POST /hello HTTP/1.1",
270+
b"Host: 127.0.0.1",
270271
b"Content-Length: 1024",
271272
b"Expect: 100-continue",
272273
b"Connection: close",
@@ -467,6 +468,7 @@ def test_chunked_request_body(self):
467468
self.stream.write(
468469
b"""\
469470
POST /echo HTTP/1.1
471+
Host: 127.0.0.1
470472
Transfer-Encoding: chunked
471473
Content-Type: application/x-www-form-urlencoded
472474
@@ -491,6 +493,7 @@ def test_chunked_request_uppercase(self):
491493
self.stream.write(
492494
b"""\
493495
POST /echo HTTP/1.1
496+
Host: 127.0.0.1
494497
Transfer-Encoding: Chunked
495498
Content-Type: application/x-www-form-urlencoded
496499
@@ -515,6 +518,7 @@ def test_chunked_request_body_invalid_size(self):
515518
self.stream.write(
516519
b"""\
517520
POST /echo HTTP/1.1
521+
Host: 127.0.0.1
518522
Transfer-Encoding: chunked
519523
520524
1_a
@@ -537,6 +541,7 @@ def test_chunked_request_body_duplicate_header(self):
537541
self.stream.write(
538542
b"""\
539543
POST /echo HTTP/1.1
544+
Host: 127.0.0.1
540545
Transfer-Encoding: chunked
541546
Transfer-encoding: chunked
542547
@@ -561,6 +566,7 @@ def test_chunked_request_body_unsupported_transfer_encoding(self):
561566
self.stream.write(
562567
b"""\
563568
POST /echo HTTP/1.1
569+
Host: 127.0.0.1
564570
Transfer-Encoding: gzip, chunked
565571
566572
2
@@ -582,6 +588,7 @@ def test_chunked_request_body_transfer_encoding_and_content_length(self):
582588
self.stream.write(
583589
b"""\
584590
POST /echo HTTP/1.1
591+
Host: 127.0.0.1
585592
Transfer-Encoding: chunked
586593
Content-Length: 2
587594
@@ -624,6 +631,7 @@ def test_invalid_content_length(self):
624631
textwrap.dedent(
625632
f"""\
626633
POST /echo HTTP/1.1
634+
Host: 127.0.0.1
627635
Content-Length: {value}
628636
Connection: close
629637
@@ -659,7 +667,7 @@ def noop_context():
659667
expect_log,
660668
):
661669
yield stream.connect(("127.0.0.1", self.get_http_port()))
662-
stream.write(utf8(f"{method} /echo HTTP/1.1\r\n\r\n"))
670+
stream.write(utf8(f"{method} /echo HTTP/1.1\r\nHost:127.0.0.1\r\n\r\n"))
663671
resp = yield stream.read_until(b"\r\n\r\n")
664672
self.assertTrue(
665673
resp.startswith(b"HTTP/1.1 %d" % code),
@@ -968,16 +976,18 @@ def close(self):
968976
@gen_test
969977
def test_two_requests(self):
970978
yield self.connect()
971-
self.stream.write(b"GET / HTTP/1.1\r\n\r\n")
979+
self.stream.write(b"GET / HTTP/1.1\r\nHost:127.0.0.1\r\n\r\n")
972980
yield self.read_response()
973-
self.stream.write(b"GET / HTTP/1.1\r\n\r\n")
981+
self.stream.write(b"GET / HTTP/1.1\r\nHost:127.0.0.1\r\n\r\n")
974982
yield self.read_response()
975983
self.close()
976984

977985
@gen_test
978986
def test_request_close(self):
979987
yield self.connect()
980-
self.stream.write(b"GET / HTTP/1.1\r\nConnection: close\r\n\r\n")
988+
self.stream.write(
989+
b"GET / HTTP/1.1\r\nHost:127.0.0.1\r\nConnection: close\r\n\r\n"
990+
)
981991
yield self.read_response()
982992
data = yield self.stream.read_until_close()
983993
self.assertTrue(not data)
@@ -1023,31 +1033,35 @@ def test_http10_keepalive_extra_crlf(self):
10231033
@gen_test
10241034
def test_pipelined_requests(self):
10251035
yield self.connect()
1026-
self.stream.write(b"GET / HTTP/1.1\r\n\r\nGET / HTTP/1.1\r\n\r\n")
1036+
self.stream.write(
1037+
b"GET / HTTP/1.1\r\nHost:127.0.0.1\r\n\r\nGET / HTTP/1.1\r\nHost:127.0.0.1\r\n\r\n"
1038+
)
10271039
yield self.read_response()
10281040
yield self.read_response()
10291041
self.close()
10301042

10311043
@gen_test
10321044
def test_pipelined_cancel(self):
10331045
yield self.connect()
1034-
self.stream.write(b"GET / HTTP/1.1\r\n\r\nGET / HTTP/1.1\r\n\r\n")
1046+
self.stream.write(
1047+
b"GET / HTTP/1.1\r\nHost:127.0.0.1\r\n\r\nGET / HTTP/1.1\r\nHost:127.0.0.1\r\n\r\n"
1048+
)
10351049
# only read once
10361050
yield self.read_response()
10371051
self.close()
10381052

10391053
@gen_test
10401054
def test_cancel_during_download(self):
10411055
yield self.connect()
1042-
self.stream.write(b"GET /large HTTP/1.1\r\n\r\n")
1056+
self.stream.write(b"GET /large HTTP/1.1\r\nHost:127.0.0.1\r\n\r\n")
10431057
yield self.read_headers()
10441058
yield self.stream.read_bytes(1024)
10451059
self.close()
10461060

10471061
@gen_test
10481062
def test_finish_while_closed(self):
10491063
yield self.connect()
1050-
self.stream.write(b"GET /finish_on_close HTTP/1.1\r\n\r\n")
1064+
self.stream.write(b"GET /finish_on_close HTTP/1.1\r\nHost:127.0.0.1\r\n\r\n")
10511065
yield self.read_headers()
10521066
self.close()
10531067
# Let the hanging coroutine clean up after itself
@@ -1075,10 +1089,10 @@ def test_keepalive_chunked(self):
10751089
@gen_test
10761090
def test_keepalive_chunked_head_no_body(self):
10771091
yield self.connect()
1078-
self.stream.write(b"HEAD /chunked HTTP/1.1\r\n\r\n")
1092+
self.stream.write(b"HEAD /chunked HTTP/1.1\r\nHost:127.0.0.1\r\n\r\n")
10791093
yield self.read_headers()
10801094

1081-
self.stream.write(b"HEAD /chunked HTTP/1.1\r\n\r\n")
1095+
self.stream.write(b"HEAD /chunked HTTP/1.1\r\nHost:127.0.0.1\r\n\r\n")
10821096
yield self.read_headers()
10831097
self.close()
10841098

@@ -1337,7 +1351,7 @@ def test_idle_after_use(self):
13371351

13381352
# Use the connection twice to make sure keep-alives are working
13391353
for i in range(2):
1340-
stream.write(b"GET / HTTP/1.1\r\n\r\n")
1354+
stream.write(b"GET / HTTP/1.1\r\nHost: 127.0.0.1\r\n\r\n")
13411355
yield stream.read_until(b"\r\n\r\n")
13421356
data = yield stream.read_bytes(11)
13431357
self.assertEqual(data, b"Hello world")
@@ -1459,14 +1473,17 @@ def test_body_size_override_reset(self):
14591473
# Use a raw stream so we can make sure it's all on one connection.
14601474
stream.write(
14611475
b"PUT /streaming?expected_size=10240 HTTP/1.1\r\n"
1476+
b"Host: 127.0.0.1\r\n"
14621477
b"Content-Length: 10240\r\n\r\n"
14631478
)
14641479
stream.write(b"a" * 10240)
14651480
start_line, headers, response = yield read_stream_body(stream)
14661481
self.assertEqual(response, b"10240")
14671482
# Without the ?expected_size parameter, we get the old default value
14681483
stream.write(
1469-
b"PUT /streaming HTTP/1.1\r\n" b"Content-Length: 10240\r\n\r\n"
1484+
b"PUT /streaming HTTP/1.1\r\n"
1485+
b"Host: 127.0.0.1\r\n"
1486+
b"Content-Length: 10240\r\n\r\n"
14701487
)
14711488
with ExpectLog(gen_log, ".*Content-Length too long", level=logging.INFO):
14721489
data = yield stream.read_until_close()

0 commit comments

Comments
 (0)