Skip to content

Commit 002f3a1

Browse files
committed
httputil: Improve handling of trailing whitespace in headers
HTTPHeaders had undocumented assumptions about trailing whitespace, leading to an unintentional regression in Tornado 6.4.1 in which passing the arguments of an AsyncHTTPClient header_callback to HTTPHeaders.parse_line would result in errors. This commit moves newline parsing from parse to parse_line. It also strips trailing whitespace from continuation lines (trailing whitespace is not allowed in HTTP headers, but we didn't reject it in continuation lines). This commit also deprecates continuation lines and the legacy handling of LF without CR. Fixes #3321
1 parent 0b7e700 commit 002f3a1

File tree

2 files changed

+50
-9
lines changed

2 files changed

+50
-9
lines changed

tornado/httputil.py

Lines changed: 33 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -207,18 +207,39 @@ def get_all(self) -> Iterable[Tuple[str, str]]:
207207
yield (name, value)
208208

209209
def parse_line(self, line: str) -> None:
210-
"""Updates the dictionary with a single header line.
210+
r"""Updates the dictionary with a single header line.
211211
212212
>>> h = HTTPHeaders()
213213
>>> h.parse_line("Content-Type: text/html")
214214
>>> h.get('content-type')
215215
'text/html'
216+
>>> h.parse_line("Content-Length: 42\r\n")
217+
>>> h.get('content-type')
218+
'text/html'
219+
220+
.. versionchanged:: 6.5
221+
Now supports lines with or without the trailing CRLF, making it possible
222+
to pass lines from AsyncHTTPClient's header_callback directly to this method.
223+
224+
.. deprecated:: 6.5
225+
In Tornado 7.0, certain deprecated features of HTTP will become errors.
226+
Specifically, line folding and the use of LF (with CR) as a line separator
227+
will be removed.
216228
"""
229+
if m := re.search(r"\r?\n$", line):
230+
# RFC 9112 section 2.2: a recipient MAY recognize a single LF as a line
231+
# terminator and ignore any preceding CR.
232+
# TODO(7.0): Remove this support for LF-only line endings.
233+
line = line[: m.start()]
234+
if not line:
235+
# Empty line, or the final CRLF of a header block.
236+
return
217237
if line[0].isspace():
218238
# continuation of a multi-line header
239+
# TODO(7.0): Remove support for line folding.
219240
if self._last_key is None:
220241
raise HTTPInputError("first header line cannot start with whitespace")
221-
new_part = " " + line.lstrip(HTTP_WHITESPACE)
242+
new_part = " " + line.strip(HTTP_WHITESPACE)
222243
self._as_list[self._last_key][-1] += new_part
223244
self._dict[self._last_key] += new_part
224245
else:
@@ -243,13 +264,16 @@ def parse(cls, headers: str) -> "HTTPHeaders":
243264
244265
"""
245266
h = cls()
246-
# RFC 7230 section 3.5: a recipient MAY recognize a single LF as a line
247-
# terminator and ignore any preceding CR.
248-
for line in headers.split("\n"):
249-
if line.endswith("\r"):
250-
line = line[:-1]
251-
if line:
252-
h.parse_line(line)
267+
268+
start = 0
269+
while True:
270+
lf = headers.find("\n", start)
271+
if lf == -1:
272+
h.parse_line(headers[start:])
273+
break
274+
line = headers[start : lf + 1]
275+
start = lf + 1
276+
h.parse_line(line)
253277
return h
254278

255279
# MutableMapping abstract method implementations.

tornado/test/httpclient_test.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -482,6 +482,23 @@ def streaming_callback(chunk):
482482
self.assertRegex(first_line[0], "HTTP/[0-9]\\.[0-9] 200.*\r\n")
483483
self.assertEqual(chunks, [b"asdf", b"qwer"])
484484

485+
def test_header_callback_to_parse_line(self):
486+
# Make a request with header_callback and feed the headers to HTTPHeaders.parse_line.
487+
# (Instead of HTTPHeaders.parse which is used in normal cases). Ensure that the resulting
488+
# headers are as expected, and in particular do not have trailing whitespace added
489+
# due to the final CRLF line.
490+
headers = HTTPHeaders()
491+
492+
def header_callback(line):
493+
if line.startswith("HTTP/"):
494+
# Ignore the first status line
495+
return
496+
headers.parse_line(line)
497+
498+
self.fetch("/hello", header_callback=header_callback)
499+
for k, v in headers.get_all():
500+
self.assertTrue(v == v.strip(), (k, v))
501+
485502
@gen_test
486503
def test_configure_defaults(self):
487504
defaults = dict(user_agent="TestDefaultUserAgent", allow_ipv6=False)

0 commit comments

Comments
 (0)