Skip to content

Commit 9dea6dc

Browse files
authored
Merge pull request #3477 from bdarnell/header-trailing-whitespace
httputil: Improve handling of trailing whitespace in headers
2 parents c0d5b68 + 002f3a1 commit 9dea6dc

File tree

2 files changed

+50
-9
lines changed

2 files changed

+50
-9
lines changed

tornado/httputil.py

Lines changed: 33 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -207,18 +207,39 @@ def get_all(self) -> Iterable[Tuple[str, str]]:
207207
yield (name, value)
208208

209209
def parse_line(self, line: str) -> None:
210-
"""Updates the dictionary with a single header line.
210+
r"""Updates the dictionary with a single header line.
211211
212212
>>> h = HTTPHeaders()
213213
>>> h.parse_line("Content-Type: text/html")
214214
>>> h.get('content-type')
215215
'text/html'
216+
>>> h.parse_line("Content-Length: 42\r\n")
217+
>>> h.get('content-type')
218+
'text/html'
219+
220+
.. versionchanged:: 6.5
221+
Now supports lines with or without the trailing CRLF, making it possible
222+
to pass lines from AsyncHTTPClient's header_callback directly to this method.
223+
224+
.. deprecated:: 6.5
225+
In Tornado 7.0, certain deprecated features of HTTP will become errors.
226+
Specifically, line folding and the use of LF (with CR) as a line separator
227+
will be removed.
216228
"""
229+
if m := re.search(r"\r?\n$", line):
230+
# RFC 9112 section 2.2: a recipient MAY recognize a single LF as a line
231+
# terminator and ignore any preceding CR.
232+
# TODO(7.0): Remove this support for LF-only line endings.
233+
line = line[: m.start()]
234+
if not line:
235+
# Empty line, or the final CRLF of a header block.
236+
return
217237
if line[0].isspace():
218238
# continuation of a multi-line header
239+
# TODO(7.0): Remove support for line folding.
219240
if self._last_key is None:
220241
raise HTTPInputError("first header line cannot start with whitespace")
221-
new_part = " " + line.lstrip(HTTP_WHITESPACE)
242+
new_part = " " + line.strip(HTTP_WHITESPACE)
222243
self._as_list[self._last_key][-1] += new_part
223244
self._dict[self._last_key] += new_part
224245
else:
@@ -243,13 +264,16 @@ def parse(cls, headers: str) -> "HTTPHeaders":
243264
244265
"""
245266
h = cls()
246-
# RFC 7230 section 3.5: a recipient MAY recognize a single LF as a line
247-
# terminator and ignore any preceding CR.
248-
for line in headers.split("\n"):
249-
if line.endswith("\r"):
250-
line = line[:-1]
251-
if line:
252-
h.parse_line(line)
267+
268+
start = 0
269+
while True:
270+
lf = headers.find("\n", start)
271+
if lf == -1:
272+
h.parse_line(headers[start:])
273+
break
274+
line = headers[start : lf + 1]
275+
start = lf + 1
276+
h.parse_line(line)
253277
return h
254278

255279
# MutableMapping abstract method implementations.

tornado/test/httpclient_test.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -482,6 +482,23 @@ def streaming_callback(chunk):
482482
self.assertRegex(first_line[0], "HTTP/[0-9]\\.[0-9] 200.*\r\n")
483483
self.assertEqual(chunks, [b"asdf", b"qwer"])
484484

485+
def test_header_callback_to_parse_line(self):
486+
# Make a request with header_callback and feed the headers to HTTPHeaders.parse_line.
487+
# (Instead of HTTPHeaders.parse which is used in normal cases). Ensure that the resulting
488+
# headers are as expected, and in particular do not have trailing whitespace added
489+
# due to the final CRLF line.
490+
headers = HTTPHeaders()
491+
492+
def header_callback(line):
493+
if line.startswith("HTTP/"):
494+
# Ignore the first status line
495+
return
496+
headers.parse_line(line)
497+
498+
self.fetch("/hello", header_callback=header_callback)
499+
for k, v in headers.get_all():
500+
self.assertTrue(v == v.strip(), (k, v))
501+
485502
@gen_test
486503
def test_configure_defaults(self):
487504
defaults = dict(user_agent="TestDefaultUserAgent", allow_ipv6=False)

0 commit comments

Comments
 (0)