Skip to content

Commit bb61400

Browse files
authored
Merge pull request #3489 from bdarnell/field-value
httputil: Forbid control chars and CR in header values
2 parents 5976db7 + 4bdd0bc commit bb61400

File tree

2 files changed

+38
-9
lines changed

2 files changed

+38
-9
lines changed

tornado/httputil.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
import unicodedata
3434
from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl
3535

36-
from tornado.escape import native_str, parse_qs_bytes, utf8
36+
from tornado.escape import native_str, parse_qs_bytes, utf8, to_unicode
3737
from tornado.log import gen_log
3838
from tornado.util import ObjectDict, unicode_type
3939

@@ -100,6 +100,12 @@ class _ABNF:
100100
# RFC 9110 (HTTP Semantics)
101101
obs_text = re.compile(r"[\x80-\xFF]")
102102
field_vchar = re.compile(rf"(?:{VCHAR.pattern}|{obs_text.pattern})")
103+
# Not exactly from the RFC to simplify and combine field-content and field-value.
104+
field_value = re.compile(
105+
rf"|"
106+
rf"{field_vchar.pattern}|"
107+
rf"{field_vchar.pattern}(?:{field_vchar.pattern}| |\t)*{field_vchar.pattern}"
108+
)
103109
tchar = re.compile(r"[!#$%&'*+\-.^_`|~0-9A-Za-z]")
104110
token = re.compile(rf"{tchar.pattern}+")
105111
field_name = token
@@ -195,6 +201,10 @@ def add(self, name: str, value: str) -> None:
195201
"""Adds a new value for the given key."""
196202
if not _ABNF.field_name.fullmatch(name):
197203
raise HTTPInputError("Invalid header name %r" % name)
204+
if not _ABNF.field_value.fullmatch(to_unicode(value)):
205+
# TODO: the fact we still support bytes here (contrary to type annotations)
206+
# and still test for it should probably be changed.
207+
raise HTTPInputError("Invalid header value %r" % value)
198208
norm_name = _normalize_header(name)
199209
self._last_key = norm_name
200210
if norm_name in self:
@@ -254,6 +264,8 @@ def parse_line(self, line: str) -> None:
254264
if self._last_key is None:
255265
raise HTTPInputError("first header line cannot start with whitespace")
256266
new_part = " " + line.strip(HTTP_WHITESPACE)
267+
if not _ABNF.field_value.fullmatch(new_part[1:]):
268+
raise HTTPInputError("Invalid header continuation %r" % new_part)
257269
self._as_list[self._last_key][-1] += new_part
258270
self._dict[self._last_key] += new_part
259271
else:

tornado/test/httputil_test.py

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,17 @@ def test_continuation(self):
303303
data = "Foo: bar\r\n\fasdf"
304304
self.assertRaises(HTTPInputError, HTTPHeaders.parse, data)
305305

306+
def test_forbidden_ascii_characters(self):
307+
# Control characters and ASCII whitespace other than space, tab, and CRLF are not allowed in
308+
# headers.
309+
for c in range(0xFF):
310+
data = f"Foo: bar{chr(c)}baz\r\n"
311+
if c == 0x09 or (c >= 0x20 and c != 0x7F):
312+
headers = HTTPHeaders.parse(data)
313+
self.assertEqual(headers["Foo"], f"bar{chr(c)}baz")
314+
else:
315+
self.assertRaises(HTTPInputError, HTTPHeaders.parse, data)
316+
306317
def test_unicode_newlines(self):
307318
# Ensure that only \r\n is recognized as a header separator, and not
308319
# the other newline-like unicode characters.
@@ -311,10 +322,13 @@ def test_unicode_newlines(self):
311322
# and cpython's unicodeobject.c (which defines the implementation
312323
# of unicode_type.splitlines(), and uses a different list than TR13).
313324
newlines = [
314-
"\u001b", # VERTICAL TAB
315-
"\u001c", # FILE SEPARATOR
316-
"\u001d", # GROUP SEPARATOR
317-
"\u001e", # RECORD SEPARATOR
325+
# The following ascii characters are sometimes treated as newline-like,
326+
# but they're disallowed in HTTP headers. This test covers unicode
327+
# characters that are permitted in headers (under the obs-text rule).
328+
# "\u001b", # VERTICAL TAB
329+
# "\u001c", # FILE SEPARATOR
330+
# "\u001d", # GROUP SEPARATOR
331+
# "\u001e", # RECORD SEPARATOR
318332
"\u0085", # NEXT LINE
319333
"\u2028", # LINE SEPARATOR
320334
"\u2029", # PARAGRAPH SEPARATOR
@@ -363,13 +377,16 @@ def test_unicode_whitespace(self):
363377
self.assertEqual(expected, list(headers.get_all()))
364378

365379
def test_optional_cr(self):
380+
# Bare CR is not a valid line separator
381+
with self.assertRaises(HTTPInputError):
382+
HTTPHeaders.parse("CRLF: crlf\r\nLF: lf\nCR: cr\rMore: more\r\n")
383+
366384
# Both CRLF and LF should be accepted as separators. CR should not be
367-
# part of the data when followed by LF, but it is a normal char
368-
# otherwise (or should bare CR be an error?)
369-
headers = HTTPHeaders.parse("CRLF: crlf\r\nLF: lf\nCR: cr\rMore: more\r\n")
385+
# part of the data when followed by LF.
386+
headers = HTTPHeaders.parse("CRLF: crlf\r\nLF: lf\nMore: more\r\n")
370387
self.assertEqual(
371388
sorted(headers.get_all()),
372-
[("Cr", "cr\rMore: more"), ("Crlf", "crlf"), ("Lf", "lf")],
389+
[("Crlf", "crlf"), ("Lf", "lf"), ("More", "more")],
373390
)
374391

375392
def test_copy(self):

0 commit comments

Comments
 (0)