Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 22 additions & 7 deletions tornado/httputil.py
Original file line number Diff line number Diff line change
Expand Up @@ -1082,19 +1082,34 @@ def parse_response_start_line(line: str) -> ResponseStartLine:
# It has also been modified to support valueless parameters as seen in
# websocket extension negotiations, and to support non-ascii values in
# RFC 2231/5987 format.
#
# _parseparam has been further modified with the logic from
# https://github.com/python/cpython/pull/136072/files
# to avoid quadratic behavior when parsing semicolons in quoted strings.
#
# TODO: See if we can switch to email.message.Message for this functionality.
# This is the suggested replacement for the cgi.py module now that cgi has
# been removed from recent versions of Python. We need to verify that
# the email module is consistent with our existing behavior (and all relevant
# RFCs for multipart/form-data) before making this change.


def _parseparam(s: str) -> Generator[str, None, None]:
while s[:1] == ";":
s = s[1:]
end = s.find(";")
while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
end = s.find(";", end + 1)
start = 0
while s.find(";", start) == start:
start += 1
end = s.find(";", start)
ind, diff = start, 0
while end > 0:
diff += s.count('"', ind, end) - s.count('\\"', ind, end)
if diff % 2 == 0:
break
end, ind = ind, s.find(";", end + 1)
if end < 0:
end = len(s)
f = s[:end]
f = s[start:end]
yield f.strip()
s = s[end:]
start = end


def _parse_header(line: str) -> Tuple[str, Dict[str, str]]:
Expand Down
23 changes: 23 additions & 0 deletions tornado/test/httputil_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,29 @@ def test_data_after_final_boundary(self):
self.assertEqual(file["filename"], "ab.txt")
self.assertEqual(file["body"], b"Foo")

def test_disposition_param_linear_performance(self):
# This is a regression test for performance of parsing parameters
# to the content-disposition header, specifically for semicolons within
# quoted strings.
def f(n):
start = time.time()
message = (
b"--1234\r\nContent-Disposition: form-data; "
+ b'x="'
+ b";" * n
+ b'"; '
+ b'name="files"; filename="a.txt"\r\n\r\nFoo\r\n--1234--\r\n'
)
args: dict[str, list[bytes]] = {}
files: dict[str, list[HTTPFile]] = {}
parse_multipart_form_data(b"1234", message, args, files)
return time.time() - start

d1 = f(1_000)
d2 = f(10_000)
if d2 / d1 > 20:
self.fail(f"Disposition param parsing is not linear: {d1=} vs {d2=}")


class HTTPHeadersTest(unittest.TestCase):
def test_multi_line(self):
Expand Down