Skip to content

Commit 2121094

Browse files
committed
fix quadratic worst-time complexity in _header_value_parser.py
1 parent eb145fa commit 2121094

File tree

3 files changed

+59
-48
lines changed

3 files changed

+59
-48
lines changed

Lib/email/_header_value_parser.py

Lines changed: 47 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -82,11 +82,13 @@
8282

8383
WSP = set(' \t')
8484
CFWS_LEADER = WSP | set('(')
85+
CFWS_LEADER_WITH_DOT = CFWS_LEADER | set('.')
8586
SPECIALS = set(r'()<>@,:;.\"[]')
8687
ATOM_ENDS = SPECIALS | WSP
8788
DOT_ATOM_ENDS = ATOM_ENDS - set('.')
8889
# '.', '"', and '(' do not end phrases in order to support obs-phrase
8990
PHRASE_ENDS = SPECIALS - set('."(')
91+
PHRASE_ENDS_CHARS = r''.join(PHRASE_ENDS)
9092
TSPECIALS = (SPECIALS | set('/?=')) - set('.')
9193
TOKEN_ENDS = TSPECIALS | WSP
9294
ASPECIALS = TSPECIALS | set("*'%")
@@ -1300,6 +1302,12 @@ def get_cfws(value):
13001302
cfws.append(token)
13011303
return cfws, value
13021304

1305+
def get_cfws_digits(value, leader_set):
    """Split value at the first character that belongs to leader_set.

    Returns a (prefix, rest) pair.  prefix is the longest leading run of
    characters that are not in leader_set; rest is whatever follows, so it
    is either empty or starts with a character from leader_set.

    No check is made here that prefix actually consists of digits; the
    caller is expected to validate it.
    """
    # Single pass over value; fall back to len(value) when no character of
    # leader_set occurs so that the whole string becomes the prefix.
    end = next((i for i, ch in enumerate(value) if ch in leader_set),
               len(value))
    return value[:end], value[end:]
1310+
13031311
def get_quoted_string(value):
13041312
"""quoted-string = [CFWS] <bare-quoted-string> [CFWS]
13051313
@@ -1443,11 +1451,13 @@ def get_phrase(value):
14431451
phrase.defects.append(errors.InvalidHeaderDefect(
14441452
"phrase does not start with word"))
14451453
while value and value[0] not in PHRASE_ENDS:
1446-
if value[0]=='.':
1447-
phrase.append(DOT)
1448-
phrase.defects.append(errors.ObsoleteHeaderDefect(
1449-
"period in 'phrase'"))
1450-
value = value[1:]
1454+
if value[0] == '.':
1455+
tmpvalue = value.lstrip('.')
1456+
for _ in range(len(value) - len(tmpvalue)):
1457+
phrase.append(DOT)
1458+
phrase.defects.append(errors.ObsoleteHeaderDefect(
1459+
"period in 'phrase'"))
1460+
value = tmpvalue
14511461
else:
14521462
try:
14531463
token, value = get_word(value)
@@ -1461,6 +1471,20 @@ def get_phrase(value):
14611471
phrase.append(token)
14621472
return phrase, value
14631473

1474+
def _find_phrase(reslist, value, endchars):
    """Consume value up to the first character in endchars, filling reslist.

    Every leading character that is in PHRASE_ENDS (and not in endchars) is
    appended to reslist as a 'misplaced-special' ValueTerminal.  Any other
    text is passed to get_phrase() and the token it returns is appended.
    reslist is modified in place.

    Returns the unconsumed remainder of value, which is either empty or
    starts with a character from endchars.
    """
    # lstrip() must not strip anything in 'endchars', otherwise the
    # terminator the loop condition looks for would be swallowed.
    strippable = ''.join(PHRASE_ENDS - set(endchars))
    while value and value[0] not in endchars:
        if value[0] in PHRASE_ENDS:
            # Drop the whole run of misplaced specials with one lstrip()
            # call, then record one terminal per stripped character.
            rest = value.lstrip(strippable)
            run_length = len(value) - len(rest)
            for char in value[:run_length]:
                reslist.append(ValueTerminal(char, 'misplaced-special'))
            value = rest
        else:
            token, value = get_phrase(value)
            reslist.append(token)
    return value
1487+
14641488
def get_local_part(value):
14651489
""" local-part = dot-atom / quoted-string / obs-local-part
14661490
@@ -1842,14 +1866,7 @@ def get_invalid_mailbox(value, endchars):
18421866
18431867
"""
18441868
invalid_mailbox = InvalidMailbox()
1845-
while value and value[0] not in endchars:
1846-
if value[0] in PHRASE_ENDS:
1847-
invalid_mailbox.append(ValueTerminal(value[0],
1848-
'misplaced-special'))
1849-
value = value[1:]
1850-
else:
1851-
token, value = get_phrase(value)
1852-
invalid_mailbox.append(token)
1869+
value = _find_phrase(invalid_mailbox, value, endchars)
18531870
return invalid_mailbox, value
18541871

18551872
def get_mailbox_list(value):
@@ -2196,10 +2213,7 @@ def parse_mime_version(value):
21962213
if not value:
21972214
mime_version.defects.append(errors.HeaderMissingRequiredValue(
21982215
"Expected MIME version number but found only CFWS"))
2199-
digits = ''
2200-
while value and value[0] != '.' and value[0] not in CFWS_LEADER:
2201-
digits += value[0]
2202-
value = value[1:]
2216+
digits, value = get_cfws_digits(value, CFWS_LEADER_WITH_DOT)
22032217
if not digits.isdigit():
22042218
mime_version.defects.append(errors.InvalidHeaderDefect(
22052219
"Expected MIME major version number but found {!r}".format(digits)))
@@ -2227,10 +2241,7 @@ def parse_mime_version(value):
22272241
mime_version.defects.append(errors.InvalidHeaderDefect(
22282242
"Incomplete MIME version; found only major number"))
22292243
return mime_version
2230-
digits = ''
2231-
while value and value[0] not in CFWS_LEADER:
2232-
digits += value[0]
2233-
value = value[1:]
2244+
digits, value = get_cfws_digits(value, CFWS_LEADER)
22342245
if not digits.isdigit():
22352246
mime_version.defects.append(errors.InvalidHeaderDefect(
22362247
"Expected MIME minor version number but found {!r}".format(digits)))
@@ -2255,14 +2266,7 @@ def get_invalid_parameter(value):
22552266
22562267
"""
22572268
invalid_parameter = InvalidParameter()
2258-
while value and value[0] != ';':
2259-
if value[0] in PHRASE_ENDS:
2260-
invalid_parameter.append(ValueTerminal(value[0],
2261-
'misplaced-special'))
2262-
value = value[1:]
2263-
else:
2264-
token, value = get_phrase(value)
2265-
invalid_parameter.append(token)
2269+
value = _find_phrase(invalid_parameter, value, ';')
22662270
return invalid_parameter, value
22672271

22682272
def get_ttext(value):
@@ -2407,10 +2411,8 @@ def get_section(value):
24072411
if not value or not value[0].isdigit():
24082412
raise errors.HeaderParseError("Expected section number but "
24092413
"found {}".format(value))
2410-
digits = ''
2411-
while value and value[0].isdigit():
2412-
digits += value[0]
2413-
value = value[1:]
2414+
# Default to len(value): if every remaining character is a digit there is
# no non-digit index, and a default of 0 would leave 'digits' empty.
ind = next((i for i, ch in enumerate(value) if not ch.isdigit()), len(value))
2415+
digits, value = value[:ind], value[ind:]
24142416
if digits[0] == '0' and digits != '0':
24152417
section.defects.append(errors.InvalidHeaderDefect(
24162418
"section number has an invalid leading 0"))
@@ -2638,17 +2640,10 @@ def _find_mime_parameters(tokenlist, value):
26382640
"""Do our best to find the parameters in an invalid MIME header
26392641
26402642
"""
2641-
while value and value[0] != ';':
2642-
if value[0] in PHRASE_ENDS:
2643-
tokenlist.append(ValueTerminal(value[0], 'misplaced-special'))
2644-
value = value[1:]
2645-
else:
2646-
token, value = get_phrase(value)
2647-
tokenlist.append(token)
2648-
if not value:
2649-
return
2650-
tokenlist.append(ValueTerminal(';', 'parameter-separator'))
2651-
tokenlist.append(parse_mime_parameters(value[1:]))
2643+
value = _find_phrase(tokenlist, value, ';')
2644+
if value:
2645+
tokenlist.append(ValueTerminal(';', 'parameter-separator'))
2646+
tokenlist.append(parse_mime_parameters(value[1:]))
26522647

26532648
def parse_content_type_header(value):
26542649
""" maintype "/" subtype *( ";" parameter )
@@ -2757,12 +2752,16 @@ def parse_content_transfer_encoding_header(value):
27572752
if not value:
27582753
return cte_header
27592754
while value:
2760-
cte_header.defects.append(errors.InvalidHeaderDefect(
2761-
"Extra text after content transfer encoding"))
27622755
if value[0] in PHRASE_ENDS:
2763-
cte_header.append(ValueTerminal(value[0], 'misplaced-special'))
2764-
value = value[1:]
2756+
tmpvalue = value.lstrip(PHRASE_ENDS_CHARS)
2757+
for i in range(len(value) - len(tmpvalue)):
2758+
cte_header.defects.append(errors.InvalidHeaderDefect(
2759+
"Extra text after content transfer encoding"))
2760+
cte_header.append(ValueTerminal(value[i], 'misplaced-special'))
2761+
value = tmpvalue
27652762
else:
2763+
cte_header.defects.append(errors.InvalidHeaderDefect(
2764+
"Extra text after content transfer encoding"))
27662765
token, value = get_phrase(value)
27672766
cte_header.append(token)
27682767
return cte_header

Lib/test/test_email/test__header_value_parser.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2676,6 +2676,16 @@ def test_invalid_content_transfer_encoding(self):
26762676
";foo", ";foo", ";foo", [errors.InvalidHeaderDefect]*3
26772677
)
26782678

2679+
def test_invalid_content_transfer_encoding_misplaced_special(self):
    # "foo" is expected to parse as the encoding token, and each of the
    # five trailing ';' as its own 'misplaced-special' terminal.
    cte = parser.parse_content_transfer_encoding_header("foo;;;;;")
    self.assertEqual(len(cte), 6)
    self.assertEqual(cte[0].value, "foo")
    self.assertEqual(cte[0].token_type, "token")
    terminal = parser.ValueTerminal(";", "misplaced-special")
    self.assertEqual(cte[1:], [terminal] * 5)
2688+
26792689
# get_msg_id
26802690

26812691
def test_get_msg_id_empty(self):
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fix various email header value parsing routines with quadratic
2+
worst-case time complexity. Patch by Bénédikt Tran.

0 commit comments

Comments
 (0)