8282
# Character classes used while scanning header values.  Each *_ENDS /
# *SPECIALS name is a set of single characters, so membership tests on
# ``value[0]`` are O(1).
WSP = set(' \t')
CFWS_LEADER = WSP | set('(')
# CFWS leaders plus '.', so one scan can stop at either a dot or CFWS.
CFWS_LEADER_WITH_DOT = CFWS_LEADER | set('.')
SPECIALS = set(r'()<>@,:;.\"[]')
ATOM_ENDS = SPECIALS | WSP
DOT_ATOM_ENDS = ATOM_ENDS - set('.')
# '.', '"', and '(' do not end phrases in order to support obs-phrase
PHRASE_ENDS = SPECIALS - set('."(')
# PHRASE_ENDS flattened to a string so it can be handed to str.lstrip().
# The characters are sorted so the constant has the same value in every
# process; joining the set directly would follow hash order, which varies
# under string hash randomization.
PHRASE_ENDS_CHARS = ''.join(sorted(PHRASE_ENDS))
TSPECIALS = (SPECIALS | set('/?=')) - set('.')
TOKEN_ENDS = TSPECIALS | WSP
ASPECIALS = TSPECIALS | set("*'%")
@@ -1300,6 +1302,12 @@ def get_cfws(value):
13001302 cfws .append (token )
13011303 return cfws , value
13021304
def get_cfws_digits(value, leader_set):
    """Split *value* just before its first character found in *leader_set*.

    Returns a ``(head, rest)`` pair of strings.  *head* is the leading run
    of characters that are not members of *leader_set*; *rest* is whatever
    follows, starting with the first member found.  When no character of
    *value* is in *leader_set*, *head* is all of *value* and *rest* is ''.

    Despite the name, no check is made here that *head* consists of
    digits; the caller is expected to validate it.
    """
    split_at = next(
        (pos for pos, char in enumerate(value) if char in leader_set),
        len(value),
    )
    return value[:split_at], value[split_at:]
1310+
13031311def get_quoted_string (value ):
13041312 """quoted-string = [CFWS] <bare-quoted-string> [CFWS]
13051313
@@ -1443,11 +1451,13 @@ def get_phrase(value):
14431451 phrase .defects .append (errors .InvalidHeaderDefect (
14441452 "phrase does not start with word" ))
14451453 while value and value [0 ] not in PHRASE_ENDS :
1446- if value [0 ]== '.' :
1447- phrase .append (DOT )
1448- phrase .defects .append (errors .ObsoleteHeaderDefect (
1449- "period in 'phrase'" ))
1450- value = value [1 :]
1454+ if value [0 ] == '.' :
1455+ tmpvalue = value .lstrip ('.' )
1456+ for _ in range (len (value ) - len (tmpvalue )):
1457+ phrase .append (DOT )
1458+ phrase .defects .append (errors .ObsoleteHeaderDefect (
1459+ "period in 'phrase'" ))
1460+ value = tmpvalue
14511461 else :
14521462 try :
14531463 token , value = get_word (value )
@@ -1461,6 +1471,20 @@ def get_phrase(value):
14611471 phrase .append (token )
14621472 return phrase , value
14631473
def _find_phrase(reslist, value, endchars):
    """Consume phrases and stray specials from *value* up to *endchars*.

    Tokens are appended to *reslist* in the order they are encountered:
    each character in PHRASE_ENDS becomes a 'misplaced-special'
    ValueTerminal, and anything else is handed to get_phrase().  Scanning
    stops when *value* is exhausted or its first character is in
    *endchars*.  The unconsumed remainder of *value* is returned.
    """
    # Only strip specials that are not terminators, so that lstrip() never
    # runs past a character in 'endchars'.
    strippable = ''.join(PHRASE_ENDS - set(endchars))
    while value:
        first = value[0]
        if first in endchars:
            break
        if first not in PHRASE_ENDS:
            token, value = get_phrase(value)
            reslist.append(token)
            continue
        # Handle the whole run of misplaced specials in one pass.
        remainder = value.lstrip(strippable)
        run_length = len(value) - len(remainder)
        for special in value[:run_length]:
            reslist.append(ValueTerminal(special, 'misplaced-special'))
        value = remainder
    return value
1487+
14641488def get_local_part (value ):
14651489 """ local-part = dot-atom / quoted-string / obs-local-part
14661490
@@ -1842,14 +1866,7 @@ def get_invalid_mailbox(value, endchars):
18421866
18431867 """
18441868 invalid_mailbox = InvalidMailbox ()
1845- while value and value [0 ] not in endchars :
1846- if value [0 ] in PHRASE_ENDS :
1847- invalid_mailbox .append (ValueTerminal (value [0 ],
1848- 'misplaced-special' ))
1849- value = value [1 :]
1850- else :
1851- token , value = get_phrase (value )
1852- invalid_mailbox .append (token )
1869+ value = _find_phrase (invalid_mailbox , value , endchars )
18531870 return invalid_mailbox , value
18541871
18551872def get_mailbox_list (value ):
@@ -2196,10 +2213,7 @@ def parse_mime_version(value):
21962213 if not value :
21972214 mime_version .defects .append (errors .HeaderMissingRequiredValue (
21982215 "Expected MIME version number but found only CFWS" ))
2199- digits = ''
2200- while value and value [0 ] != '.' and value [0 ] not in CFWS_LEADER :
2201- digits += value [0 ]
2202- value = value [1 :]
2216+ digits , value = get_cfws_digits (value , CFWS_LEADER_WITH_DOT )
22032217 if not digits .isdigit ():
22042218 mime_version .defects .append (errors .InvalidHeaderDefect (
22052219 "Expected MIME major version number but found {!r}" .format (digits )))
@@ -2227,10 +2241,7 @@ def parse_mime_version(value):
22272241 mime_version .defects .append (errors .InvalidHeaderDefect (
22282242 "Incomplete MIME version; found only major number" ))
22292243 return mime_version
2230- digits = ''
2231- while value and value [0 ] not in CFWS_LEADER :
2232- digits += value [0 ]
2233- value = value [1 :]
2244+ digits , value = get_cfws_digits (value , CFWS_LEADER )
22342245 if not digits .isdigit ():
22352246 mime_version .defects .append (errors .InvalidHeaderDefect (
22362247 "Expected MIME minor version number but found {!r}" .format (digits )))
@@ -2255,14 +2266,7 @@ def get_invalid_parameter(value):
22552266
22562267 """
22572268 invalid_parameter = InvalidParameter ()
2258- while value and value [0 ] != ';' :
2259- if value [0 ] in PHRASE_ENDS :
2260- invalid_parameter .append (ValueTerminal (value [0 ],
2261- 'misplaced-special' ))
2262- value = value [1 :]
2263- else :
2264- token , value = get_phrase (value )
2265- invalid_parameter .append (token )
2269+ value = _find_phrase (invalid_parameter , value , ';' )
22662270 return invalid_parameter , value
22672271
22682272def get_ttext (value ):
@@ -2407,10 +2411,8 @@ def get_section(value):
24072411 if not value or not value [0 ].isdigit ():
24082412 raise errors .HeaderParseError ("Expected section number but "
24092413 "found {}" .format (value ))
2410- digits = ''
2411- while value and value [0 ].isdigit ():
2412- digits += value [0 ]
2413- value = value [1 :]
2414+ ind = next ((i for i , ch in enumerate (value ) if not ch .isdigit ()), 0 )
2415+ digits , value = value [:ind ], value [ind :]
24142416 if digits [0 ] == '0' and digits != '0' :
24152417 section .defects .append (errors .InvalidHeaderDefect (
24162418 "section number has an invalid leading 0" ))
@@ -2638,17 +2640,10 @@ def _find_mime_parameters(tokenlist, value):
26382640 """Do our best to find the parameters in an invalid MIME header
26392641
26402642 """
2641- while value and value [0 ] != ';' :
2642- if value [0 ] in PHRASE_ENDS :
2643- tokenlist .append (ValueTerminal (value [0 ], 'misplaced-special' ))
2644- value = value [1 :]
2645- else :
2646- token , value = get_phrase (value )
2647- tokenlist .append (token )
2648- if not value :
2649- return
2650- tokenlist .append (ValueTerminal (';' , 'parameter-separator' ))
2651- tokenlist .append (parse_mime_parameters (value [1 :]))
2643+ value = _find_phrase (tokenlist , value , ';' )
2644+ if value :
2645+ tokenlist .append (ValueTerminal (';' , 'parameter-separator' ))
2646+ tokenlist .append (parse_mime_parameters (value [1 :]))
26522647
26532648def parse_content_type_header (value ):
26542649 """ maintype "/" subtype *( ";" parameter )
@@ -2757,12 +2752,16 @@ def parse_content_transfer_encoding_header(value):
27572752 if not value :
27582753 return cte_header
27592754 while value :
2760- cte_header .defects .append (errors .InvalidHeaderDefect (
2761- "Extra text after content transfer encoding" ))
27622755 if value [0 ] in PHRASE_ENDS :
2763- cte_header .append (ValueTerminal (value [0 ], 'misplaced-special' ))
2764- value = value [1 :]
2756+ tmpvalue = value .lstrip (PHRASE_ENDS_CHARS )
2757+ for i in range (len (value ) - len (tmpvalue )):
2758+ cte_header .defects .append (errors .InvalidHeaderDefect (
2759+ "Extra text after content transfer encoding" ))
2760+ cte_header .append (ValueTerminal (value [i ], 'misplaced-special' ))
2761+ value = tmpvalue
27652762 else :
2763+ cte_header .defects .append (errors .InvalidHeaderDefect (
2764+ "Extra text after content transfer encoding" ))
27662765 token , value = get_phrase (value )
27672766 cte_header .append (token )
27682767 return cte_header
0 commit comments