Skip to content

Commit c02bc47

Browse files
committed
fix: redundant WSP when parsed email
1 parent 759a54d commit c02bc47

File tree

2 files changed

+55
-6
lines changed

2 files changed

+55
-6
lines changed

Lib/email/policy.py

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,11 @@
44

55
import re
66
import sys
7-
from email._policybase import Policy, Compat32, compat32, _extend_docstrings
8-
from email.utils import _has_surrogates
9-
from email.headerregistry import HeaderRegistry as HeaderRegistry
7+
from email._policybase import Compat32, Policy, _extend_docstrings, compat32
108
from email.contentmanager import raw_data_manager
9+
from email.headerregistry import HeaderRegistry as HeaderRegistry
1110
from email.message import EmailMessage
11+
from email.utils import _has_surrogates
1212

1313
__all__ = [
1414
'Compat32',
@@ -125,7 +125,27 @@ def header_source_parse(self, sourcelines):
125125
126126
"""
127127
name, value = sourcelines[0].split(':', 1)
128-
value = value.lstrip(' \t') + ''.join(sourcelines[1:])
128+
129+
# Fixed: https://github.com/python/cpython/issues/124452
130+
#
131+
# Root cause: The function '_refold_parse_tree' in '_header_value_parse.py'.
132+
# If there is no WSP, it can't figure out how to wrap the text.
133+
# Therefore, it places the entire value directly after '\n', and because
134+
# there is a WSP after '<HeaderName>:', the WSP will be moved to the front
135+
# of the value according to RFC5322, section 2.2.3.
136+
#
137+
# However, the WSP is not part of the value; therefore, we must
138+
# remove it.
139+
140+
# Strip the single leading WSP of the first continuation line, but only
141+
# when the first line carries no value and continuation lines follow it.
142+
remove_wsp = not value.strip() and len(sourcelines) > 1
143+
144+
value = value.lstrip(' \t')
145+
if remove_wsp and sourcelines[1][0] in ' \t':
146+
sourcelines[1] = sourcelines[1][1:]
147+
148+
value += ''.join(sourcelines[1:])
129149
return (name, value.rstrip('\r\n'))
130150

131151
def header_store_parse(self, name, value):

Lib/test/test_email/test_message.py

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
import unittest
21
import textwrap
3-
from email import policy, message_from_string
2+
import unittest
3+
from email import message_from_bytes, message_from_string, policy
44
from email.message import EmailMessage, MIMEPart
55
from test.test_email import TestEmailBase, parameterize
66

@@ -957,6 +957,35 @@ def test_folding_with_utf8_encoding_8(self):
957957
b'123456789 123456789 123456789 123456789 '
958958
b'123456789-123456789\n 123456789 Hello '
959959
b'=?utf-8?q?W=C3=B6rld!?= 123456789 123456789\n\n')
960+
961+
962+
def test_folding_with_short_nospace_1(self):
963+
# bpo-36520
964+
#
965+
# A short value that contains no whitespace must survive a
966+
# serialize/parse round trip unchanged.
967+
968+
m = EmailMessage(policy.default)
969+
m['Message-ID'] = '12345678912345678123456789123456789123456789'
970+
parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default)
971+
self.assertEqual(parsed_msg['Message-ID'], m['Message-ID'])
972+
973+
def test_folding_with_long_nospace_1(self):
974+
# Fixed: https://github.com/python/cpython/issues/124452
975+
#
976+
# When a value without whitespace is too long for the header line, it is
976+
# folded onto the next line; parsing it back must yield the original value.
978+
979+
m = EmailMessage(policy.default)
980+
m['Message-ID'] = '12345678912345678123456789123456789123456789'\
981+
'12345678912345678123456789123456789123456789'
982+
self.assertEqual(m.as_bytes(),
983+
b'Message-ID:\n 12345678912345678123456789123456'\
984+
b'78912345678912345678912345678123456789123456789'\
985+
b'123456789\n\n')
986+
parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default)
987+
self.assertEqual(parsed_msg['Message-ID'], m['Message-ID'])
988+
960989

961990
def test_get_body_malformed(self):
962991
"""test for bpo-42892"""

0 commit comments

Comments
 (0)