Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 23 additions & 1 deletion Lib/email/_policybase.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,29 @@ def header_source_parse(self, sourcelines):

"""
name, value = sourcelines[0].split(':', 1)
value = value.lstrip(' \t') + ''.join(sourcelines[1:])

# Fixed: https://github.com/python/cpython/issues/124452
#
# Root cause: the function '_refold_parse_tree' in
# '_header_value_parser.py'. If the value contains no WSP, it cannot
# figure out where to wrap the text, so it places the entire value
# directly after '\n'. Because there is a WSP after '<HeaderName>:',
# that WSP is moved to the front of the value, per RFC 5322,
# section 2.2.3.
#
# However, the WSP is not part of the value, so it must be removed here.

no_first_value = value.strip() == '' and len(sourcelines) > 1

# When using the compat32 policy, the value is '\n'. Therefore,
# use an empty string if there is no value (without WSP and CRLF)
# on the first line
value = '' if no_first_value else value.lstrip(' \t')

if no_first_value and sourcelines[1][0] in ' \t':
sourcelines[1] = sourcelines[1][1:]

value += ''.join(sourcelines[1:])
return (name, value.rstrip('\r\n'))

def header_store_parse(self, name, value):
Expand Down
9 changes: 8 additions & 1 deletion Lib/email/policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,14 @@ def header_source_parse(self, sourcelines):

"""
name, value = sourcelines[0].split(':', 1)
value = value.lstrip(' \t') + ''.join(sourcelines[1:])
no_first_value = value.strip() == '' and len(sourcelines) > 1

value = '' if no_first_value else value.lstrip(' \t')

if no_first_value and sourcelines[1][0] in ' \t':
sourcelines[1] = sourcelines[1][1:]

value += ''.join(sourcelines[1:])
return (name, value.rstrip('\r\n'))

def header_store_parse(self, name, value):
Expand Down
44 changes: 42 additions & 2 deletions Lib/test/test_email/test_message.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import unittest
import textwrap
from email import policy, message_from_string
import unittest
from email import message_from_bytes, message_from_string, policy
from email.message import EmailMessage, MIMEPart
from test.test_email import TestEmailBase, parameterize

Expand Down Expand Up @@ -958,6 +958,46 @@ def test_folding_with_utf8_encoding_8(self):
b'123456789-123456789\n 123456789 Hello '
b'=?utf-8?q?W=C3=B6rld!?= 123456789 123456789\n\n')


def test_folding_with_short_nospace_1(self):
# bpo-36520
#
# Fold a line that contains a long whitespace after
# the fold point.

m = EmailMessage(policy.default)
m['Message-ID'] = '12345678912345678123456789123456789123456789'
parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default)
self.assertEqual(parsed_msg['Message-ID'], m['Message-ID'])

def test_folding_with_long_nospace_default_policy_1(self):
# Fixed: https://github.com/python/cpython/issues/124452
#
# When the value is too long, it should be converted back
# to its original form without any modifications.

m = EmailMessage(policy.default)
m['Message-ID'] = '12345678912345678123456789123456789123456789'\
'12345678912345678123456789123456789123456789'
self.assertEqual(m.as_bytes(),
b'Message-ID:\n 12345678912345678123456789123456'\
b'78912345678912345678912345678123456789123456789'\
b'123456789\n\n')
parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default)
self.assertEqual(parsed_msg['Message-ID'], m['Message-ID'])

def test_folding_with_long_nospace_compat32_policy_1(self):
# Fixed: https://github.com/python/cpython/issues/124452
#
# When the value is too long, it should be converted back
# to its original form without any modifications.

m = EmailMessage(policy.compat32)
m['Message-ID'] = '12345678912345678123456789123456789123456789'\
'12345678912345678123456789123456789123456789'
parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default)
self.assertEqual(parsed_msg['Message-ID'], m['Message-ID'])

def test_get_body_malformed(self):
"""test for bpo-42892"""
msg = textwrap.dedent("""\
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Fix a header mismatch that appears after an email message is converted to bytes and the bytes are parsed back into an email message.
The issue only occurs with long header values that do not contain any spaces.
Root cause: the email library cannot find a place to wrap long text without spaces, but it wraps it anyway by adding a leading WSP, and that WSP was not removed when the bytes were parsed back into an EmailMessage.
Loading