diff --git a/Lib/email/_policybase.py b/Lib/email/_policybase.py index 5f9aa9fb091fa2..c9f0d743090e54 100644 --- a/Lib/email/_policybase.py +++ b/Lib/email/_policybase.py @@ -302,12 +302,12 @@ def header_source_parse(self, sourcelines): """+ The name is parsed as everything up to the ':' and returned unmodified. The value is determined by stripping leading whitespace off the - remainder of the first line, joining all subsequent lines together, and + remainder of the first line joined with all subsequent lines, and stripping any trailing carriage return or linefeed characters. """ name, value = sourcelines[0].split(':', 1) - value = value.lstrip(' \t') + ''.join(sourcelines[1:]) + value = ''.join((value, *sourcelines[1:])).lstrip(' \t\r\n') return (name, value.rstrip('\r\n')) def header_store_parse(self, name, value): diff --git a/Lib/email/policy.py b/Lib/email/policy.py index 46b7de5bb6d8ae..6e109b65011a44 100644 --- a/Lib/email/policy.py +++ b/Lib/email/policy.py @@ -119,13 +119,13 @@ def header_source_parse(self, sourcelines): """+ The name is parsed as everything up to the ':' and returned unmodified. The value is determined by stripping leading whitespace off the - remainder of the first line, joining all subsequent lines together, and + remainder of the first line joined with all subsequent lines, and stripping any trailing carriage return or linefeed characters. (This is the same as Compat32). """ name, value = sourcelines[0].split(':', 1) - value = value.lstrip(' \t') + ''.join(sourcelines[1:]) + value = ''.join((value, *sourcelines[1:])).lstrip(' \t\r\n') return (name, value.rstrip('\r\n')) def header_store_parse(self, name, value): diff --git a/Lib/test/test_email/test_message.py b/Lib/test/test_email/test_message.py index 034f7626c1fc7c..96979db27f3a21 100644 --- a/Lib/test/test_email/test_message.py +++ b/Lib/test/test_email/test_message.py @@ -1,6 +1,6 @@ -import unittest import textwrap -from email import policy, message_from_string +import unittest +from email import message_from_bytes, message_from_string, policy from email.message import EmailMessage, MIMEPart from test.test_email import TestEmailBase, parameterize @@ -958,6 +958,52 @@ def test_folding_with_utf8_encoding_8(self): b'123456789-123456789\n 123456789 Hello ' b'=?utf-8?q?W=C3=B6rld!?= 123456789 123456789\n\n') + def test_folding_with_short_nospace_1(self): + # bpo-36520 + # + # Fold a line that contains a long whitespace after + # the fold point. + + m = EmailMessage(policy.default) + m['Message-ID'] = '123456789' * 3 + parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default) + self.assertEqual(parsed_msg['Message-ID'], m['Message-ID']) + + def test_folding_with_long_nospace_default_policy_1(self): + # Fixed: https://github.com/python/cpython/issues/124452 + # + # When the value is too long, it should be converted back + # to its original form without any modifications. + + m = EmailMessage(policy.default) + message = '123456789' * 10 + m['Message-ID'] = message + self.assertEqual(m.as_bytes(), + f'Message-ID:\n {message}\n\n'.encode()) + parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default) + self.assertEqual(parsed_msg['Message-ID'], m['Message-ID']) + + def test_folding_with_long_nospace_compat32_policy_1(self): + m = EmailMessage(policy.compat32) + message = '123456789' * 10 + m['Message-ID'] = message + parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default) + self.assertEqual(parsed_msg['Message-ID'], m['Message-ID']) + + def test_folding_with_long_nospace_smtp_policy_1(self): + m = EmailMessage(policy.SMTP) + message = '123456789' * 10 + m['Message-ID'] = message + parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default) + self.assertEqual(parsed_msg['Message-ID'], m['Message-ID']) + + def test_folding_with_long_nospace_http_policy_1(self): + m = EmailMessage(policy.HTTP) + message = '123456789' * 10 + m['Message-ID'] = message + parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default) + self.assertEqual(parsed_msg['Message-ID'], m['Message-ID']) + def test_get_body_malformed(self): """test for bpo-42892""" msg = textwrap.dedent("""\ diff --git a/Misc/NEWS.d/next/Library/2024-10-24-10-49-47.gh-issue-124452.eqTRgx.rst b/Misc/NEWS.d/next/Library/2024-10-24-10-49-47.gh-issue-124452.eqTRgx.rst new file mode 100644 index 00000000000000..b0d63794022db4 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-24-10-49-47.gh-issue-124452.eqTRgx.rst @@ -0,0 +1,4 @@ +Fix an issue in :meth:`email.policy.EmailPolicy.header_source_parse` and +:meth:`email.policy.Compat32.header_source_parse` that introduced spurious +leading whitespaces into header values when the header includes a newline +character after the header name delimiter (``:``) and before the value.