python · bitdancer · Nov 16, 2024 · Oct 24, 2024 · Oct 24, 2024 · Oct 24, 2024
@@ -307,7 +307,29 @@ def header_source_parse(self, sourcelines):
 
         """
         name, value = sourcelines[0].split(':', 1)
-        value = value.lstrip(' \t') + ''.join(sourcelines[1:])
+
+        # Fixed: https://github.com/python/cpython/issues/124452
+        #
+        # Root cause: '_refold_parse_tree' in '_header_value_parser.py'.
+        # If the value has no WSP, it can't figure out where to fold the text.
+        # Therefore, it places the entire value directly after '\n', and because
+        # there is a WSP after '<HeaderName>:', that WSP ends up at the front
+        # of the value, in accordance with RFC 5322, section 2.2.3.
+        #
+        # However, that WSP is not part of the value; therefore, we must
+        # remove it.
+
+        no_first_value = value.strip() == '' and len(sourcelines) > 1
+
+        # When using the compat32 policy, the value on the first line is
+        # '\n'. Therefore, use an empty string when the first line carries
+        # no value (ignoring WSP and CRLF).
+        value = '' if no_first_value else value.lstrip(' \t')
+
+        if no_first_value and sourcelines[1][0] in ' \t':
+            sourcelines[1] = sourcelines[1][1:]
+
+        value += ''.join(sourcelines[1:])
         return (name, value.rstrip('\r\n'))
 
     def header_store_parse(self, name, value):

@@ -125,7 +125,14 @@ def header_source_parse(self, sourcelines):
 
         """
         name, value = sourcelines[0].split(':', 1)
-        value = value.lstrip(' \t') + ''.join(sourcelines[1:])
+        no_first_value = value.strip() == '' and len(sourcelines) > 1
+
+        value = '' if no_first_value else value.lstrip(' \t')
+
+        if no_first_value and sourcelines[1][0] in ' \t':
+            sourcelines[1] = sourcelines[1][1:]
+
+        value += ''.join(sourcelines[1:])
         return (name, value.rstrip('\r\n'))
 
     def header_store_parse(self, name, value):

@@ -1,6 +1,6 @@
-import unittest
 import textwrap
-from email import policy, message_from_string
+import unittest
+from email import message_from_bytes, message_from_string, policy
 from email.message import EmailMessage, MIMEPart
 from test.test_email import TestEmailBase, parameterize
 
@@ -958,6 +958,46 @@ def test_folding_with_utf8_encoding_8(self):
                          b'123456789-123456789\n 123456789 Hello '
                          b'=?utf-8?q?W=C3=B6rld!?= 123456789 123456789\n\n')
 
+
+    def test_folding_with_short_nospace_1(self):
+        # gh-124452
+        #
+        # A short value without any whitespace must survive a round trip
+        # through bytes and back unchanged.
+
+        m = EmailMessage(policy.default)
+        m['Message-ID'] = '12345678912345678123456789123456789123456789'
+        parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default)
+        self.assertEqual(parsed_msg['Message-ID'], m['Message-ID'])
+
+    def test_folding_with_long_nospace_default_policy_1(self):
+        # Fixed: https://github.com/python/cpython/issues/124452
+        #
+        # When the value is too long, it should be converted back
+        # to its original form without any modifications.
+
+        m = EmailMessage(policy.default)
+        m['Message-ID'] = '12345678912345678123456789123456789123456789'\
+                        '12345678912345678123456789123456789123456789'
+        self.assertEqual(m.as_bytes(),
+                         b'Message-ID:\n 12345678912345678123456789123456'\
+                         b'78912345678912345678912345678123456789123456789'\
+                         b'123456789\n\n')
+        parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default)
+        self.assertEqual(parsed_msg['Message-ID'], m['Message-ID'])
+
+    def test_folding_with_long_nospace_compat32_policy_1(self):
+        # Fixed: https://github.com/python/cpython/issues/124452
+        #
+        # When the value is too long, it should be converted back
+        # to its original form without any modifications.
+
+        m = EmailMessage(policy.compat32)
+        m['Message-ID'] = '12345678912345678123456789123456789123456789'\
+                        '12345678912345678123456789123456789123456789'
+        parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default)
+        self.assertEqual(parsed_msg['Message-ID'], m['Message-ID'])
+
     def test_get_body_malformed(self):
         """test for bpo-42892"""
         msg = textwrap.dedent("""\

diff --git a/Misc/NEWS.d/next/Library/2024-10-24-10-49-47.gh-issue-124452.eqTRgx.rst b/Misc/NEWS.d/next/Library/2024-10-24-10-49-47.gh-issue-124452.eqTRgx.rst
@@ -0,0 +1,3 @@
+Fix email header values that did not round-trip: after converting a message to bytes and parsing it back into an email message, a header value could differ from the original.
+This issue only occurs with long values that contain no whitespace.
+Root cause: the email library cannot fold long text that has no whitespace, so it places the whole value on a new line with a leading WSP, and that WSP was not removed when parsing the bytes back into an EmailMessage.