diff --git a/Lib/email/header.py b/Lib/email/header.py
index 113a81f41314ec..ea9916476836da 100644
--- a/Lib/email/header.py
+++ b/Lib/email/header.py
@@ -102,5 +102,9 @@ def decode_header(header):
     for n, w in enumerate(words):
         if n>1 and w[1] and words[n-2][1] and words[n-1][0].isspace():
             droplist.append(n-1)
+        # Strip leading whitespace from encoded-word payloads only.  Unencoded
+        # words are left alone: the isspace() test above reads them later.
+        if w[1]:
+            words[n] = (w[0].lstrip(), w[1], w[2])
     for d in reversed(droplist):
         del words[d]
diff --git a/Lib/test/test_email/test_message.py b/Lib/test/test_email/test_message.py
index 96979db27f3a21..7c2a7785519385 100644
--- a/Lib/test/test_email/test_message.py
+++ b/Lib/test/test_email/test_message.py
@@ -1055,5 +1055,80 @@ def test_string_payload_with_multipart_content_type(self):
         self.assertEqual(list(attachments), [])
 
 
+class TestHeaderDecoding(unittest.TestCase):
+    def test_encoded_word_splitting(self):
+        # Test case with accented characters that forces line splitting
+        address = "Bérénice-Amélie Rosemonde Dûbois-Bénard "
+        message = EmailMessage()
+        message["From"] = address
+        message_bytes = message.as_bytes()
+
+        # Test with default policy
+        parsed = message_from_bytes(message_bytes, policy=policy.default)
+        self.assertEqual(str(parsed["From"].addresses[0]), address)
+        self.assertEqual(parsed["From"].addresses[0].display_name,
+                         "Bérénice-Amélie Rosemonde Dûbois-Bénard")
+
+    def test_multiple_encoded_words(self):
+        # Test multiple encoded-words in sequence
+        headers = [
+            ("From", "André von Müller "),
+            ("To", "José García López "),
+            ("Subject", "Re: études à l'université"),
+        ]
+
+        message = EmailMessage()
+        for header, value in headers:
+            message[header] = value
+        message_bytes = message.as_bytes()
+
+        parsed = message_from_bytes(message_bytes, policy=policy.default)
+        for header, value in headers:
+            with self.subTest(header=header):
+                self.assertEqual(str(parsed[header]), value)
+
+    def test_long_encoded_words(self):
+        # Test very long names that force multiple encoded-word splits
+        long_name = "Maximilian-Friedrich von Württemberg-Höchstadt III"
+        address = f"{long_name} "
+
+        message = EmailMessage()
+        message["From"] = address
+        message_bytes = message.as_bytes()
+
+        parsed = message_from_bytes(message_bytes, policy=policy.default)
+        self.assertEqual(str(parsed["From"].addresses[0]), address)
+        self.assertEqual(parsed["From"].addresses[0].display_name, long_name)
+
+    def test_mixed_ascii_and_encoded(self):
+        # Test mixing ASCII and encoded-words
+        address = 'ACME Corp (アクメ) '
+        message = EmailMessage()
+        message["From"] = address
+        message_bytes = message.as_bytes()
+
+        parsed = message_from_bytes(message_bytes, policy=policy.default)
+        self.assertEqual(str(parsed["From"].addresses[0]), address)
+        self.assertEqual(parsed["From"].addresses[0].display_name, 'ACME Corp (アクメ)')
+
+    def test_whitespace_handling(self):
+        # Test various whitespace scenarios between encoded-words
+        headers = [
+            ("From", "María José "),  # Double space
+            ("To", "André\tvon\tMüller "),  # Tabs
+            ("Cc", "José\n García "),  # Newline
+        ]
+
+        message = EmailMessage()
+        for header, value in headers:
+            message[header] = value
+        message_bytes = message.as_bytes()
+
+        parsed = message_from_bytes(message_bytes, policy=policy.default)
+        for header, value in headers:
+            with self.subTest(header=header):
+                self.assertEqual(str(parsed[header]), value)
+
+
 if __name__ == '__main__':
     unittest.main()