Skip to content

Commit a4e1f04

Browse files
committed
email: Fix RFC 2047 header decoding with line folding
1 parent 39e69a7 commit a4e1f04

File tree

2 files changed: 77 additions, 0 deletions

Lib/email/header.py

Lines changed: 2 additions & 0 deletions
--- a/Lib/email/header.py
+++ b/Lib/email/header.py
@@ -102,6 +102,8 @@ def decode_header(header):
     for n, w in enumerate(words):
         if n>1 and w[1] and words[n-2][1] and words[n-1][0].isspace():
             droplist.append(n-1)
+        if n < len(words):
+            words[n] = (words[n][0].lstrip(), words[n][1], words[n][2])
     for d in reversed(droplist):
         del words[d]

Lib/test/test_email/test_message.py

Lines changed: 75 additions & 0 deletions
--- a/Lib/test/test_email/test_message.py
+++ b/Lib/test/test_email/test_message.py
@@ -1055,5 +1055,80 @@ def test_string_payload_with_multipart_content_type(self):
         self.assertEqual(list(attachments), [])
 
 
+class TestHeaderDecoding(unittest.TestCase):
+    def test_encoded_word_splitting(self):
+        # Test case with accented characters that forces line splitting
+        address = "Bérénice-Amélie Rosemonde Dûbois-Bénard <rose@example.com>"
+        message = EmailMessage()
+        message["From"] = address
+        message_bytes = message.as_bytes()
+
+        # Test with default policy
+        parsed = message_from_bytes(message_bytes, policy=policy.default)
+        self.assertEqual(str(parsed["From"].addresses[0]), address)
+        self.assertEqual(parsed["From"].addresses[0].display_name,
+                         "Bérénice-Amélie Rosemonde Dûbois-Bénard")
+
+    def test_multiple_encoded_words(self):
+        # Test multiple encoded-words in sequence
+        headers = [
+            ("From", "André von Müller <andre@example.com>"),
+            ("To", "José García López <jose@example.com>"),
+            ("Subject", "Re: études à l'université"),
+        ]
+
+        message = EmailMessage()
+        for header, value in headers:
+            message[header] = value
+        message_bytes = message.as_bytes()
+
+        parsed = message_from_bytes(message_bytes, policy=policy.default)
+        for header, value in headers:
+            with self.subTest(header=header):
+                self.assertEqual(str(parsed[header]), value)
+
+    def test_long_encoded_words(self):
+        # Test very long names that force multiple encoded-word splits
+        long_name = "Maximilian-Friedrich von Württemberg-Höchstadt III"
+        address = f"{long_name} <max@example.com>"
+
+        message = EmailMessage()
+        message["From"] = address
+        message_bytes = message.as_bytes()
+
+        parsed = message_from_bytes(message_bytes, policy=policy.default)
+        self.assertEqual(str(parsed["From"].addresses[0]), address)
+        self.assertEqual(parsed["From"].addresses[0].display_name, long_name)
+
+    def test_mixed_ascii_and_encoded(self):
+        # Test mixing ASCII and encoded-words
+        address = 'ACME Corp (アクメ) <info@example.com>'
+        message = EmailMessage()
+        message["From"] = address
+        message_bytes = message.as_bytes()
+
+        parsed = message_from_bytes(message_bytes, policy=policy.default)
+        self.assertEqual(str(parsed["From"].addresses[0]), address)
+        self.assertEqual(parsed["From"].addresses[0].display_name, 'ACME Corp (アクメ)')
+
+    def test_whitespace_handling(self):
+        # Test various whitespace scenarios between encoded-words
+        headers = [
+            ("From", "María José <maria.jose@example.com>"), # Double space
+            ("To", "André\tvon\tMüller <andre@example.com>"), # Tabs
+            ("Cc", "José\n García <jose@example.com>"), # Newline
+        ]
+
+        message = EmailMessage()
+        for header, value in headers:
+            message[header] = value
+        message_bytes = message.as_bytes()
+
+        parsed = message_from_bytes(message_bytes, policy=policy.default)
+        for header, value in headers:
+            with self.subTest(header=header):
+                self.assertEqual(str(parsed[header]), value)
+
+
 if __name__ == '__main__':
     unittest.main()

0 commit comments

Comments
 (0)