Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Lib/email/header.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ def decode_header(header):
for n, w in enumerate(words):
if n>1 and w[1] and words[n-2][1] and words[n-1][0].isspace():
droplist.append(n-1)
if n < len(words):
words[n] = (words[n][0].lstrip(), words[n][1], words[n][2])
for d in reversed(droplist):
del words[d]

Expand Down
75 changes: 75 additions & 0 deletions Lib/test/test_email/test_message.py
Original file line number Diff line number Diff line change
Expand Up @@ -1055,5 +1055,80 @@ def test_string_payload_with_multipart_content_type(self):
self.assertEqual(list(attachments), [])


# Round-trip tests for headers containing non-ASCII text: each test stores
# header values on an EmailMessage, serializes it with as_bytes(), re-parses
# the bytes with message_from_bytes(..., policy=policy.default) and asserts
# that the parsed header (or its first address) equals the original value.
#
# EmailMessage, message_from_bytes and policy are not defined in this view;
# they must come from imports elsewhere in the module.
#
# NOTE(review): leading indentation is missing throughout this capture, and
# every address reads "[email protected]", which looks like an e-mail
# obfuscation placeholder rather than a real addr-spec -- confirm the actual
# literals against the original commit before relying on these tests.
class TestHeaderDecoding(unittest.TestCase):
# Single From address whose display name contains several accented words.
# Checks both the string form of the first parsed address and its
# display_name attribute.
def test_encoded_word_splitting(self):
# Test case with accented characters that forces line splitting
address = "Bérénice-Amélie Rosemonde Dûbois-Bénard <[email protected]>"
message = EmailMessage()
message["From"] = address
message_bytes = message.as_bytes()

# Test with default policy
parsed = message_from_bytes(message_bytes, policy=policy.default)
self.assertEqual(str(parsed["From"].addresses[0]), address)
self.assertEqual(parsed["From"].addresses[0].display_name,
"Bérénice-Amélie Rosemonde Dûbois-Bénard")

# Sets From, To and Subject on one message and compares str() of each
# parsed header with the value that was assigned.
def test_multiple_encoded_words(self):
# Test multiple encoded-words in sequence
headers = [
("From", "André von Müller <[email protected]>"),
("To", "José García López <[email protected]>"),
("Subject", "Re: études à l'université"),
]

message = EmailMessage()
for header, value in headers:
message[header] = value
message_bytes = message.as_bytes()

parsed = message_from_bytes(message_bytes, policy=policy.default)
for header, value in headers:
# subTest reports each header separately instead of stopping at the
# first mismatch.
with self.subTest(header=header):
self.assertEqual(str(parsed[header]), value)

# Same round trip as test_encoded_word_splitting, using a longer display
# name that is kept in its own variable so it can be compared directly.
def test_long_encoded_words(self):
# Test very long names that force multiple encoded-word splits
long_name = "Maximilian-Friedrich von Württemberg-Höchstadt III"
address = f"{long_name} <[email protected]>"

message = EmailMessage()
message["From"] = address
message_bytes = message.as_bytes()

parsed = message_from_bytes(message_bytes, policy=policy.default)
self.assertEqual(str(parsed["From"].addresses[0]), address)
self.assertEqual(parsed["From"].addresses[0].display_name, long_name)

# Display name that mixes ASCII words with a parenthesized non-ASCII part.
# NOTE(review): parentheses delimit comments in RFC 5322 address syntax, so
# the parser may not keep "(アクメ)" as part of display_name, and str() of
# the address may quote or drop it -- confirm the expected values.
def test_mixed_ascii_and_encoded(self):
# Test mixing ASCII and encoded-words
address = 'ACME Corp (アクメ) <[email protected]>'
message = EmailMessage()
message["From"] = address
message_bytes = message.as_bytes()

parsed = message_from_bytes(message_bytes, policy=policy.default)
self.assertEqual(str(parsed["From"].addresses[0]), address)
self.assertEqual(parsed["From"].addresses[0].display_name, 'ACME Corp (アクメ)')

# Round-trips From, To and Cc values whose display names are meant to
# contain a double space, tabs and a newline respectively.
def test_whitespace_handling(self):
# Test various whitespace scenarios between encoded-words
headers = [
# NOTE(review): the literal below shows a single space although the
# inline comment says "Double space" -- the capture may have collapsed
# whitespace; confirm against the original commit.
("From", "María José <[email protected]>"), # Double space
# NOTE(review): folding and re-parsing may not preserve tab characters
# verbatim -- confirm this exact-equality expectation holds.
("To", "André\tvon\tMüller <[email protected]>"), # Tabs
# NOTE(review): the default EmailPolicy is documented to reject header
# values containing line feeds or carriage returns, in which case the
# assignment loop below would raise ValueError before any assertion
# runs -- confirm.
("Cc", "José\n García <[email protected]>"), # Newline
]

message = EmailMessage()
for header, value in headers:
message[header] = value
message_bytes = message.as_bytes()

parsed = message_from_bytes(message_bytes, policy=policy.default)
for header, value in headers:
with self.subTest(header=header):
self.assertEqual(str(parsed[header]), value)


# Run the module's unittest test cases when the file is executed directly
# as a script.
if __name__ == '__main__':
unittest.main()
Loading