diff --git a/Lib/email/message.py b/Lib/email/message.py index 08192c50a8ff5c..551b8dcd680ad9 100644 --- a/Lib/email/message.py +++ b/Lib/email/message.py @@ -302,7 +302,18 @@ def get_payload(self, i=None, decode=False): return payload if isinstance(payload, str): try: - bpayload = payload.encode('ascii', 'surrogateescape') + # An 8bit body may hold non-ASCII text: encode it with the + # declared charset, defaulting to UTF-8 when none is given. + if cte == '8bit': + charset = self.get_content_charset('utf-8') + else: + charset = 'ascii' + try: + bpayload = payload.encode(charset, 'surrogateescape') + except LookupError: + # Unknown charset label (e.g. "unknown-8bit"): keep the + # previous ASCII behaviour instead of raising. + bpayload = payload.encode('ascii', 'surrogateescape') except UnicodeEncodeError: # This won't happen for RFC compliant messages (messages # containing only ASCII code points in the unicode input). diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index 65ddbabcaa1997..346a54801f6d01 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -673,7 +673,7 @@ def test_broken_unicode_payload(self): x = 'this is a br\xf6ken thing to do' msg = Message() msg['content-type'] = 'text/plain' - msg['content-transfer-encoding'] = '8bit' + msg['content-transfer-encoding'] = '7bit' msg.set_payload(x) self.assertEqual(msg.get_payload(decode=True), bytes(x, 'raw-unicode-escape')) @@ -750,6 +750,15 @@ def test_binary_base64_payload(self): b'foo\xe6\x96\x87bar', 'get_payload returns wrong result with charset %s.' 
% charset) + def test_8bit_utf8_payload(self): + x = 'this is the hötel' + msg = Message() + msg['content-type'] = 'text/plain' + msg['content-transfer-encoding'] = '8bit' + msg.set_payload(x) + self.assertEqual(msg.get_payload(decode=True), + bytes(x, 'utf-8')) + def test_binary_uuencode_payload(self): for charset in ('latin-1', 'ascii'): for encoding in ('x-uuencode', 'uuencode', 'uue', 'x-uue'): diff --git a/Misc/NEWS.d/next/Library/2023-06-05-02-37-52.gh-issue-105285.fwUxuR.rst b/Misc/NEWS.d/next/Library/2023-06-05-02-37-52.gh-issue-105285.fwUxuR.rst new file mode 100644 index 00000000000000..7cce891000e0c4 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-06-05-02-37-52.gh-issue-105285.fwUxuR.rst @@ -0,0 +1,2 @@ +:meth:`email.message.Message.get_payload` with ``decode=True`` now encodes +``8bit`` string payloads using the declared charset (such as UTF-8), not ASCII.