Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions Doc/library/email.policy.rst
Original file line number Diff line number Diff line change
Expand Up @@ -406,11 +406,17 @@ added matters. To illustrate::
.. attribute:: utf8

If ``False``, follow :rfc:`5322`, supporting non-ASCII characters in
headers by encoding them as "encoded words". If ``True``, follow
:rfc:`6532` and use ``utf-8`` encoding for headers. Messages
headers by encoding them as :rfc:`2047` "encoded words". If ``True``,
follow :rfc:`6532` and use ``utf-8`` encoding for headers. Messages
formatted in this way may be passed to SMTP servers that support
the ``SMTPUTF8`` extension (:rfc:`6531`).

.. versionchanged:: 3.13
If ``False``, the generator raises :exc:`ValueError` if any email
address contains non-ASCII characters. To send to a non-ASCII domain
with ``utf8=False``, first encode the domain using the third-party
:pypi:`idna` module or :mod:`encodings.idna`. No RFC allows a non-ASCII
local-part (the username) in an email address with ``utf8=False``.

.. attribute:: refold_source

Expand Down
11 changes: 11 additions & 0 deletions Lib/email/_header_value_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2829,6 +2829,17 @@ def _refold_parse_tree(parse_tree, *, policy):
_fold_mime_parameters(part, lines, maxlen, encoding)
continue

if want_encoding and part.token_type == 'addr-spec':
# RFC2047 forbids encoded-word in any part of an addr-spec.
if charset == 'unknown-8bit':
# Non-ASCII addr-spec came from parsed message; leave unchanged.
want_encoding = False
else:
raise ValueError(
"Non-ASCII address requires policy with utf8=True:"
" '{}'".format(part)
)

if want_encoding and not wrap_as_ew_blocked:
if not part.as_ew_allowed:
want_encoding = False
Expand Down
58 changes: 56 additions & 2 deletions Lib/test/test_email/test_generator.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import io
import re
import textwrap
import unittest
from email import message_from_string, message_from_bytes
Expand Down Expand Up @@ -288,6 +289,28 @@ def test_keep_long_encoded_newlines(self):
g.flatten(msg)
self.assertEqual(s.getvalue(), self.typ(expected))

def test_non_ascii_addr_spec_raises(self):
    # Flattening with utf8=False must refuse an address whose addr-spec
    # contains non-ASCII characters: RFC2047 encoded-word is not permitted
    # in any part of an addr-spec, so there is no valid way to emit it.
    # (See also test_non_ascii_addr_spec_preserved below.)
    #
    # NOTE(review): the addresses with an ASCII domain were reconstructed
    # from an obfuscated extract ("[email protected]"); confirm the exact
    # literals against the upstream test file.
    g = self.genclass(self.ioclass(), policy=self.policy.clone(utf8=False))
    cases = [
        'wők@example.com',                          # non-ASCII local-part
        'wok@exàmple.com',                          # non-ASCII domain
        'wők@exàmple.com',                          # both
        '"Name, for display" <wők@example.com>',    # quoted display name
        'Näyttönimi <wők@example.com>',             # non-ASCII display name
    ]
    for address in cases:
        with self.subTest(address=address):
            msg = EmailMessage()
            msg['To'] = address
            # The error message quotes only the addr-spec, not the
            # display name, so build the expectation from the parsed
            # header rather than from the raw input string.
            expected_error = re.escape(
                "Non-ASCII address requires policy with utf8=True:"
                " '{}'".format(msg['To'].addresses[0].addr_spec)
            )
            with self.assertRaisesRegex(ValueError, expected_error):
                g.flatten(msg)


class TestGenerator(TestGeneratorBase, TestEmailBase):

Expand Down Expand Up @@ -432,12 +455,12 @@ def test_cte_type_7bit_transforms_8bit_cte(self):

def test_smtputf8_policy(self):
msg = EmailMessage()
msg['From'] = "Páolo <főo@bar.com>"
msg['From'] = "Páolo <főo@bàr.com>"
msg['To'] = 'Dinsdale'
msg['Subject'] = 'Nudge nudge, wink, wink \u1F609'
msg.set_content("oh là là, know what I mean, know what I mean?")
expected = textwrap.dedent("""\
From: Páolo <főo@bar.com>
From: Páolo <főo@bàr.com>
To: Dinsdale
Subject: Nudge nudge, wink, wink \u1F609
Content-Type: text/plain; charset="utf-8"
Expand Down Expand Up @@ -472,6 +495,37 @@ def test_smtp_policy(self):
g.flatten(msg)
self.assertEqual(s.getvalue(), expected)

def test_non_ascii_addr_spec_preserved(self):
    # A defective non-ASCII addr-spec parsed from the original
    # message is left unchanged when flattening.
    # (See also test_non_ascii_addr_spec_raises above.)
    #
    # NOTE(review): the address literals were reconstructed from an
    # obfuscated extract ("[email protected]", which also swallowed part of
    # the \xc3\xb6 byte escape); confirm them against the upstream test.
    source = (
        'To: jörg@example.com, "But a long name still works with refold_source" <jörg@example.com>'
    ).encode()
    # The header is long enough to be refolded: the second address moves
    # to a continuation line while the raw UTF-8 bytes of the addr-spec
    # are passed through untouched.
    expected = (
        b'To: j\xc3\xb6rg@example.com,\n'
        b' "But a long name still works with refold_source" <j\xc3\xb6rg@example.com>\n'
        b'\n'
    )
    msg = message_from_bytes(source, policy=policy.default)
    s = io.BytesIO()
    g = BytesGenerator(s, policy=policy.default)
    g.flatten(msg)
    self.assertEqual(s.getvalue(), expected)

def test_idna_encoding_preserved(self):
    # Nothing tries to decode a pre-encoded IDNA domain.
    msg = EmailMessage()
    msg["To"] = Address(
        username='jörg',
        domain='☕.example'.encode('idna').decode()  # IDNA 2003
    )
    # With utf8=True the non-ASCII local-part is written as raw UTF-8,
    # and the domain stays in its ASCII ("xn--") form exactly as given.
    expected = 'To: jörg@xn--53h.example\n\n'.encode()
    s = io.BytesIO()
    g = BytesGenerator(s, policy=policy.default.clone(utf8=True))
    g.flatten(msg)
    self.assertEqual(s.getvalue(), expected)


# Allow running this test module directly as a script.
if __name__ == '__main__':
    unittest.main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Stop incorrectly using RFC 2047 "encoded words" for email addresses with
non-ASCII characters when email.generator is called using a policy with
``utf8=False``.
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Stop incorrectly using RFC 2047 "encoded words" for email addresses with
non-ASCII characters when email.generator is called using a policy with
``utf8=False``.
Loading