From b624a0707a64d6438e7304f10412392beaa50cb8 Mon Sep 17 00:00:00 2001 From: Mike Edmunds Date: Tue, 30 Jul 2024 12:05:23 -0700 Subject: [PATCH 1/5] Allow non-ASCII addr_spec in email.headerregistry.Address The email.headerregistry.Address constructor raised an error if addr_spec contained a non-ASCII character. (But it fully supports non-ASCII in the separate username and domain args.) This change removes the error for a non-ASCII addr_spec. --- Lib/email/headerregistry.py | 8 ++++++-- Lib/test/test_email/test_headerregistry.py | 22 ++++++++++------------ 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/Lib/email/headerregistry.py b/Lib/email/headerregistry.py index 543141dc427ebe..d8b06853f3c335 100644 --- a/Lib/email/headerregistry.py +++ b/Lib/email/headerregistry.py @@ -45,8 +45,12 @@ def __init__(self, display_name='', username='', domain='', addr_spec=None): raise ValueError("Invalid addr_spec; only '{}' " "could be parsed from '{}'".format( a_s, addr_spec)) - if a_s.all_defects: - raise a_s.all_defects[0] + relevant_defects = [ + defect for defect in a_s.all_defects + if not isinstance(defect, errors.NonASCIILocalPartDefect) + ] + if relevant_defects: + raise relevant_defects[0] username = a_s.local_part domain = a_s.domain self._display_name = display_name diff --git a/Lib/test/test_email/test_headerregistry.py b/Lib/test/test_email/test_headerregistry.py index 4c0523f410332f..4f2f9ca53f2926 100644 --- a/Lib/test/test_email/test_headerregistry.py +++ b/Lib/test/test_email/test_headerregistry.py @@ -1492,17 +1492,19 @@ def test_quoting(self): self.assertEqual(str(a), '"Sara J." 
<"bad name"@example.com>') def test_il8n(self): - a = Address('Éric', 'wok', 'exàmple.com') + a = Address('Éric', 'wők', 'exàmple.com') self.assertEqual(a.display_name, 'Éric') - self.assertEqual(a.username, 'wok') + self.assertEqual(a.username, 'wők') self.assertEqual(a.domain, 'exàmple.com') - self.assertEqual(a.addr_spec, 'wok@exàmple.com') - self.assertEqual(str(a), 'Éric <wok@exàmple.com>') + self.assertEqual(a.addr_spec, 'wők@exàmple.com') + self.assertEqual(str(a), 'Éric <wők@exàmple.com>') - # XXX: there is an API design issue that needs to be solved here. - #def test_non_ascii_username_raises(self): - # with self.assertRaises(ValueError): - # Address('foo', 'wők', 'example.com') + def test_i18n_in_addr_spec(self): + a = Address(addr_spec='wők@exàmple.com') + self.assertEqual(a.username, 'wők') + self.assertEqual(a.domain, 'exàmple.com') + self.assertEqual(a.addr_spec, 'wők@exàmple.com') + self.assertEqual(str(a), 'wők@exàmple.com') def test_crlf_in_constructor_args_raises(self): cases = ( @@ -1523,10 +1525,6 @@ def test_crlf_in_constructor_args_raises(self): with self.subTest(kwargs=kwargs), self.assertRaisesRegex(ValueError, "invalid arguments"): Address(**kwargs) - def test_non_ascii_username_in_addr_spec_raises(self): - with self.assertRaises(ValueError): - Address('foo', addr_spec='wők@example.com') - def test_address_addr_spec_and_username_raises(self): with self.assertRaises(TypeError): Address('foo', username='bing', addr_spec='bar@baz') From 6b587163596a999b0383c6ac23ec1953bd28bf39 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Tue, 30 Jul 2024 19:19:34 +0000 Subject: [PATCH 2/5] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../next/Library/2024-07-30-19-19-33.gh-issue-81074.YAeWNf.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 
Misc/NEWS.d/next/Library/2024-07-30-19-19-33.gh-issue-81074.YAeWNf.rst diff --git a/Misc/NEWS.d/next/Library/2024-07-30-19-19-33.gh-issue-81074.YAeWNf.rst b/Misc/NEWS.d/next/Library/2024-07-30-19-19-33.gh-issue-81074.YAeWNf.rst new file mode 100644 index 00000000000000..1270a962e99aea --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-07-30-19-19-33.gh-issue-81074.YAeWNf.rst @@ -0,0 +1 @@ +Allow non-ASCII addr_spec in email.headerregistry.Address constructor From f8a1296f7a64924e37d613bb338a54ae6c6ca1d1 Mon Sep 17 00:00:00 2001 From: Mike Edmunds Date: Tue, 30 Jul 2024 12:05:23 -0700 Subject: [PATCH 3/5] Allow non-ASCII addr_spec in email.headerregistry.Address The email.headerregistry.Address constructor raised an error if addr_spec contained a non-ASCII character. (But it fully supports non-ASCII in the separate username and domain args.) This change removes the error for a non-ASCII addr_spec. --- Lib/email/headerregistry.py | 8 ++++++-- Lib/test/test_email/test_headerregistry.py | 22 ++++++++++------------ 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/Lib/email/headerregistry.py b/Lib/email/headerregistry.py index 543141dc427ebe..d8b06853f3c335 100644 --- a/Lib/email/headerregistry.py +++ b/Lib/email/headerregistry.py @@ -45,8 +45,12 @@ def __init__(self, display_name='', username='', domain='', addr_spec=None): raise ValueError("Invalid addr_spec; only '{}' " "could be parsed from '{}'".format( a_s, addr_spec)) - if a_s.all_defects: - raise a_s.all_defects[0] + relevant_defects = [ + defect for defect in a_s.all_defects + if not isinstance(defect, errors.NonASCIILocalPartDefect) + ] + if relevant_defects: + raise relevant_defects[0] username = a_s.local_part domain = a_s.domain self._display_name = display_name diff --git a/Lib/test/test_email/test_headerregistry.py b/Lib/test/test_email/test_headerregistry.py index 4c0523f410332f..4f2f9ca53f2926 100644 --- a/Lib/test/test_email/test_headerregistry.py +++ 
b/Lib/test/test_email/test_headerregistry.py @@ -1492,17 +1492,19 @@ def test_quoting(self): self.assertEqual(str(a), '"Sara J." <"bad name"@example.com>') def test_il8n(self): - a = Address('Éric', 'wok', 'exàmple.com') + a = Address('Éric', 'wők', 'exàmple.com') self.assertEqual(a.display_name, 'Éric') - self.assertEqual(a.username, 'wok') + self.assertEqual(a.username, 'wők') self.assertEqual(a.domain, 'exàmple.com') - self.assertEqual(a.addr_spec, 'wok@exàmple.com') - self.assertEqual(str(a), 'Éric <wok@exàmple.com>') + self.assertEqual(a.addr_spec, 'wők@exàmple.com') + self.assertEqual(str(a), 'Éric <wők@exàmple.com>') - # XXX: there is an API design issue that needs to be solved here. - #def test_non_ascii_username_raises(self): - # with self.assertRaises(ValueError): - # Address('foo', 'wők', 'example.com') + def test_i18n_in_addr_spec(self): + a = Address(addr_spec='wők@exàmple.com') + self.assertEqual(a.username, 'wők') + self.assertEqual(a.domain, 'exàmple.com') + self.assertEqual(a.addr_spec, 'wők@exàmple.com') + self.assertEqual(str(a), 'wők@exàmple.com') def test_crlf_in_constructor_args_raises(self): cases = ( @@ -1523,10 +1525,6 @@ def test_crlf_in_constructor_args_raises(self): with self.subTest(kwargs=kwargs), self.assertRaisesRegex(ValueError, "invalid arguments"): Address(**kwargs) - def test_non_ascii_username_in_addr_spec_raises(self): - with self.assertRaises(ValueError): - Address('foo', addr_spec='wők@example.com') - def test_address_addr_spec_and_username_raises(self): with self.assertRaises(TypeError): Address('foo', username='bing', addr_spec='bar@baz') From 7a0243f080aa5a8a7a2d15c3e25f8789338c8226 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Tue, 30 Jul 2024 19:19:34 +0000 Subject: [PATCH 4/5] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 
.../next/Library/2024-07-30-19-19-33.gh-issue-81074.YAeWNf.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2024-07-30-19-19-33.gh-issue-81074.YAeWNf.rst diff --git a/Misc/NEWS.d/next/Library/2024-07-30-19-19-33.gh-issue-81074.YAeWNf.rst b/Misc/NEWS.d/next/Library/2024-07-30-19-19-33.gh-issue-81074.YAeWNf.rst new file mode 100644 index 00000000000000..1270a962e99aea --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-07-30-19-19-33.gh-issue-81074.YAeWNf.rst @@ -0,0 +1 @@ +Allow non-ASCII addr_spec in email.headerregistry.Address constructor From 9099083db8f774c1ed618e7a08f741795afe6012 Mon Sep 17 00:00:00 2001 From: Mike Edmunds Date: Mon, 26 May 2025 15:36:48 -0700 Subject: [PATCH 5/5] Stop using NonASCIILocalPartDefect in email._header_value_parser When parsing email messages from Unicode strings (but not bytes), get_local_part() recorded a NonASCIILocalPartDefect for non-ASCII characters. RFC 6532 permits such addresses. This change: - removes the parse-time detection for a non-ASCII local-part (and a related test) - adds tests for passing a non-ASCII addr_spec to email.headerregistry.Address.__init__() - marks the (undocumented) email.errors.NonASCIILocalPartDefect as unused and deprecated This affected parsing email messages from Unicode strings (but not from bytes), and also prevented passing a non-ASCII addr_spec to the email.headerregistry.Address constructor. --- Lib/email/_header_value_parser.py | 5 ----- Lib/email/errors.py | 6 +++--- Lib/email/headerregistry.py | 8 ++------ Lib/test/test_email/test__header_value_parser.py | 11 ----------- .../2024-07-30-19-19-33.gh-issue-81074.YAeWNf.rst | 9 ++++++++- 5 files changed, 13 insertions(+), 26 deletions(-) diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index f11fa83d45ed2d..2988941576cd39 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -1493,11 +1493,6 @@ def get_local_part(value): local_part.defects.append(errors.ObsoleteHeaderDefect( "local-part is not a dot-atom (contains 
CFWS)")) local_part[0] = obs_local_part - try: - local_part.value.encode('ascii') - except UnicodeEncodeError: - local_part.defects.append(errors.NonASCIILocalPartDefect( - "local-part contains non-ASCII characters)")) return local_part, value def get_obs_local_part(value): diff --git a/Lib/email/errors.py b/Lib/email/errors.py index 6bc744bd59c5bb..859307dd85be11 100644 --- a/Lib/email/errors.py +++ b/Lib/email/errors.py @@ -109,9 +109,9 @@ class ObsoleteHeaderDefect(HeaderDefect): """Header uses syntax declared obsolete by RFC 5322""" class NonASCIILocalPartDefect(HeaderDefect): - """local_part contains non-ASCII characters""" - # This defect only occurs during unicode parsing, not when - # parsing messages decoded from binary. + """Unused. Note: this error is deprecated and may be removed in the future.""" + # RFC 6532 permits a non-ASCII local-part. _header_value_parser previously + # treated this as a parse-time defect (when parsing Unicode, but not bytes). class InvalidDateDefect(HeaderDefect): """Header has unparsable or invalid date""" diff --git a/Lib/email/headerregistry.py b/Lib/email/headerregistry.py index d8b06853f3c335..543141dc427ebe 100644 --- a/Lib/email/headerregistry.py +++ b/Lib/email/headerregistry.py @@ -45,12 +45,8 @@ def __init__(self, display_name='', username='', domain='', addr_spec=None): raise ValueError("Invalid addr_spec; only '{}' " "could be parsed from '{}'".format( a_s, addr_spec)) - relevant_defects = [ - defect for defect in a_s.all_defects - if not isinstance(defect, errors.NonASCIILocalPartDefect) - ] - if relevant_defects: - raise relevant_defects[0] + if a_s.all_defects: + raise a_s.all_defects[0] username = a_s.local_part domain = a_s.domain self._display_name = display_name diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py index fd4ac2c404ce47..251b6a47834366 100644 --- a/Lib/test/test_email/test__header_value_parser.py +++ 
b/Lib/test/test_email/test__header_value_parser.py @@ -1235,17 +1235,6 @@ def test_get_local_part_valid_and_invalid_qp_in_atom_list(self): '@example.com') self.assertEqual(local_part.local_part, r'\example\\ example') - def test_get_local_part_unicode_defect(self): - # Currently this only happens when parsing unicode, not when parsing - # stuff that was originally binary. - local_part = self._test_get_x(parser.get_local_part, - 'exámple@example.com', - 'exámple', - 'exámple', - [errors.NonASCIILocalPartDefect], - '@example.com') - self.assertEqual(local_part.local_part, 'exámple') - # get_dtext def test_get_dtext_only(self): diff --git a/Misc/NEWS.d/next/Library/2024-07-30-19-19-33.gh-issue-81074.YAeWNf.rst b/Misc/NEWS.d/next/Library/2024-07-30-19-19-33.gh-issue-81074.YAeWNf.rst index 1270a962e99aea..87de4fade14dfb 100644 --- a/Misc/NEWS.d/next/Library/2024-07-30-19-19-33.gh-issue-81074.YAeWNf.rst +++ b/Misc/NEWS.d/next/Library/2024-07-30-19-19-33.gh-issue-81074.YAeWNf.rst @@ -1 +1,8 @@ -Allow non-ASCII addr_spec in email.headerregistry.Address constructor +The :mod:`email` module no longer treats email addresses with non-ASCII +characters as defects when parsing a Unicode string or in the ``addr_spec`` +parameter to :class:`email.headerregistry.Address`. :rfc:`6532` permits such +addresses, and they were already supported when parsing bytes and in the Address +``username`` parameter. + +The (undocumented) :exc:`!email.errors.NonASCIILocalPartDefect` is no longer +used and should be considered deprecated.