Skip to content

Commit 61b1299

Browse files
committed
Cherry-picked b4bcc06 for Email
1 parent f2707a6 commit 61b1299

File tree

6 files changed

+186
-11
lines changed

6 files changed

+186
-11
lines changed

Doc/library/email.utils.rst

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,11 @@ of the new API.
6767
*email address* parts. Returns a tuple of that information, unless the parse
6868
fails, in which case a 2-tuple of ``('', '')`` is returned.
6969

70+
.. versionchanged:: 3.7
71+
For security reasons, addresses that were ambiguous and could parse into
72+
multiple different addresses now cause ``('', '')`` to be returned
73+
instead of only one of the *potential* addresses.
74+
7075

7176
.. function:: formataddr(pair, charset='utf-8')
7277

@@ -89,7 +94,7 @@ of the new API.
8994
This method returns a list of 2-tuples of the form returned by ``parseaddr()``.
9095
*fieldvalues* is a sequence of header field values as might be returned by
9196
:meth:`Message.get_all <email.message.Message.get_all>`. Here's a simple
92-
example that gets all the recipients of a message::
97+
example that gets all the recipients of a message::
9398

9499
from email.utils import getaddresses
95100

@@ -99,6 +104,25 @@ of the new API.
99104
resent_ccs = msg.get_all('resent-cc', [])
100105
all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs)
101106

107+
When parsing fails for a single fieldvalue, a 2-tuple of ``('', '')``
108+
is returned in its place. Other errors in parsing the list of
109+
addresses such as a fieldvalue seemingly parsing into multiple
110+
addresses may result in a list containing a single empty 2-tuple
111+
``[('', '')]`` being returned rather than returning potentially
112+
invalid output.
113+
114+
Example malformed input parsing:
115+
116+
.. doctest::
117+
118+
>>> from email.utils import getaddresses
119+
>>> getaddresses(['alice@example.com <bob@example.com>', 'me@example.com'])
120+
[('', '')]
121+
122+
.. versionchanged:: 3.7
123+
The 2-tuples of ``('', '')`` in the returned values when parsing
124+
fails were added to address a security issue.
125+
102126

103127
.. function:: parsedate(date)
104128

Doc/whatsnew/3.7.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -900,6 +900,15 @@ therefore included in source distributions.
900900
(Contributed by Ryan Gonzalez in :issue:`11913`.)
901901

902902

903+
email
904+
-----
905+
906+
* :func:`email.utils.getaddresses` and :func:`email.utils.parseaddr` now return
907+
``('', '')`` 2-tuples in more situations where invalid email addresses are
908+
encountered instead of potentially inaccurate values.
909+
(Contributed by Thomas Dwyer for :gh:`102988` to ameliorate CVE-2023-27043.)
910+
911+
903912
enum
904913
----
905914

Lib/email/utils.py

Lines changed: 58 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ def formataddr(pair, charset='utf-8'):
8181
If the first element of pair is false, then the second element is
8282
returned unmodified.
8383
84-
Optional charset if given is the character set that is used to encode
84+
The optional charset is the character set that is used to encode
8585
realname in case realname is not ASCII safe. Can be an instance of str or
8686
a Charset-like object which has a header_encode method. Default is
8787
'utf-8'.
@@ -106,12 +106,54 @@ def formataddr(pair, charset='utf-8'):
106106
return address
107107

108108

109+
def _pre_parse_validation(email_header_fields):
110+
accepted_values = []
111+
for v in email_header_fields:
112+
s = v.replace('\\(', '').replace('\\)', '')
113+
if s.count('(') != s.count(')'):
114+
v = "('', '')"
115+
accepted_values.append(v)
116+
117+
return accepted_values
118+
119+
120+
def _post_parse_validation(parsed_email_header_tuples):
121+
accepted_values = []
122+
# The parser would have parsed a correctly formatted domain-literal
123+
# The existence of an [ after parsing indicates a parsing failure
124+
for v in parsed_email_header_tuples:
125+
if '[' in v[1]:
126+
v = ('', '')
127+
accepted_values.append(v)
128+
129+
return accepted_values
130+
109131

110132
def _strip_quoted_realnames(addr):
    """Return addr with every closed double-quoted section removed.

    A character preceded by a backslash never opens or closes a quoted
    section.  If a quote is opened but never closed, the text from that
    quote onwards is kept as is, so malformed input is still counted in
    full by the caller.
    """
    if '"' not in addr:
        # Fast path: nothing to strip.
        return addr

    pieces = []
    start = 0          # start of the current unquoted run
    open_pos = None    # index of the currently open quote, if any
    escaped = False
    for pos, ch in enumerate(addr):
        if escaped:
            escaped = False
        elif ch == '\\':
            escaped = True
        elif ch == '"':
            if open_pos is None:
                open_pos = pos
            else:
                pieces.append(addr[start:open_pos])
                start = pos + 1
                open_pos = None
    pieces.append(addr[start:])
    return ''.join(pieces)


def getaddresses(fieldvalues):
    """Return a list of (REALNAME, EMAIL) or ('', '') pairs for fieldvalues.

    Each fieldvalue is converted to str and passed through
    _pre_parse_validation; the values are then joined with COMMASPACE,
    parsed as one address list, and the parsed pairs are passed through
    _post_parse_validation.

    The number of parsed addresses must equal the number of addresses the
    input appears to contain: one per fieldvalue plus one per comma in it.
    Commas inside a double-quoted real name (for example
    '"Doe, John" <jdoe@example.com>') are part of the name, not separators,
    so quoted sections are removed before the commas are counted.  When the
    two numbers differ a parsing error is assumed and a list containing a
    single empty 2-tuple, [('', '')], is returned instead of potentially
    invalid output.
    """
    fieldvalues = [str(v) for v in fieldvalues]
    fieldvalues = _pre_parse_validation(fieldvalues)
    joined = COMMASPACE.join(fieldvalues)
    result = _post_parse_validation(_AddressList(joined).addresslist)

    # Expected number of addresses: 1 + number of separating commas.
    expected = sum(
        _strip_quoted_realnames(v).count(',') + 1 for v in fieldvalues)
    if len(result) != expected:
        return [('', '')]

    return result
115157

116158

117159
def _format_timetuple_and_zone(timetuple, zone):
@@ -209,9 +251,18 @@ def parseaddr(addr):
209251
Return a tuple of realname and email address, unless the parse fails, in
210252
which case return a 2-tuple of ('', '').
211253
"""
212-
addrs = _AddressList(addr).addresslist
213-
if not addrs:
214-
return '', ''
254+
if isinstance(addr, list):
255+
addr = addr[0]
256+
257+
if not isinstance(addr, str):
258+
return ('', '')
259+
260+
addr = _pre_parse_validation([addr])[0]
261+
addrs = _post_parse_validation(_AddressList(addr).addresslist)
262+
263+
if not addrs or len(addrs) > 1:
264+
return ('', '')
265+
215266
return addrs[0]
216267

217268

Lib/test/test_email/test_email.py

Lines changed: 78 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3213,15 +3213,90 @@ def test_getaddresses(self):
32133213
[('Al Person', '[email protected]'),
32143214
('Bud Person', '[email protected]')])
32153215

3216+
def test_getaddresses_parsing_errors(self):
3217+
"""Test for parsing errors from CVE-2023-27043"""
# Each input places one special character ( ) < > @ , ; : . " [ ]
# between an address and an angle-bracket address.  Every case that
# still shows an expected value expects the single empty pair
# [('', '')].
# NOTE(review): the address literals read '[email protected]' in this
# view, which looks like redaction by the page renderer rather than the
# real test data -- confirm against the upstream commit.
3218+
eq = self.assertEqual
3219+
eq(utils.getaddresses(['[email protected](<[email protected]>']),
3220+
[('', '')])
3221+
eq(utils.getaddresses(['[email protected])<[email protected]>']),
3222+
[('', '')])
3223+
eq(utils.getaddresses(['[email protected]<<[email protected]>']),
3224+
[('', '')])
3225+
eq(utils.getaddresses(['[email protected]><[email protected]>']),
3226+
[('', '')])
3227+
eq(utils.getaddresses(['[email protected]@<[email protected]>']),
3228+
[('', '')])
# NOTE(review): the expected value of the comma case below is missing in
# this view (diff line 3230 is empty); restore it from the upstream
# commit before relying on this assertion.
eq(utils.getaddresses(['[email protected],<[email protected]>']),
3230+
3231+
eq(utils.getaddresses(['[email protected];<[email protected]>']),
3232+
[('', '')])
3233+
eq(utils.getaddresses(['[email protected]:<[email protected]>']),
3234+
[('', '')])
3235+
eq(utils.getaddresses(['[email protected].<[email protected]>']),
3236+
[('', '')])
3237+
eq(utils.getaddresses(['[email protected]"<[email protected]>']),
3238+
[('', '')])
3239+
eq(utils.getaddresses(['[email protected][<[email protected]>']),
3240+
[('', '')])
3241+
eq(utils.getaddresses(['[email protected]]<[email protected]>']),
3242+
[('', '')])
3244+
def test_parseaddr_parsing_errors(self):
3245+
"""Test for parsing errors from CVE-2023-27043"""
# Same special-character inputs as the getaddresses variant; every case
# expects the empty pair ('', '').
# Each call passes a one-element list rather than a str, so it goes
# through the branch of parseaddr that takes the first element of a
# list argument.
# NOTE(review): presumably intentional -- confirm that a plain str input
# is also covered somewhere.
# NOTE(review): the address literals read '[email protected]' in this
# view, which looks like redaction by the page renderer -- confirm
# against the upstream commit.
3246+
eq = self.assertEqual
3247+
eq(utils.parseaddr(['[email protected](<[email protected]>']),
3248+
('', ''))
3249+
eq(utils.parseaddr(['[email protected])<[email protected]>']),
3250+
('', ''))
3251+
eq(utils.parseaddr(['[email protected]<<[email protected]>']),
3252+
('', ''))
3253+
eq(utils.parseaddr(['[email protected]><[email protected]>']),
3254+
('', ''))
3255+
eq(utils.parseaddr(['[email protected]@<[email protected]>']),
3256+
('', ''))
3257+
eq(utils.parseaddr(['[email protected],<[email protected]>']),
3258+
('', ''))
3259+
eq(utils.parseaddr(['[email protected];<[email protected]>']),
3260+
('', ''))
3261+
eq(utils.parseaddr(['[email protected]:<[email protected]>']),
3262+
('', ''))
3263+
eq(utils.parseaddr(['[email protected].<[email protected]>']),
3264+
('', ''))
3265+
eq(utils.parseaddr(['[email protected]"<[email protected]>']),
3266+
('', ''))
3267+
eq(utils.parseaddr(['[email protected][<[email protected]>']),
3268+
('', ''))
3269+
eq(utils.parseaddr(['[email protected]]<[email protected]>']),
3270+
('', ''))
3271+
32163272
def test_getaddresses_nasty(self):
# Exercises getaddresses with awkward inputs: empty groups, stray
# punctuation, comments inside addresses, route addresses, and quoted
# real names containing ':' , ';' and escaped quotes.
# NOTE(review): the address literals read '[email protected]' in this
# view, which looks like redaction by the page renderer -- confirm
# against the upstream commit.
32173273
eq = self.assertEqual
32183274
eq(utils.getaddresses(['foo: ;']), [('', '')])
# Diff context: the three-pair expectation for '[]*-- =~$' (lines marked
# '-') is replaced by the single empty pair on the line marked '+'.
3219-
eq(utils.getaddresses(
3220-
['[]*-- =~$']),
3221-
[('', ''), ('', ''), ('', '*--')])
3275+
eq(utils.getaddresses(['[]*-- =~$']), [('', '')])
32223276
eq(utils.getaddresses(
32233277
['foo: ;', '"Jason R. Mastaler" <[email protected]>']),
32243278
[('', ''), ('Jason R. Mastaler', '[email protected]')])
# The cases below are all added by this change.
3279+
eq(utils.getaddresses(
3280+
[r'Pete(A nice \) chap) <pete(his account)@silly.test(his host)>']),
3281+
[('Pete (A nice ) chap his account his host)', '[email protected]')])
3282+
eq(utils.getaddresses(
3283+
['(Empty list)(start)Undisclosed recipients :(nobody(I know))']),
3284+
[('', '')])
3285+
eq(utils.getaddresses(
3286+
['Mary <@machine.tld:[email protected]>, , jdoe@test . example']),
3287+
[('Mary', '[email protected]'), ('', ''), ('', '[email protected]')])
3288+
eq(utils.getaddresses(
3289+
['John Doe <jdoe@machine(comment). example>']),
3290+
[('John Doe (comment)', '[email protected]')])
3291+
eq(utils.getaddresses(
3292+
['"Mary Smith: Personal Account" <[email protected]>']),
3293+
[('Mary Smith: Personal Account', '[email protected]')])
3294+
eq(utils.getaddresses(
3295+
['Undisclosed recipients:;']),
3296+
[('', '')])
3297+
eq(utils.getaddresses(
3298+
[r'<[email protected]>, "Giant; \"Big\" Box" <[email protected]>']),
3299+
[('', '[email protected]'), ('Giant; "Big" Box', '[email protected]')])
32253300

32263301
def test_getaddresses_embedded_comment(self):
32273302
"""Test proper handling of a nested comment"""

Misc/NEWS.d/3.7.17.rst

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,15 @@
1+
.. date: 2023-09-11-20-07-52
2+
.. gh-issue: 102988
3+
.. nonce: GLWDMX
4+
.. release date: 2023-09-11
5+
.. section: Security
6+
7+
email.utils.getaddresses and email.utils.parseaddr now return
8+
``('', '')`` 2-tuples in more situations where invalid email addresses are
9+
encountered instead of potentially inaccurate values.
10+
(Contributed by Thomas Dwyer for :gh:`102988` to ameliorate CVE-2023-27043.)
11+
12+
..
113
.. date: 2023-06-05-04-07-52
214
.. gh-issue: 103142
315
.. nonce: GLWDMX
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
CVE-2023-27043: Prevent :func:`email.utils.parseaddr`
2+
and :func:`email.utils.getaddresses` from returning the realname portion of an
3+
invalid RFC2822 email header in the email address portion of the 2-tuple
4+
returned after being parsed by :class:`email._parseaddr.AddressList`.

0 commit comments

Comments
 (0)