diff --git a/Lib/email/_encoded_words.py b/Lib/email/_encoded_words.py index 6795a606de037e..7c2dfb360f95ad 100644 --- a/Lib/email/_encoded_words.py +++ b/Lib/email/_encoded_words.py @@ -113,9 +113,14 @@ def decode_b(encoded): # The non-alphabet characters are ignored as far as padding # goes, but we don't know how many there are. So try without adding # padding to see if it works. + # + # We use urlsafe_b64decode here because some mailers apparently use the + # urlsafe b64 alphabet, and urlsafe_b64decode will correctly decode + # both the urlsafe and regular alphabets. + try: return ( - base64.b64decode(encoded, validate=False), + base64.urlsafe_b64decode(encoded), [errors.InvalidBase64CharactersDefect()], ) except binascii.Error: @@ -123,7 +128,7 @@ def decode_b(encoded): # is ignored). try: return ( - base64.b64decode(encoded + b'==', validate=False), + base64.urlsafe_b64decode(encoded + b'=='), [errors.InvalidBase64CharactersDefect(), errors.InvalidBase64PaddingDefect()], ) diff --git a/Lib/email/base64mime.py b/Lib/email/base64mime.py index a5a3f737a97b51..3363a5bc45f2d4 100644 --- a/Lib/email/base64mime.py +++ b/Lib/email/base64mime.py @@ -34,8 +34,8 @@ ] -from base64 import b64encode -from binascii import b2a_base64, a2b_base64 +from base64 import b64encode, urlsafe_b64decode +from binascii import b2a_base64 CRLF = '\r\n' NL = '\n' @@ -102,12 +102,15 @@ def decode(string): base64 (like =?iso-8859-1?b?bmloISBuaWgh?=) -- please use the high level email.header class for that functionality. """ + # We use urlsafe_b64decode here because some mailers apparently use the + # urlsafe b64 alphabet, and urlsafe_b64decode will correctly decode both + # the urlsafe and regular alphabets. 
if not string: return bytes() elif isinstance(string, str): - return a2b_base64(string.encode('raw-unicode-escape')) + return urlsafe_b64decode(string.encode('raw-unicode-escape')) else: - return a2b_base64(string) + return urlsafe_b64decode(string) # For convenience and backwards compatibility w/ standard base64 module diff --git a/Lib/test/test_email/test__encoded_words.py b/Lib/test/test_email/test__encoded_words.py index 1713962f94caef..2686448565bbbe 100644 --- a/Lib/test/test_email/test__encoded_words.py +++ b/Lib/test/test_email/test__encoded_words.py @@ -38,6 +38,14 @@ def test_missing_padding(self): # 2 missing padding characters self._test(b'dg', b'v', [errors.InvalidBase64PaddingDefect]) + def test_urlsafe_alphabet(self): + self._test( + b'QW5tZWxkdW5nIE5ldHphbnNjaGx1c3MgU_xkcmluZzNwLmpwZw==', + b'Anmeldung Netzanschluss S\xfcdring3p.jpg', + [errors.InvalidBase64CharactersDefect]) + # mix of different base64 alphabets + self._test(b'aGVsbG8_Pz8/', b'hello????', [errors.InvalidBase64CharactersDefect]) + def test_invalid_character(self): self._test(b'dm\x01k===', b'vi', [errors.InvalidBase64CharactersDefect]) diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index b8116d073a2670..03381ea1b72b2f 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -4663,6 +4663,11 @@ def test_decode(self): eq = self.assertEqual eq(base64mime.decode(''), b'') eq(base64mime.decode('aGVsbG8='), b'hello') + eq(base64mime.decode( + 'QW5tZWxkdW5nIE5ldHphbnNjaGx1c3MgU_xkcmluZzNwLmpwZw=='), + b'Anmeldung Netzanschluss S\xfcdring3p.jpg') + # mix of different base64 alphabets + eq(base64mime.decode('aGVsbG8_Pz8/'), b'hello????') def test_encode(self): eq = self.assertEqual diff --git a/Misc/NEWS.d/next/Library/2025-07-19-16-59-16.gh-issue-56698.yHSmT_.rst b/Misc/NEWS.d/next/Library/2025-07-19-16-59-16.gh-issue-56698.yHSmT_.rst new file mode 100644 index 00000000000000..d23e7eb2bd4c0e --- /dev/null +++ 
b/Misc/NEWS.d/next/Library/2025-07-19-16-59-16.gh-issue-56698.yHSmT_.rst @@ -0,0 +1,2 @@ +Accept the URL-safe base64 alphabet in base64-encoded email headers, as some +email clients generate such headers.