Skip to content

Commit ce4773c

Browse files
committed
Match behavior between Python and C base 85 functions
As we're now keeping the existing Python base 85 functions, the C implementations should behave exactly the same, down to exception type and wording. It is also no longer an error to try to decode data of length 1 mod 5.
1 parent 6c0e4a3 commit ce4773c

File tree

4 files changed

+83
-65
lines changed

4 files changed

+83
-65
lines changed

Lib/_base64.py

Lines changed: 34 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,57 @@
11
"""C accelerator wrappers for originally pure-Python parts of base64."""
22

3-
from binascii import a2b_ascii85, a2b_base85, b2a_ascii85, b2a_base85
3+
from binascii import Error, a2b_ascii85, a2b_base85, b2a_ascii85, b2a_base85
4+
from base64 import _bytes_from_decode_data, bytes_types
45

5-
__all__ = ['a85encode', 'a85decode',
6-
'b85encode', 'b85decode',
7-
'z85encode', 'z85decode']
86

7+
# Base 85 encoder functions in base64 silently convert input to bytes.
8+
def _bytes_from_encode_data(b):
9+
return b if isinstance(b, bytes_types) else memoryview(b).tobytes()
910

10-
def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
11+
12+
# Functions in binascii raise binascii.Error instead of ValueError.
13+
def raise_valueerror(func):
    """Decorate *func* so that binascii.Error is re-raised as ValueError.

    The returned wrapper forwards all positional and keyword arguments to
    *func* and returns its result unchanged.  If *func* raises
    binascii.Error, a plain ValueError carrying the same message text is
    raised instead, with the original exception suppressed from the
    traceback.  The wrapper keeps the metadata (``__name__``, ``__doc__``,
    ...) of *func*.
    """
    # Imported here so the decorator does not rely on a module-level import.
    from functools import wraps

    @wraps(func)
    def _func(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Error as e:
            # Pass the message as text so that ``args`` holds a string
            # rather than the caught exception object.
            raise ValueError(str(e)) from None
    return _func
20+
21+
22+
@raise_valueerror
def _a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
    """Ascii85-encode *b* by delegating to binascii.b2a_ascii85.

    The input is first passed through _bytes_from_encode_data.  The keyword
    arguments are forwarded as fold_spaces=foldspaces, wrap=adobe,
    width=wrapcol and pad=pad.
    """
    data = _bytes_from_encode_data(b)
    return b2a_ascii85(data, fold_spaces=foldspaces, wrap=adobe,
                       width=wrapcol, pad=pad)
1327

1428

15-
def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'):
29+
@raise_valueerror
def _a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'):
    """Ascii85-decode *b* by delegating to binascii.a2b_ascii85.

    The input is first passed through _bytes_from_decode_data.  The keyword
    arguments are forwarded as fold_spaces=foldspaces, wrap=adobe and
    ignore=ignorechars.
    """
    data = _bytes_from_decode_data(b)
    return a2b_ascii85(data, fold_spaces=foldspaces, wrap=adobe,
                       ignore=ignorechars)
1834

1935

20-
def b85encode(b, pad=False):
36+
@raise_valueerror
def _b85encode(b, pad=False):
    """Base85-encode *b* by delegating to binascii.b2a_base85.

    The input is first passed through _bytes_from_encode_data; *pad* is
    forwarded unchanged and newline=False is always passed.
    """
    data = _bytes_from_encode_data(b)
    return b2a_base85(data, pad=pad, newline=False)
2240

2341

24-
def b85decode(b):
42+
@raise_valueerror
def _b85decode(b):
    """Base85-decode *b* by delegating to binascii.a2b_base85.

    The input is first passed through _bytes_from_decode_data and decoded
    with strict_mode=True.
    """
    data = _bytes_from_decode_data(b)
    return a2b_base85(data, strict_mode=True)
2646

2747

28-
def z85encode(s):
48+
@raise_valueerror
def _z85encode(s):
    """Z85-encode *s* by delegating to binascii.b2a_base85 with z85=True.

    The input is first passed through _bytes_from_encode_data;
    newline=False is always passed.
    """
    data = _bytes_from_encode_data(s)
    return b2a_base85(data, newline=False, z85=True)
3052

3153

32-
def z85decode(s):
54+
@raise_valueerror
def _z85decode(s):
    """Z85-decode *s* by delegating to binascii.a2b_base85 with z85=True.

    The input is first passed through _bytes_from_decode_data and decoded
    with strict_mode=True.
    """
    data = _bytes_from_decode_data(s)
    return a2b_base85(data, strict_mode=True, z85=True)

Lib/base64.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -578,7 +578,21 @@ def decodebytes(s):
578578

579579
# Use accelerated implementations of originally pure-Python parts if possible.
580580
try:
    from _base64 import (_a85encode, _a85decode, _b85encode,
                         _b85decode, _z85encode, _z85decode)
    from functools import update_wrapper
    # update_wrapper() returns the wrapper it was given, so each public name
    # is rebound to the accelerated function right after that function has
    # received the metadata of the pure-Python implementation it replaces.
    a85encode = update_wrapper(_a85encode, a85encode)
    a85decode = update_wrapper(_a85decode, a85decode)
    b85encode = update_wrapper(_b85encode, b85encode)
    b85decode = update_wrapper(_b85decode, b85decode)
    z85encode = update_wrapper(_z85encode, z85encode)
    z85decode = update_wrapper(_z85decode, z85decode)
except ImportError:
    # No accelerator available: keep the pure-Python definitions.
    pass
584598

Lib/test/test_binascii.py

Lines changed: 25 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,21 @@ def test_ascii85_valid(self):
242242
res += b
243243
self.assertEqual(res, rawdata)
244244

245+
# Test decoding inputs with length 1 mod 5
246+
params = [
247+
(b"a", False, False, b"", b""),
248+
(b"xbw", False, False, b"wx", b""),
249+
(b"<~c~>", False, True, b"", b""),
250+
(b"{d ~>", False, True, b" {", b""),
251+
(b"ye", True, False, b"", b" "),
252+
(b"z\x01y\x00f", True, False, b"\x00\x01", b"\x00\x00\x00\x00 "),
253+
(b"<~FCfN8yg~>", True, True, b"", b"test "),
254+
(b"FE;\x03#8zFCf\x02N8yh~>", True, True, b"\x02\x03", b"tset\x00\x00\x00\x00test "),
255+
]
256+
for a, fold_spaces, wrap, ignore, b in params:
257+
kwargs = {"fold_spaces": fold_spaces, "wrap": wrap, "ignore": ignore}
258+
self.assertEqual(binascii.a2b_ascii85(self.type2test(a), **kwargs), b)
259+
245260
def test_ascii85_invalid(self):
246261
# Test Ascii85 with invalid characters interleaved
247262
lines, i = [], 0
@@ -284,19 +299,16 @@ def _assertRegexTemplate(assert_regex, data, **kwargs):
284299
binascii.a2b_ascii85(self.type2test(data), **kwargs)
285300

286301
def assertMissingDelimiter(data):
287-
_assertRegexTemplate(r"(?i)end with '~>'", data, wrap=True)
302+
_assertRegexTemplate(r"(?i)end with b'~>'", data, wrap=True)
288303

289304
def assertOverflow(data):
290-
_assertRegexTemplate(r"(?i)85 overflow", data)
305+
_assertRegexTemplate(r"(?i)Ascii85 overflow", data)
291306

292307
def assertInvalidSpecial(data):
293308
_assertRegexTemplate(r"(?i)'[yz]'.+5-tuple", data, fold_spaces=True)
294309

295310
def assertInvalidChar(data, **kwargs):
296-
_assertRegexTemplate(r"(?i)invalid in Ascii85", data, **kwargs)
297-
298-
def assertInvalidLength(data):
299-
_assertRegexTemplate(r"(?i)invalid length", data)
311+
_assertRegexTemplate(r"(?i)Non-Ascii85 digit", data, **kwargs)
300312

301313
# Test Ascii85 with missing delimiters
302314
assertMissingDelimiter(b"")
@@ -331,15 +343,6 @@ def assertInvalidLength(data):
331343
assertInvalidChar(b"\tFCb", ignore=b"\n")
332344
assertInvalidChar(b"xxxB\nP\thU'D v/F+", ignore=b" \n\tv")
333345

334-
# Test Ascii85 with invalid length of final group (1 mod 5)
335-
assertInvalidLength(b"a")
336-
assertInvalidLength(b"b")
337-
assertInvalidLength(b"zc")
338-
assertInvalidLength(b"zza")
339-
assertInvalidLength(b"!!!!!a")
340-
assertInvalidLength(b"+<VdL+<VdLZ")
341-
assertInvalidLength(b"Z" * (5 * 43 + 21))
342-
343346
def test_ascii85_width(self):
344347
# Test Ascii85 splitting lines by width
345348
def assertEncode(a_expected, data, n, wrap=False):
@@ -411,7 +414,7 @@ def test_ascii85_ignore(self):
411414
def assertIgnore(data, expected, ignore=b"", **kwargs):
412415
data = self.type2test(data)
413416
ignore = self.type2test(ignore)
414-
with self.assertRaisesRegex(binascii.Error, r"(?i)invalid in Ascii85"):
417+
with self.assertRaisesRegex(binascii.Error, r"(?i)Non-Ascii85 digit"):
415418
binascii.a2b_ascii85(data, **kwargs)
416419
res = binascii.a2b_ascii85(data, ignore=ignore, **kwargs)
417420
self.assertEqual(res, expected)
@@ -441,6 +444,11 @@ def test_base85_valid(self):
441444
res += b
442445
self.assertEqual(res, self.rawdata)
443446

447+
# Test decoding inputs with length 1 mod 5
448+
self.assertEqual(binascii.a2b_base85(self.type2test(b"a")), b"")
449+
self.assertEqual(binascii.a2b_base85(self.type2test(b" b ")), b"")
450+
self.assertEqual(binascii.a2b_base85(self.type2test(b"b/Y\"*,j'Nc")), b"test")
451+
444452
def test_base85_invalid(self):
445453
# Test base85 with invalid characters interleaved
446454
lines, i = [], 0
@@ -480,9 +488,6 @@ def _assertRegexTemplate(assert_regex, data, **kwargs):
480488
def assertOverflow(data):
481489
_assertRegexTemplate(r"(?i)base85 overflow", data)
482490

483-
def assertInvalidLength(data):
484-
_assertRegexTemplate(r"(?i)invalid length", data)
485-
486491
# Test base85 with out-of-range encoded value
487492
assertOverflow(b"}")
488493
assertOverflow(b"|O")
@@ -492,13 +497,6 @@ def assertInvalidLength(data):
492497
assertOverflow(b"|NsC0~")
493498
assertOverflow(b"|NsC0|NsC0|NsD0")
494499

495-
# Test base85 with invalid length of final group (1 mod 5)
496-
assertInvalidLength(b"0")
497-
assertInvalidLength(b"1")
498-
assertInvalidLength(b"^^^^^^")
499-
assertInvalidLength(b"|NsC0|NsC0a")
500-
assertInvalidLength(b"_" * (5 * 43 + 21))
501-
502500
def test_base85_pad(self):
503501
# Test base85 with encode padding
504502
rawdata = b"n1n3Tee\n ch@rAc\te\r$"
@@ -514,7 +512,7 @@ def test_base85_strict_mode(self):
514512
# Test base85 with strict mode on
515513
def assertNonBase85Data(data, expected):
516514
data = self.type2test(data)
517-
with self.assertRaisesRegex(binascii.Error, r"(?i)invalid in base85"):
515+
with self.assertRaisesRegex(binascii.Error, r"(?i)bad base85 character"):
518516
binascii.a2b_base85(data, strict_mode=True)
519517
default_res = binascii.a2b_base85(data)
520518
non_strict_res = binascii.a2b_base85(data, strict_mode=False)

Modules/binascii.c

Lines changed: 9 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -709,7 +709,7 @@ binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int fold_spaces,
709709
state = get_binascii_state(module);
710710
if (state != NULL) {
711711
PyErr_SetString(state->Error,
712-
"Expected Ascii85 data to end with '~>'");
712+
"Ascii85 encoded byte sequences must end with b'~>'");
713713
}
714714
return NULL;
715715
}
@@ -782,7 +782,8 @@ binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int fold_spaces,
782782
} else if (!ignore_map[this_ch]) {
783783
state = get_binascii_state(module);
784784
if (state != NULL) {
785-
PyErr_Format(state->Error, "'%c' invalid in Ascii85", this_ch);
785+
PyErr_Format(state->Error,
786+
"Non-Ascii85 digit found: %c", this_ch);
786787
}
787788
goto error_end;
788789
}
@@ -792,16 +793,6 @@ binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int fold_spaces,
792793
continue;
793794
}
794795

795-
/* Treat encoded length of 1 mod 5 as an error. */
796-
if (ascii_len == -3) {
797-
state = get_binascii_state(module);
798-
if (state != NULL) {
799-
PyErr_SetString(state->Error,
800-
"Ascii85 data has invalid length");
801-
}
802-
goto error_end;
803-
}
804-
805796
/* Write current chunk. */
806797
chunk_len = ascii_len < 1 ? 3 + ascii_len : 4;
807798
for (Py_ssize_t i = 0; i < chunk_len; i++) {
@@ -989,8 +980,10 @@ binascii_a2b_base85_impl(PyObject *module, Py_buffer *data, int strict_mode,
989980
(leftchar *= 85) > UINT32_MAX - this_digit) {
990981
state = get_binascii_state(module);
991982
if (state != NULL) {
992-
PyErr_SetString(state->Error,
993-
z85 ? "z85 overflow" : "base85 overflow");
983+
/* data->len is a Py_ssize_t, so the byte offset has to be formatted
   with %zd; %d would read the argument as a plain int. */
PyErr_Format(state->Error,
             "%s overflow in hunk starting at byte %zd",
             z85 ? "z85" : "base85",
             (data->len - ascii_len) / 5 * 5);
994987
}
995988
goto error_end;
996989
}
@@ -999,8 +992,8 @@ binascii_a2b_base85_impl(PyObject *module, Py_buffer *data, int strict_mode,
999992
} else if (strict_mode) {
1000993
state = get_binascii_state(module);
1001994
if (state != NULL) {
1002-
PyErr_Format(state->Error, "'%c' %s", this_ch,
1003-
z85 ? "invalid in z85" : "invalid in base85");
995+
PyErr_Format(state->Error, "bad %s character at position %d",
996+
z85 ? "z85" : "base85", data->len - ascii_len);
1004997
}
1005998
goto error_end;
1006999
}
@@ -1010,17 +1003,6 @@ binascii_a2b_base85_impl(PyObject *module, Py_buffer *data, int strict_mode,
10101003
continue;
10111004
}
10121005

1013-
/* Treat encoded length of 1 mod 5 as an error. */
1014-
if (ascii_len == -3) {
1015-
state = get_binascii_state(module);
1016-
if (state != NULL) {
1017-
PyErr_Format(state->Error,
1018-
z85 ? "z85 data has invalid length"
1019-
: "base85 data has invalid length");
1020-
}
1021-
goto error_end;
1022-
}
1023-
10241006
/* Write current chunk. */
10251007
chunk_len = ascii_len < 1 ? 3 + ascii_len : 4;
10261008
for (Py_ssize_t i = 0; i < chunk_len; i++) {

0 commit comments

Comments
 (0)