Lib/pickle.py (28 additions, 6 deletions)

@@ -191,6 +191,11 @@ def __init__(self, value):
__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$", x)])


# Data larger than this will be read in chunks, to prevent extreme
# overallocation.
_MIN_READ_BUF_SIZE = (1 << 20)


class _Framer:

_FRAME_SIZE_MIN = 4
@@ -289,7 +294,7 @@ def read(self, n):
"pickle exhausted before end of frame")
return data
else:
return self.file_read(n)
return self._chunked_file_read(n)

def readline(self):
if self.current_frame:
@@ -304,11 +309,23 @@ def readline(self):
else:
return self.file_readline()

def _chunked_file_read(self, size):
cursize = min(size, _MIN_READ_BUF_SIZE)
b = self.file_read(cursize)
while cursize < size and len(b) == cursize:
delta = min(cursize, size - cursize)
b += self.file_read(delta)
cursize += delta
return b

def load_frame(self, frame_size):
if self.current_frame and self.current_frame.read() != b'':
raise UnpicklingError(
"beginning of a new frame before end of current frame")
self.current_frame = io.BytesIO(self.file_read(frame_size))
data = self._chunked_file_read(frame_size)
if len(data) < frame_size:
raise EOFError
self.current_frame = io.BytesIO(data)


# Tools used for pickling.
@@ -1416,12 +1433,17 @@ def load_binbytes8(self):
dispatch[BINBYTES8[0]] = load_binbytes8

def load_bytearray8(self):
len, = unpack('<Q', self.read(8))
if len > maxsize:
size, = unpack('<Q', self.read(8))
if size > maxsize:
raise UnpicklingError("BYTEARRAY8 exceeds system's maximum size "
"of %d bytes" % maxsize)
b = bytearray(len)
self.readinto(b)
cursize = min(size, _MIN_READ_BUF_SIZE)
b = bytearray(cursize)
if self.readinto(b) == cursize:
while cursize < size and len(b) == cursize:
delta = min(cursize, size - cursize)
b += self.read(delta)
cursize += delta
self.append(b)
dispatch[BYTEARRAY8[0]] = load_bytearray8
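An editor's sketch, not part of the diff: the `_chunked_file_read` logic above, lifted into a standalone function. The point of the 1 MiB floor and the doubling loop is that the buffer only grows while the stream actually delivers data, so a forged size field cannot force one huge upfront allocation.

```python
import io

_MIN_READ_BUF_SIZE = 1 << 20  # 1 MiB, matching the constant in the diff

def chunked_file_read(file, size):
    """Read up to `size` bytes, growing the buffer geometrically."""
    cursize = min(size, _MIN_READ_BUF_SIZE)
    b = file.read(cursize)
    # A short read means EOF was hit; stop instead of trusting `size`.
    while cursize < size and len(b) == cursize:
        delta = min(cursize, size - cursize)
        b += file.read(delta)
        cursize += delta
    return b

# A 9-byte stream whose header claims an 8 GiB payload yields only
# the bytes that are really there; no 8 GiB allocation happens.
stream = io.BytesIO(b"truncated")
assert chunked_file_read(stream, 8 << 30) == b"truncated"
```

`load_frame` then compares `len(data)` against `frame_size` and raises `EOFError` on a short result, as the diff shows.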

Lib/test/pickletester.py (112 additions, 3 deletions)
@@ -853,9 +853,8 @@ def assert_is_copy(self, obj, objcopy, msg=None):
self.assertEqual(getattr(obj, slot, None),
getattr(objcopy, slot, None), msg=msg)

def check_unpickling_error(self, errors, data):
with self.subTest(data=data), \
self.assertRaises(errors):
def check_unpickling_error_strict(self, errors, data):
with self.assertRaises(errors):
try:
self.loads(data)
except BaseException as exc:
@@ -864,6 +863,10 @@ def check_unpickling_error(self, errors, data):
(data, exc.__class__.__name__, exc))
raise

def check_unpickling_error(self, errors, data):
with self.subTest(data=data):
self.check_unpickling_error_strict(errors, data)

def test_load_from_data0(self):
self.assert_is_copy(self._testdata, self.loads(DATA0))

@@ -1115,6 +1118,112 @@ def test_negative_32b_binput(self):
dumped = b'\x80\x03X\x01\x00\x00\x00ar\xff\xff\xff\xff.'
self.check_unpickling_error(ValueError, dumped)

def itersize(self, start, stop):
# Produce a geometrically increasing sequence from start to stop
# (inclusive) for tests.
size = start
while size < stop:
yield size
size <<= 1
yield stop
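
A quick editor's illustration, with `itersize` lifted to a module-level generator: it doubles from `start` and always ends exactly at `stop`, giving the tests a handful of sizes spread across several orders of magnitude.

```python
def itersize(start, stop):
    size = start
    while size < stop:
        yield size
        size <<= 1
    yield stop

print(list(itersize(1 << 10, 10_000)))
# [1024, 2048, 4096, 8192, 10000]
```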

def test_too_large_put(self):
[Review comment from a project member] Can you add a comment explaining why this and the next test method result in ([], []) being returned no matter what rather than an error when the values are too large? (I suspect readers with a knowledge of the specific pickle protocol may understand, but it isn't obvious otherwise)

data = lambda n: (b'((lp' + str(n).encode() + b'\n' +
b'g' + str(n).encode() + b'\nt.')
for idx in [10**6, 10**9, 10**12]:
self.assertEqual(self.loads(data(idx)), ([],)*2)
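
An editor's illustration for the reviewer's question above, using only documented protocol-0 opcode semantics: `p<n>` (PUT) stores the stack top under memo key `n`, and `g<n>` (GET) pushes it back, so the number is just a key and the stream decodes to `([], [])` for any `n`. The hazard being tested is presumably the C unpickler's index-based memo table, which a huge key could otherwise make overallocate.

```python
import pickle
import pickletools

data = b'((lp1000000\ng1000000\nt.'
pickletools.dis(data)      # MARK, MARK, LIST, PUT 1000000, GET 1000000, TUPLE, STOP
print(pickle.loads(data))  # ([], []) -- both elements are the same list object
```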

def test_too_large_long_binput(self):
data = lambda n: (b'(]r' + struct.pack('<I', n) +
b'j' + struct.pack('<I', n) + b't.')
for idx in self.itersize(1 << 20, min(sys.maxsize, (1 << 32) - 1)):
self.assertEqual(self.loads(data(idx)), ([],)*2)

def _test_truncated_data(self, dumped, expected_error=None):
if expected_error is None:
expected_error = self.truncated_data_error
# BytesIO
with self.assertRaisesRegex(*expected_error):
self.loads(dumped)
if hasattr(self, 'unpickler'):
try:
with open(TESTFN, 'wb') as f:
f.write(dumped)
# buffered file
with open(TESTFN, 'rb') as f:
u = self.unpickler(f)
with self.assertRaisesRegex(*expected_error):
u.load()
# unbuffered file
with open(TESTFN, 'rb', buffering=0) as f:
u = self.unpickler(f)
with self.assertRaisesRegex(*expected_error):
u.load()
finally:
os_helper.unlink(TESTFN)
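
A standalone version of what this helper exercises, as an editor's sketch: a BINBYTES8 opcode whose 8-byte length field claims far more data than the stream contains. On a 64-bit build with this change applied, the load fails cleanly instead of first attempting the full allocation.

```python
import pickle
import struct

# BINBYTES8 (0x8e) claiming a 1 TiB payload, followed by only five bytes.
data = b'\x8e' + struct.pack('<Q', 1 << 40) + b'.' * 5
try:
    pickle.loads(data)
except (pickle.UnpicklingError, EOFError) as exc:
    print(type(exc).__name__, exc)  # truncated input, not a 1 TiB buffer
```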

def test_truncated_large_binstring(self):
data = lambda size: b'T' + struct.pack('<I', size) + b'.' * 5
self.assertEqual(self.loads(data(4)), '....') # self-testing
for size in self.itersize(1 << 10, min(sys.maxsize - 5, (1 << 31) - 1)):
self._test_truncated_data(data(size))
self._test_truncated_data(data(1 << 31),
(pickle.UnpicklingError, 'truncated|exceeds|negative byte count'))

def test_truncated_large_binunicode(self):
data = lambda size: b'X' + struct.pack('<I', size) + b'.' * 5
self.assertEqual(self.loads(data(4)), '....') # self-testing
for size in self.itersize(1 << 10, min(sys.maxsize - 5, (1 << 32) - 1)):
self._test_truncated_data(data(size))

def test_truncated_large_binbytes(self):
data = lambda size: b'B' + struct.pack('<I', size) + b'.' * 5
self.assertEqual(self.loads(data(4)), b'....') # self-testing
for size in self.itersize(1 << 10, min(sys.maxsize, 1 << 31)):
self._test_truncated_data(data(size))

def test_truncated_large_long4(self):
data = lambda size: b'\x8b' + struct.pack('<I', size) + b'.' * 5
self.assertEqual(self.loads(data(4)), 0x2e2e2e2e) # self-testing
for size in self.itersize(1 << 10, min(sys.maxsize - 5, (1 << 31) - 1)):
self._test_truncated_data(data(size))
self._test_truncated_data(data(1 << 31),
(pickle.UnpicklingError, 'LONG pickle has negative byte count'))

def test_truncated_large_frame(self):
data = lambda size: b'\x95' + struct.pack('<Q', size) + b'N.'
self.assertIsNone(self.loads(data(2))) # self-testing
for size in self.itersize(1 << 10, sys.maxsize - 9):
self._test_truncated_data(data(size))
if sys.maxsize + 1 < 1 << 64:
self._test_truncated_data(data(sys.maxsize + 1),
((OverflowError, ValueError),
'FRAME length exceeds|frame size > sys.maxsize'))

def test_truncated_large_binunicode8(self):
data = lambda size: b'\x8d' + struct.pack('<Q', size) + b'.' * 5
self.assertEqual(self.loads(data(4)), '....') # self-testing
for size in self.itersize(1 << 10, sys.maxsize - 9):
self._test_truncated_data(data(size))
if sys.maxsize + 1 < 1 << 64:
self._test_truncated_data(data(sys.maxsize + 1), self.size_overflow_error)

def test_truncated_large_binbytes8(self):
data = lambda size: b'\x8e' + struct.pack('<Q', size) + b'.' * 5
self.assertEqual(self.loads(data(4)), b'....') # self-testing
for size in self.itersize(1 << 10, sys.maxsize):
self._test_truncated_data(data(size))
if sys.maxsize + 1 < 1 << 64:
self._test_truncated_data(data(sys.maxsize + 1), self.size_overflow_error)

def test_truncated_large_bytearray8(self):
data = lambda size: b'\x96' + struct.pack('<Q', size) + b'.' * 5
self.assertEqual(self.loads(data(4)), bytearray(b'....')) # self-testing
for size in self.itersize(1 << 10, sys.maxsize):
self._test_truncated_data(data(size))
if sys.maxsize + 1 < 1 << 64:
self._test_truncated_data(data(sys.maxsize + 1), self.size_overflow_error)

def test_badly_escaped_string(self):
self.check_unpickling_error(ValueError, b"S'\\'\n.")

Lib/test/test_pickle.py (7 additions, 1 deletion)
@@ -49,6 +49,8 @@ class PyUnpicklerTests(AbstractUnpickleTests, unittest.TestCase):
truncated_errors = (pickle.UnpicklingError, EOFError,
AttributeError, ValueError,
struct.error, IndexError, ImportError)
truncated_data_error = (EOFError, '')
size_overflow_error = (pickle.UnpicklingError, 'exceeds')

def loads(self, buf, **kwds):
f = io.BytesIO(buf)
@@ -93,6 +95,8 @@ class InMemoryPickleTests(AbstractPickleTests, AbstractUnpickleTests,
truncated_errors = (pickle.UnpicklingError, EOFError,
AttributeError, ValueError,
struct.error, IndexError, ImportError)
truncated_data_error = ((pickle.UnpicklingError, EOFError), '')
size_overflow_error = ((OverflowError, pickle.UnpicklingError), 'exceeds')

def dumps(self, arg, protocol=None, **kwargs):
return pickle.dumps(arg, protocol, **kwargs)
@@ -281,6 +285,8 @@ class CUnpicklerTests(PyUnpicklerTests):
unpickler = _pickle.Unpickler
bad_stack_errors = (pickle.UnpicklingError,)
truncated_errors = (pickle.UnpicklingError,)
truncated_data_error = (pickle.UnpicklingError, 'truncated')
size_overflow_error = (OverflowError, 'exceeds')

class CPicklingErrorTests(PyPicklingErrorTests):
pickler = _pickle.Pickler
@@ -384,7 +390,7 @@ def test_pickler(self):
0) # Write buffer is cleared after every dump().

def test_unpickler(self):
basesize = support.calcobjsize('2P2nP 2P2n2i5P 2P3n8P2n2i')
basesize = support.calcobjsize('2P2n2P 2P2n2i5P 2P3n8P2n2i')
unpickler = _pickle.Unpickler
P = struct.calcsize('P') # Size of memo table entry.
n = struct.calcsize('n') # Size of mark table entry.
New NEWS entry (2 additions)

@@ -0,0 +1,2 @@
:mod:`pickle` now avoids allocating an arbitrarily large amount of memory
based on small untrusted input when unpickling specially crafted data.