34 changes: 28 additions & 6 deletions Lib/pickle.py
@@ -191,6 +191,11 @@ def __init__(self, value):
 __all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$", x)])


+# Data larger than this will be read in chunks, to prevent extreme
+# overallocation.
+SAFE_BUF_SIZE = (1 << 20)
+
+
 class _Framer:

     _FRAME_SIZE_MIN = 4
@@ -289,7 +294,7 @@ def read(self, n):
                     "pickle exhausted before end of frame")
             return data
         else:
-            return self.file_read(n)
+            return self.safe_file_read(n)

     def readline(self):
         if self.current_frame:
@@ -304,11 +309,23 @@ def readline(self):
         else:
             return self.file_readline()

+    def safe_file_read(self, size):
Member: Don't use an ill-defined word such as "safe" in the name. Just describe what it does: chunked_file_read() makes more sense. Same with SAFE_BUF_SIZE: just call it _READ_BUF_SIZE (with an underscore prefix so it isn't considered a public API).

Member Author: Note that there is _safe_read in http.client, although ironically it is vulnerable to this kind of issue.

I'm afraid that a name like chunked_file_read could be misleading, as the data is not read in fixed-size chunks. I guess this is what led you to the misunderstanding about the O(N^2) behavior.
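A back-of-the-envelope illustration of that point (illustrative only, not part of the patch): each `b += ...` copies the buffer it extends, so fixed-size chunks would copy a total that grows quadratically with the claimed size N, while doubling the chunk size bounds the total copying by a geometric series, i.e. O(N).

# Illustrative only, not part of the patch: total bytes copied by
# repeated `b += chunk` for a claimed size N, comparing doubling
# chunks (as in the method below) against fixed 1 MiB chunks.
def total_copied(n, next_delta):
    copied = 0
    cur = min(n, 1 << 20)              # initial read
    while cur < n:
        delta = next_delta(cur, n)
        copied += cur + delta          # b += ... copies old buffer plus new data
        cur += delta
    return copied

N = 1 << 30
print(total_copied(N, lambda cur, n: min(cur, n - cur)) / N)      # ~2: linear in N
print(total_copied(N, lambda cur, n: min(1 << 20, n - cur)) / N)  # ~512: quadratic overall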

+        cursize = min(size, SAFE_BUF_SIZE)
+        b = self.file_read(cursize)
+        while cursize < size and len(b) == cursize:
+            delta = min(cursize, size - cursize)
+            b += self.file_read(delta)
+            cursize += delta
+        return b

     def load_frame(self, frame_size):
         if self.current_frame and self.current_frame.read() != b'':
             raise UnpicklingError(
                 "beginning of a new frame before end of current frame")
-        self.current_frame = io.BytesIO(self.file_read(frame_size))
+        data = self.safe_file_read(frame_size)
+        if len(data) < frame_size:
+            raise EOFError
+        self.current_frame = io.BytesIO(data)


 # Tools used for pickling.
@@ -1378,12 +1395,17 @@ def load_binbytes8(self):
     dispatch[BINBYTES8[0]] = load_binbytes8

     def load_bytearray8(self):
-        len, = unpack('<Q', self.read(8))
-        if len > maxsize:
+        size, = unpack('<Q', self.read(8))
+        if size > maxsize:
             raise UnpicklingError("BYTEARRAY8 exceeds system's maximum size "
                                   "of %d bytes" % maxsize)
-        b = bytearray(len)
-        self.readinto(b)
+        cursize = min(size, SAFE_BUF_SIZE)
+        b = bytearray(cursize)
+        if self.readinto(b) == cursize:
+            while cursize < size and len(b) == cursize:
+                delta = min(cursize, size - cursize)
+                b += self.read(delta)
+                cursize += delta
         self.append(b)
     dispatch[BYTEARRAY8[0]] = load_bytearray8

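An illustrative sketch of the new load_frame behavior (assumes a build with this patch applied): a FRAME opcode may claim an enormous length, but the unpickler now checks that the bytes actually arrived before buffering the frame, so a truncated stream fails fast after a bounded allocation.

import io
import pickle
import struct

# A FRAME opcode claiming a 1 TiB frame, followed by almost nothing
# (the same shape as test_truncated_large_frame below).
data = b'\x95' + struct.pack('<Q', 1 << 40) + b'N.'
try:
    pickle.Unpickler(io.BytesIO(data)).load()
except (EOFError, pickle.UnpicklingError) as exc:
    print(type(exc).__name__)  # fails fast instead of trusting the claimed length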
101 changes: 98 additions & 3 deletions Lib/test/pickletester.py
@@ -853,9 +853,8 @@ def assert_is_copy(self, obj, objcopy, msg=None):
             self.assertEqual(getattr(obj, slot, None),
                              getattr(objcopy, slot, None), msg=msg)

-    def check_unpickling_error(self, errors, data):
-        with self.subTest(data=data), \
-             self.assertRaises(errors):
+    def check_unpickling_error_strict(self, errors, data):
+        with self.assertRaises(errors):
             try:
                 self.loads(data)
             except BaseException as exc:
@@ -864,6 +863,10 @@ def check_unpickling_error(self, errors, data):
                           (data, exc.__class__.__name__, exc))
                 raise

+    def check_unpickling_error(self, errors, data):
+        with self.subTest(data=data):
+            self.check_unpickling_error_strict(errors, data)
+
     def test_load_from_data0(self):
         self.assert_is_copy(self._testdata, self.loads(DATA0))

@@ -1115,6 +1118,98 @@ def test_negative_32b_binput(self):
         dumped = b'\x80\x03X\x01\x00\x00\x00ar\xff\xff\xff\xff.'
         self.check_unpickling_error(ValueError, dumped)

+    def itersize(self, start, stop):
+        size = start
+        while size < stop:
+            yield size
+            size <<= 1
+        yield stop
+
+    def _test_truncated_data(self, dumped, expected_error=None):
+        if expected_error is None:
+            expected_error = self.truncated_data_error
+        # BytesIO
+        with self.assertRaisesRegex(*expected_error):
+            self.loads(dumped)
+        if hasattr(self, 'unpickler'):
+            try:
+                with open(TESTFN, 'wb') as f:
+                    f.write(dumped)
+                # buffered file
+                with open(TESTFN, 'rb') as f:
+                    u = self.unpickler(f)
+                    with self.assertRaisesRegex(*expected_error):
+                        u.load()
+                # unbuffered file
+                with open(TESTFN, 'rb', buffering=0) as f:
+                    u = self.unpickler(f)
+                    with self.assertRaisesRegex(*expected_error):
+                        u.load()
+            finally:
+                os_helper.unlink(TESTFN)
+
+    def test_truncated_large_binstring(self):
+        data = lambda size: b'T' + struct.pack('<I', size) + b'.' * 5
+        self.assertEqual(self.loads(data(4)), '....')  # self-testing
+        for size in self.itersize(1 << 10, min(sys.maxsize - 5, (1 << 31) - 1)):
+            self._test_truncated_data(data(size))
+        self._test_truncated_data(data(1 << 31),
+            (pickle.UnpicklingError, 'truncated|exceeds|negative byte count'))
+
+    def test_truncated_large_binunicode(self):
+        data = lambda size: b'X' + struct.pack('<I', size) + b'.' * 5
+        self.assertEqual(self.loads(data(4)), '....')  # self-testing
+        for size in self.itersize(1 << 10, min(sys.maxsize - 5, (1 << 32) - 1)):
+            self._test_truncated_data(data(size))
+
+    def test_truncated_large_binbytes(self):
+        data = lambda size: b'B' + struct.pack('<I', size) + b'.' * 5
+        self.assertEqual(self.loads(data(4)), b'....')  # self-testing
+        for size in self.itersize(1 << 10, min(sys.maxsize, 1 << 31)):
+            self._test_truncated_data(data(size))
+
+    def test_truncated_large_long4(self):
+        data = lambda size: b'\x8b' + struct.pack('<I', size) + b'.' * 5
+        self.assertEqual(self.loads(data(4)), 0x2e2e2e2e)  # self-testing
+        for size in self.itersize(1 << 10, min(sys.maxsize - 5, (1 << 31) - 1)):
+            self._test_truncated_data(data(size))
+        self._test_truncated_data(data(1 << 31),
+            (pickle.UnpicklingError, 'LONG pickle has negative byte count'))
+
+    def test_truncated_large_frame(self):
+        data = lambda size: b'\x95' + struct.pack('<Q', size) + b'N.'
+        self.assertIsNone(self.loads(data(2)))  # self-testing
+        for size in self.itersize(1 << 10, sys.maxsize - 9):
+            self._test_truncated_data(data(size))
+        if sys.maxsize + 1 < 1 << 64:
+            self._test_truncated_data(data(sys.maxsize + 1),
+                ((OverflowError, ValueError),
+                 'FRAME length exceeds|frame size > sys.maxsize'))
+
+    def test_truncated_large_binunicode8(self):
+        data = lambda size: b'\x8d' + struct.pack('<Q', size) + b'.' * 5
+        self.assertEqual(self.loads(data(4)), '....')  # self-testing
+        for size in self.itersize(1 << 10, sys.maxsize - 9):
+            self._test_truncated_data(data(size))
+        if sys.maxsize + 1 < 1 << 64:
+            self._test_truncated_data(data(sys.maxsize + 1), self.size_overflow_error)
+
+    def test_truncated_large_binbytes8(self):
+        data = lambda size: b'\x8e' + struct.pack('<Q', size) + b'.' * 5
+        self.assertEqual(self.loads(data(4)), b'....')  # self-testing
+        for size in self.itersize(1 << 10, sys.maxsize):
+            self._test_truncated_data(data(size))
+        if sys.maxsize + 1 < 1 << 64:
+            self._test_truncated_data(data(sys.maxsize + 1), self.size_overflow_error)
+
+    def test_truncated_large_bytearray8(self):
+        data = lambda size: b'\x96' + struct.pack('<Q', size) + b'.' * 5
+        self.assertEqual(self.loads(data(4)), bytearray(b'....'))  # self-testing
+        for size in self.itersize(1 << 10, sys.maxsize):
+            self._test_truncated_data(data(size))
+        if sys.maxsize + 1 < 1 << 64:
+            self._test_truncated_data(data(sys.maxsize + 1), self.size_overflow_error)
+
     def test_badly_escaped_string(self):
         self.check_unpickling_error(ValueError, b"S'\\'\n.")

38 changes: 38 additions & 0 deletions Lib/test/test_pickle.py
@@ -48,12 +48,27 @@ class PyUnpicklerTests(AbstractUnpickleTests, unittest.TestCase):
     truncated_errors = (pickle.UnpicklingError, EOFError,
                         AttributeError, ValueError,
                         struct.error, IndexError, ImportError)
+    truncated_data_error = (EOFError, '')
+    size_overflow_error = (pickle.UnpicklingError, 'exceeds')

     def loads(self, buf, **kwds):
         f = io.BytesIO(buf)
         u = self.unpickler(f, **kwds)
         return u.load()

+    def test_too_large_put(self):
+        data = lambda n: (b'((lp' + str(n).encode() + b'\n' +
+                          b'g' + str(n).encode() + b'\nt.')
+        for idx in [10**6, 10**9, 10**12]:
+            self.assertEqual(self.loads(data(idx)), ([],)*2)
+
+    def test_too_large_long_binput(self):
+        data = lambda n: (b'(]r' + struct.pack('<I', n) +
+                          b'j' + struct.pack('<I', n) + b't.')
+        for idx in self.itersize(1 << 17, min(sys.maxsize, (1 << 32) - 1)):
+            self.assertEqual(self.loads(data(idx)), ([],)*2)
+

class PyPicklerTests(AbstractPickleTests, unittest.TestCase):

@@ -80,6 +95,8 @@ class InMemoryPickleTests(AbstractPickleTests, AbstractUnpickleTests,
     truncated_errors = (pickle.UnpicklingError, EOFError,
                         AttributeError, ValueError,
                         struct.error, IndexError, ImportError)
+    truncated_data_error = ((pickle.UnpicklingError, EOFError), '')
+    size_overflow_error = ((OverflowError, pickle.UnpicklingError), 'exceeds')

     def dumps(self, arg, protocol=None, **kwargs):
         return pickle.dumps(arg, protocol, **kwargs)
@@ -266,6 +283,27 @@ class CUnpicklerTests(PyUnpicklerTests):
     unpickler = _pickle.Unpickler
     bad_stack_errors = (pickle.UnpicklingError,)
     truncated_errors = (pickle.UnpicklingError,)
+    truncated_data_error = (pickle.UnpicklingError, 'truncated')
+    size_overflow_error = (OverflowError, 'exceeds')

+    def test_too_large_put(self):
+        data = lambda n: (b'((lp' + str(n).encode() + b'\n' +
+                          b'g' + str(n).encode() + b'\nt.')
+        self.assertEqual(self.loads(data(100000)), ([],)*2)  # self-testing
+        for idx in [10**6, 10**9, min(sys.maxsize, 10**12)]:
+            with self.assertRaisesRegex(pickle.UnpicklingError,
+                                        'too sparse memo indices'):
+                self.loads(data(idx))
+
+    def test_too_large_long_binput(self):
+        data = lambda n: (b'(]r' + struct.pack('<I', n) +
+                          b'j' + struct.pack('<I', n) + b't.')
+        self.assertEqual(self.loads(data(1 << 16)), ([],)*2)  # self-testing
+        for idx in self.itersize(1 << 20, min(sys.maxsize, (1 << 32) - 1)):
+            with self.assertRaisesRegex(pickle.UnpicklingError,
+                                        'too sparse memo indices'):
+                self.loads(data(idx))
+
 class CPicklerTests(PyPicklerTests):
     pickler = _pickle.Pickler
@@ -0,0 +1,3 @@
+Fix a vulnerability in the :mod:`pickle` module which allowed consuming an
+arbitrarily large amount of memory when loading a small amount of data that
+does not even involve arbitrary code execution.

Member: Suggested new wording:

    :mod:`pickle` now avoids a potential allocated virtual memory
    address space denial of service condition when
    unpickling specially crafted data.

Pickle isn't for use on untrusted data and no privileges could be escalated, so calling this a vulnerability is a stretch. It also wasn't a bad one: it didn't consume memory, it just mapped virtual address space.

Member Author: It consumes memory. The mapped virtual address space is filled with data (at least in a debug build).

Pickle isn't safe for untrusted data by default, but it can be made safe (by strictly restricting what data it can handle) once this issue is resolved.
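For a concrete sense of the issue, here is an illustrative sketch mirroring the new tests (it assumes a patched build; the sizes are arbitrary): a five-byte payload can claim an 8 GiB BYTEARRAY8 length, and with the chunked reads above, loading fails quickly after a bounded allocation.

import pickle
import struct

# BYTEARRAY8 (b'\x96') plus a little-endian 64-bit length claiming 8 GiB,
# but only five real bytes follow.
data = b'\x96' + struct.pack('<Q', 8 << 30) + b'.' * 5
try:
    pickle.loads(data)
except (pickle.UnpicklingError, EOFError) as exc:
    # With chunked reads the loader allocates at most SAFE_BUF_SIZE
    # before detecting truncation, instead of 8 GiB up front.
    print(type(exc).__name__, exc)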