cloudpickle/__init__.py (1 change: 1 addition & 0 deletions)

@@ -9,6 +9,7 @@
    "__version__",
    "Pickler",
    "CloudPickler",
+    "PurePythonPickler",
    "dumps",
    "loads",
    "dump",
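With `"PurePythonPickler"` added to `__all__`, the pure-Python pickler becomes part of the public API. A quick import sanity check (a sketch assuming this branch is installed):

```python
import io

import cloudpickle

# PurePythonPickler should now be importable from the top-level package.
pickler = cloudpickle.PurePythonPickler(io.BytesIO())
```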
cloudpickle/cloudpickle.py (251 changes: 148 additions & 103 deletions)

@@ -1211,7 +1211,16 @@ def _get_dataclass_field_type_sentinel(name):
    return _DATACLASSE_FIELD_TYPE_SENTINELS[name]


-class Pickler(pickle.Pickler):
+class BaseCloudPickler:
+    """Base class for the logic shared by FastPickler and PurePythonPickler.
+
+    Cloudpickle provides two picklers: one extending the C implementation of
+    the CPython pickler and another extending the pure-Python pickler.
+    FastPickler and PurePythonPickler inherit from BaseCloudPickler and give
+    it access to the C or pure-Python pickler, respectively, by implementing
+    the super_pickler() method.
+    """

    # set of reducers defined and used by cloudpickle (private)
    _dispatch_table = {}
    _dispatch_table[classmethod] = _classmethod_reduce
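The hunk above replaces the single `Pickler` class with a shared base class. A minimal standalone sketch of the cooperative-inheritance pattern it introduces (toy `_Base`/`_Fast` names, not the cloudpickle code):

```python
import io
import pickle


class _Base:
    """Shared logic; subclasses say which pickle implementation backs them."""

    def dump(self, obj):
        # Route through the proxy returned by super_pickler() rather than a
        # bare super() call, so the same body serves both subclasses.
        return self.super_pickler().dump(obj)

    def super_pickler(self):
        raise NotImplementedError


class _Fast(_Base, pickle.Pickler):
    def super_pickler(self):
        # Skip past _Base in the MRO, landing on pickle.Pickler.
        return super(_Base, self)


_Fast(io.BytesIO()).dump([1, 2, 3])  # behaves like a plain pickle.Pickler
```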
@@ -1300,7 +1309,7 @@ def _function_getnewargs(self, func):

    def dump(self, obj):
        try:
-            return super().dump(obj)
+            return self.super_pickler().dump(obj)
        except RuntimeError as e:
            if len(e.args) > 0 and "recursion" in e.args[0]:
                msg = "Could not pickle object as excessively deep recursion required."
@@ -1311,14 +1320,33 @@ def dump(self, obj):
    def __init__(self, file, protocol=None, buffer_callback=None):
        if protocol is None:
            protocol = DEFAULT_PROTOCOL
-        super().__init__(file, protocol=protocol, buffer_callback=buffer_callback)
        # map functions __globals__ attribute ids, to ensure that functions
        # sharing the same global namespace at pickling time also share
        # their global namespace at unpickling time.
        self.globals_ref = {}
        self.proto = int(protocol)
+        self.super_pickler().__init__(
+            file, protocol=protocol, buffer_callback=buffer_callback
+        )
+
+    def super_pickler(self):
+        """Return the super() proxy of the underlying pickler implementation."""
+        raise NotImplementedError

+if not PYPY:
+
+    class FastPickler(BaseCloudPickler, pickle.Pickler):
+        """Fast pickler extending the C implementation of the CPython pickler.
+
+        The FastPickler is not available on PyPy and does not support
+        overriding how built-in types are pickled.
+        """
+
+        def __init__(self, file, protocol=None, buffer_callback=None):
+            super(FastPickler, self).__init__(file, protocol, buffer_callback)
+
+        def super_pickler(self):
+            return super(BaseCloudPickler, self)

-    if not PYPY:
        # pickle.Pickler is the C implementation of the CPython pickler and
        # therefore we rely on the reducer_override method to customize the
        # pickler behavior.
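The two-argument `super(BaseCloudPickler, self)` in `super_pickler()` is the key trick: it resumes attribute lookup in the MRO just after `BaseCloudPickler`, which lands on whichever `pickle` class the concrete subclass mixed in. A toy illustration (not the cloudpickle code):

```python
import pickle


class Base:
    pass


class Fast(Base, pickle.Pickler):
    pass


# Lookup order: Fast -> Base -> pickle.Pickler -> object, so starting the
# search after Base resolves attributes on pickle.Pickler.
print([cls.__name__ for cls in Fast.__mro__])
# ['Fast', 'Base', 'Pickler', 'object']
```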
@@ -1334,7 +1362,7 @@ def __init__(self, file, protocol=None, buffer_callback=None):
        # name was not a great choice because it would collide with a
        # similarly named attribute in the pure-Python `pickle._Pickler`
        # implementation in the standard library.
-        dispatch = dispatch_table
+        dispatch = BaseCloudPickler.dispatch_table

        # Implementation of the reducer_override callback, in order to
        # efficiently serialize dynamic functions and classes by subclassing
@@ -1391,113 +1419,123 @@ def reducer_override(self, obj):
            # dispatch_table
            return NotImplemented
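For context on the hook referenced above: `reducer_override` is consulted by the protocol-5 C pickler before the regular reduce machinery for most objects (built-in atomic types excepted, which is exactly why FastPickler cannot override how they are pickled), and returning `NotImplemented` falls back to the default behavior. A toy example of the standard-library hook, unrelated to cloudpickle's own reducers:

```python
import io
import pickle


class Secret:
    def __init__(self, value):
        self.value = value


class RedactingPickler(pickle.Pickler):
    def reducer_override(self, obj):
        # Intercept Secret instances; defer everything else.
        if isinstance(obj, Secret):
            return (Secret, ("<redacted>",))
        return NotImplemented


buf = io.BytesIO()
RedactingPickler(buf).dump(Secret("hunter2"))
assert pickle.loads(buf.getvalue()).value == "<redacted>"
```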

-    else:
-        # When reducer_override is not available, hack the pure-Python
-        # Pickler's types.FunctionType and type savers. Note: the type saver
-        # must override Pickler.save_global, because pickle.py contains a
-        # hard-coded call to save_global when pickling meta-classes.
-        dispatch = pickle.Pickler.dispatch.copy()
+
+class PurePythonPickler(BaseCloudPickler, pickle._Pickler):
+    """Pure-Python pickler.
+
+    This pickler supports overriding how built-in types are pickled.
+    """
+
+    def __init__(self, file, protocol=None, buffer_callback=None):
+        super(PurePythonPickler, self).__init__(file, protocol, buffer_callback)
+
+    def super_pickler(self):
+        return super(BaseCloudPickler, self)
+
+    # When reducer_override is not available, hack the pure-Python
+    # Pickler's types.FunctionType and type savers. Note: the type saver
+    # must override Pickler.save_global, because pickle.py contains a
+    # hard-coded call to save_global when pickling meta-classes.
+    dispatch = pickle._Pickler.dispatch.copy()
+
+    def _save_reduce_pickle5(
+        self,
+        func,
+        args,
+        state=None,
+        listitems=None,
+        dictitems=None,
+        state_setter=None,
+        obj=None,
+    ):
+        save = self.save
+        write = self.write
+        self.save_reduce(
+            func,
+            args,
+            state=None,
+            listitems=listitems,
+            dictitems=dictitems,
+            obj=obj,
+        )
+        # backport of the Python 3.8 state_setter pickle operations
+        save(state_setter)
+        save(obj)  # simple BINGET opcode as obj is already memoized.
+        save(state)
+        write(pickle.TUPLE2)
+        # Trigger a state_setter(obj, state) function call.
+        write(pickle.REDUCE)
+        # The purpose of state_setter is to carry out an in-place
+        # modification of obj. We do not care about what the method
+        # might return, so its output is eventually removed from the
+        # stack.
+        write(pickle.POP)
+
+    def save_global(self, obj, name=None, pack=struct.pack):
+        """Main dispatch method.
+
+        The name of this method is somewhat misleading: all types get
+        dispatched here.
+        """
+        if obj is type(None):  # noqa
+            return self.save_reduce(type, (None,), obj=obj)
+        elif obj is type(Ellipsis):
+            return self.save_reduce(type, (Ellipsis,), obj=obj)
+        elif obj is type(NotImplemented):
+            return self.save_reduce(type, (NotImplemented,), obj=obj)
+        elif obj in _BUILTIN_TYPE_NAMES:
+            return self.save_reduce(
+                _builtin_type, (_BUILTIN_TYPE_NAMES[obj],), obj=obj
+            )
+
+        if name is not None:
+            super().save_global(obj, name=name)
+        elif not _should_pickle_by_reference(obj, name=name):
+            self._save_reduce_pickle5(*_dynamic_class_reduce(obj), obj=obj)
+        else:
+            super().save_global(obj, name=name)
+
+    dispatch[type] = save_global
+
+    def save_function(self, obj, name=None):
+        """Registered with the dispatch to handle all function types.
+
+        Determines what kind of function obj is (e.g. lambda, defined at
+        interactive prompt, etc.) and handles the pickling appropriately.
+        """
+        if _should_pickle_by_reference(obj, name=name):
+            return super().save_global(obj, name=name)
+        elif PYPY and isinstance(obj.__code__, builtin_code_type):
+            return self.save_pypy_builtin_func(obj)
+        else:
+            return self._save_reduce_pickle5(
+                *self._dynamic_function_reduce(obj), obj=obj
+            )
+
+    def save_pypy_builtin_func(self, obj):
+        """Save the PyPy equivalent of builtin functions.
+
+        PyPy does not have the concept of builtin functions. Instead,
+        builtin functions are simple function instances, but with a
+        builtin-code attribute.
+        Most of the time, builtin functions should be pickled by attribute.
+        But PyPy has flaky support for __qualname__, so some builtin
+        functions such as float.__new__ will be classified as dynamic. For
+        this reason only, we created this special routine. Because builtin
+        functions are not expected to have closures or globals, there is no
+        additional hack (compared to the one already implemented in pickle)
+        to protect ourselves from reference cycles. A simple
+        (reconstructor, newargs, obj.__dict__) tuple is save_reduced. Note
+        also that PyPy improved their support for __qualname__ in v3.6, so
+        this routine should be removed when cloudpickle supports only PyPy
+        3.6 and later.
+        """
+        rv = (
+            types.FunctionType,
+            (obj.__code__, {}, obj.__name__, obj.__defaults__, obj.__closure__),
+            obj.__dict__,
+        )
+        self.save_reduce(*rv, obj=obj)
+
+    dispatch[types.FunctionType] = save_function
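The `save_function` path above is what lets the pure-Python pickler serialize functions the standard library refuses, such as lambdas, by value. A round-trip using the existing public helpers (standard cloudpickle behavior, not specific to this diff):

```python
import pickle

import cloudpickle

# Dynamic functions are pickled by value (code, globals, closure), so the
# loading process does not need access to the defining module.
payload = cloudpickle.dumps(lambda x: 2 * x)
restored = pickle.loads(payload)  # plain pickle can read the payload
assert restored(21) == 42
```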


# Shorthands similar to pickle.dump/pickle.dumps
@@ -1541,5 +1579,12 @@ def dumps(obj, protocol=None, buffer_callback=None):
# Include pickle's unloading functions in this namespace for convenience.
load, loads = pickle.load, pickle.loads

+# Use the fast pickler extending the C implementation of the pickler when it
+# is available.
+if PYPY:
+    Pickler = PurePythonPickler
+else:
+    Pickler = FastPickler

# Backward compat alias.
CloudPickler = Pickler
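With the alias above, downstream code that references `cloudpickle.Pickler` transparently gets the C-backed pickler on CPython and the pure-Python one on PyPy. A small check, assuming the class names introduced in this diff:

```python
import platform

import cloudpickle

# Expected: "FastPickler" on CPython, "PurePythonPickler" on PyPy.
print(platform.python_implementation(), cloudpickle.Pickler.__name__)
assert cloudpickle.CloudPickler is cloudpickle.Pickler  # backward-compat alias
```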
tests/cloudpickle_test.py (8 changes: 8 additions & 0 deletions)

@@ -3040,6 +3040,14 @@ def echo(*args):
""".format(protocol=self.protocol)
assert_run_python_script(code)

+    def test_pure_python_pickler_overrides_built_in_type_pickling(self):
+        bio = io.BytesIO()
+        pickler = cloudpickle.PurePythonPickler(bio)
+        pickler.dispatch[set] = lambda p, s: p.save_set([-e for e in s])
+        pickler.dump({1, 2, 3, 4, 5})
+        bio.seek(0)
+        self.assertEqual(cloudpickle.load(bio), {-1, -2, -3, -4, -5})


class Protocol2CloudPickleTest(CloudPickleTest):
    protocol = 2
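One caveat about the new test: `dispatch` is a class-level dict, so `pickler.dispatch[set] = ...` mutates the table shared by every `PurePythonPickler` instance in the process. If isolation matters, a per-instance copy avoids that; a sketch assuming this branch:

```python
import io

import cloudpickle

pickler = cloudpickle.PurePythonPickler(io.BytesIO())
# Shadow the shared class-level table with an instance-local copy before
# mutating it, so other picklers keep the default handling for sets.
pickler.dispatch = pickler.dispatch.copy()
pickler.dispatch[set] = lambda p, s: p.save_set([-e for e in s])
```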