@@ -126,7 +126,7 @@ def _lookup_class_or_track(class_tracker_id, class_def):
126126
127127
128128def register_pickle_by_value (module ):
129- """Register a module to make it functions and classes picklable by value.
129+ """Register a module to make its functions and classes picklable by value.
130130
131131 By default, functions and classes that are attributes of an importable
132132 module are to be pickled by reference, that is relying on re-importing
@@ -369,7 +369,7 @@ def func():
369369 # sys.modules.
370370 if name is not None and name .startswith (prefix ):
371371 # check whether the function can address the sub-module
372- tokens = set (name [len (prefix ) :].split ("." ))
372+ tokens = set (name [len (prefix ):].split ("." ))
373373 if not tokens - set (code .co_names ):
374374 subimports .append (sys .modules [name ])
375375 return subimports
@@ -409,7 +409,10 @@ def _walk_global_ops(code):
409409
410410def _extract_class_dict (cls ):
411411 """Retrieve a copy of the dict of a class without the inherited methods."""
412- clsdict = dict (cls .__dict__ ) # copy dict proxy to a dict
412+ # Hack to circumvent non-predictable memoization caused by string interning.
413+ # See the inline comment in _class_setstate for details.
414+ clsdict = {"" .join (k ): cls .__dict__ [k ] for k in sorted (cls .__dict__ )}
415+
413416 if len (cls .__bases__ ) == 1 :
414417 inherited_dict = cls .__bases__ [0 ].__dict__
415418 else :
@@ -533,9 +536,15 @@ class id will also reuse this class definition.
533536 The "extra" variable is meant to be a dict (or None) that can be used for
534537 forward compatibility should the need arise.
535538 """
539+ # We need to intern the keys of the type_kwargs dict to avoid having
540+ # different pickles for the same dynamic class depending on whether it was
541+ # dynamically created or reconstructed from a pickled stream.
542+ type_kwargs = {sys .intern (k ): v for k , v in type_kwargs .items ()}
543+
536544 skeleton_class = types .new_class (
537545 name , bases , {"metaclass" : type_constructor }, lambda ns : ns .update (type_kwargs )
538546 )
547+
539548 return _lookup_class_or_track (class_tracker_id , skeleton_class )
540549
541550
@@ -694,7 +703,9 @@ def _function_getstate(func):
694703 # unpickling time by iterating over slotstate and calling setattr(func,
695704 # slotname, slotvalue)
696705 slotstate = {
697- "__name__" : func .__name__ ,
706+ # Hack to circumvent non-predictable memoization caused by string interning.
707+ # See the inline comment in _class_setstate for details.
708+ "__name__" : "" .join (func .__name__ ),
698709 "__qualname__" : func .__qualname__ ,
699710 "__annotations__" : func .__annotations__ ,
700711 "__kwdefaults__" : func .__kwdefaults__ ,
@@ -721,7 +732,9 @@ def _function_getstate(func):
721732 )
722733 slotstate ["__globals__" ] = f_globals
723734
724- state = func .__dict__
735+ # Hack to circumvent non-predictable memoization caused by string interning.
736+ # See the inline comment in _class_setstate for details.
737+ state = {"" .join (k ): v for k , v in func .__dict__ .items ()}
725738 return state , slotstate
726739
727740
@@ -802,6 +815,19 @@ def _code_reduce(obj):
802815 # of the specific type from types, for example:
803816 # >>> from types import CodeType
804817 # >>> help(CodeType)
818+
819+ # Hack to circumvent non-predictable memoization caused by string interning.
820+ # See the inline comment in _class_setstate for details.
821+ co_name = "" .join (obj .co_name )
822+
823+ # Create shallow copies of these tuples to make cloudpickle payload deterministic.
824+ # When creating a code object during load, copies of these four tuples are
825+ # created, while in the main process, these tuples can be shared.
826+ # By always creating copies, we make sure the resulting payload is deterministic.
827+ co_names = tuple (name for name in obj .co_names )
828+ co_varnames = tuple (name for name in obj .co_varnames )
829+ co_freevars = tuple (name for name in obj .co_freevars )
830+ co_cellvars = tuple (name for name in obj .co_cellvars )
805831 if hasattr (obj , "co_exceptiontable" ):
806832 # Python 3.11 and later: there are some new attributes
807833 # related to the enhanced exceptions.
@@ -814,16 +840,16 @@ def _code_reduce(obj):
814840 obj .co_flags ,
815841 obj .co_code ,
816842 obj .co_consts ,
817- obj . co_names ,
818- obj . co_varnames ,
843+ co_names ,
844+ co_varnames ,
819845 obj .co_filename ,
820- obj . co_name ,
846+ co_name ,
821847 obj .co_qualname ,
822848 obj .co_firstlineno ,
823849 obj .co_linetable ,
824850 obj .co_exceptiontable ,
825- obj . co_freevars ,
826- obj . co_cellvars ,
851+ co_freevars ,
852+ co_cellvars ,
827853 )
828854 elif hasattr (obj , "co_linetable" ):
829855 # Python 3.10 and later: obj.co_lnotab is deprecated and constructor
@@ -837,14 +863,14 @@ def _code_reduce(obj):
837863 obj .co_flags ,
838864 obj .co_code ,
839865 obj .co_consts ,
840- obj . co_names ,
841- obj . co_varnames ,
866+ co_names ,
867+ co_varnames ,
842868 obj .co_filename ,
843- obj . co_name ,
869+ co_name ,
844870 obj .co_firstlineno ,
845871 obj .co_linetable ,
846- obj . co_freevars ,
847- obj . co_cellvars ,
872+ co_freevars ,
873+ co_cellvars ,
848874 )
849875 elif hasattr (obj , "co_nmeta" ): # pragma: no cover
850876 # "nogil" Python: modified attributes from 3.9
@@ -859,15 +885,15 @@ def _code_reduce(obj):
859885 obj .co_flags ,
860886 obj .co_code ,
861887 obj .co_consts ,
862- obj . co_varnames ,
888+ co_varnames ,
863889 obj .co_filename ,
864- obj . co_name ,
890+ co_name ,
865891 obj .co_firstlineno ,
866892 obj .co_lnotab ,
867893 obj .co_exc_handlers ,
868894 obj .co_jump_table ,
869- obj . co_freevars ,
870- obj . co_cellvars ,
895+ co_freevars ,
896+ co_cellvars ,
871897 obj .co_free2reg ,
872898 obj .co_cell2reg ,
873899 )
@@ -882,14 +908,14 @@ def _code_reduce(obj):
882908 obj .co_flags ,
883909 obj .co_code ,
884910 obj .co_consts ,
885- obj . co_names ,
886- obj . co_varnames ,
911+ co_names ,
912+ co_varnames ,
887913 obj .co_filename ,
888- obj . co_name ,
914+ co_name ,
889915 obj .co_firstlineno ,
890916 obj .co_lnotab ,
891- obj . co_freevars ,
892- obj . co_cellvars ,
917+ co_freevars ,
918+ co_cellvars ,
893919 )
894920 return types .CodeType , args
895921
@@ -1127,6 +1153,18 @@ def _class_setstate(obj, state):
11271153 if attrname == "_abc_impl" :
11281154 registry = attr
11291155 else :
1156+ # Note: setting attribute names on a class automatically triggers their
1157+ # interning in CPython:
1158+ # https://github.com/python/cpython/blob/v3.12.0/Objects/object.c#L957
1159+ #
1160+ # This means that to get deterministic pickling for a dynamic class that
1161+ # was initially defined in a different Python process, the pickler
1162+ # needs to ensure that dynamic class and function attribute names are
1163+ # systematically copied into a non-interned version to avoid
1164+ # unpredictable pickle payloads.
1165+ #
1166+ # Indeed the Pickler's memoizer relies on physical object identity to break
1167+ # cycles in the reference graph of the object being serialized.
11301168 setattr (obj , attrname , attr )
11311169 if registry is not None :
11321170 for subclass in registry :
0 commit comments