Skip to content

Commit c9b9aa5

Browse files
committed
[3.12] pythongh-122213: Add notes for pickle errors
When pickle encounters a serialization error deep in a nested data structure, the resulting traceback gives no indication of where in the object graph the failure occurred, which makes debugging difficult for large or deeply nested objects. With this change, the nested save() calls for reduce values (class, reconstructor, arguments, state, state setter) and for container items and elements catch BaseException, attach a note identifying the context, such as "when serializing list item 3" or "when serializing MyClass state", and re-raise the original exception; saving of dictionary keys is not annotated. Both the Python and C implementations are updated, along with tests asserting the expected notes on all error paths. Backport of c0c2aa7 from main.
1 parent dc35395 commit c9b9aa5

File tree

3 files changed

+530
-117
lines changed

3 files changed

+530
-117
lines changed

Lib/pickle.py

Lines changed: 153 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -397,6 +397,14 @@ def decode_long(data):
397397
return int.from_bytes(data, byteorder='little', signed=True)
398398

399399

400+
def _T(obj):
    """Return a display name for the type of *obj*, for use in error notes.

    The bare ``__qualname__`` is returned when the type's ``__module__`` is
    ``None``, ``'builtins'`` or ``'__main__'``; otherwise the result is
    ``'<module>.<qualname>'``.
    """
    cls = type(obj)
    module = cls.__module__
    # Omit the module prefix where it is absent or adds only noise.
    if module in (None, 'builtins', '__main__'):
        return cls.__qualname__
    return f'{module}.{cls.__qualname__}'
406+
407+
400408
_NoValue = object()
401409

402410
# Pickling machinery
@@ -587,18 +595,22 @@ def save(self, obj, save_persistent_id=True):
587595
self.save_global(obj, rv)
588596
return
589597

590-
# Assert that reduce() returned a tuple
591-
if not isinstance(rv, tuple):
592-
raise PicklingError("%s must return string or tuple" % reduce)
593-
594-
# Assert that it returned an appropriately sized tuple
595-
l = len(rv)
596-
if not (2 <= l <= 6):
597-
raise PicklingError("Tuple returned by %s must have "
598-
"two to six elements" % reduce)
599-
600-
# Save the reduce() output and finally memoize the object
601-
self.save_reduce(obj=obj, *rv)
598+
try:
599+
# Assert that reduce() returned a tuple
600+
if not isinstance(rv, tuple):
601+
raise PicklingError("%s must return string or tuple" % reduce)
602+
603+
# Assert that it returned an appropriately sized tuple
604+
l = len(rv)
605+
if not (2 <= l <= 6):
606+
raise PicklingError("Tuple returned by %s must have "
607+
"two to six elements" % reduce)
608+
609+
# Save the reduce() output and finally memoize the object
610+
self.save_reduce(obj=obj, *rv)
611+
except BaseException as exc:
612+
exc.add_note(f'when serializing {_T(obj)} object')
613+
raise
602614

603615
def persistent_id(self, obj):
604616
# This exists so a subclass can override it
@@ -638,13 +650,25 @@ def save_reduce(self, func, args, state=None, listitems=None,
638650
raise PicklingError("args[0] from {} args has the wrong class"
639651
.format(func_name))
640652
if self.proto >= 4:
641-
save(cls)
642-
save(args)
643-
save(kwargs)
653+
try:
654+
save(cls)
655+
except BaseException as exc:
656+
exc.add_note(f'when serializing {_T(obj)} class')
657+
raise
658+
try:
659+
save(args)
660+
save(kwargs)
661+
except BaseException as exc:
662+
exc.add_note(f'when serializing {_T(obj)} __new__ arguments')
663+
raise
644664
write(NEWOBJ_EX)
645665
else:
646666
func = partial(cls.__new__, cls, *args, **kwargs)
647-
save(func)
667+
try:
668+
save(func)
669+
except BaseException as exc:
670+
exc.add_note(f'when serializing {_T(obj)} reconstructor')
671+
raise
648672
save(())
649673
write(REDUCE)
650674
elif self.proto >= 2 and func_name == "__newobj__":
@@ -682,12 +706,28 @@ def save_reduce(self, func, args, state=None, listitems=None,
682706
raise PicklingError(
683707
"args[0] from __newobj__ args has the wrong class")
684708
args = args[1:]
685-
save(cls)
686-
save(args)
709+
try:
710+
save(cls)
711+
except BaseException as exc:
712+
exc.add_note(f'when serializing {_T(obj)} class')
713+
raise
714+
try:
715+
save(args)
716+
except BaseException as exc:
717+
exc.add_note(f'when serializing {_T(obj)} __new__ arguments')
718+
raise
687719
write(NEWOBJ)
688720
else:
689-
save(func)
690-
save(args)
721+
try:
722+
save(func)
723+
except BaseException as exc:
724+
exc.add_note(f'when serializing {_T(obj)} reconstructor')
725+
raise
726+
try:
727+
save(args)
728+
except BaseException as exc:
729+
exc.add_note(f'when serializing {_T(obj)} reconstructor arguments')
730+
raise
691731
write(REDUCE)
692732

693733
if obj is not None:
@@ -705,23 +745,35 @@ def save_reduce(self, func, args, state=None, listitems=None,
705745
# items and dict items (as (key, value) tuples), or None.
706746

707747
if listitems is not None:
708-
self._batch_appends(listitems)
748+
self._batch_appends(listitems, obj)
709749

710750
if dictitems is not None:
711-
self._batch_setitems(dictitems)
751+
self._batch_setitems(dictitems, obj)
712752

713753
if state is not None:
714754
if state_setter is None:
715-
save(state)
755+
try:
756+
save(state)
757+
except BaseException as exc:
758+
exc.add_note(f'when serializing {_T(obj)} state')
759+
raise
716760
write(BUILD)
717761
else:
718762
# If a state_setter is specified, call it instead of load_build
719763
# to update obj's with its previous state.
720764
# First, push state_setter and its tuple of expected arguments
721765
# (obj, state) onto the stack.
722-
save(state_setter)
766+
try:
767+
save(state_setter)
768+
except BaseException as exc:
769+
exc.add_note(f'when serializing {_T(obj)} state setter')
770+
raise
723771
save(obj) # simple BINGET opcode as obj is already memoized.
724-
save(state)
772+
try:
773+
save(state)
774+
except BaseException as exc:
775+
exc.add_note(f'when serializing {_T(obj)} state')
776+
raise
725777
write(TUPLE2)
726778
# Trigger a state_setter(obj, state) function call.
727779
write(REDUCE)
@@ -901,8 +953,13 @@ def save_tuple(self, obj):
901953
save = self.save
902954
memo = self.memo
903955
if n <= 3 and self.proto >= 2:
904-
for element in obj:
905-
save(element)
956+
for i, element in enumerate(obj):
957+
try:
958+
save(element)
959+
except BaseException as exc:
960+
exc.add_note(
961+
f'when serializing {_T(obj)} item {i}')
962+
raise
906963
# Subtle. Same as in the big comment below.
907964
if id(obj) in memo:
908965
get = self.get(memo[id(obj)][0])
@@ -916,8 +973,13 @@ def save_tuple(self, obj):
916973
# has more than 3 elements.
917974
write = self.write
918975
write(MARK)
919-
for element in obj:
920-
save(element)
976+
for i, element in enumerate(obj):
977+
try:
978+
save(element)
979+
except BaseException as exc:
980+
exc.add_note(
981+
f'when serializing {_T(obj)} item {i}')
982+
raise
921983

922984
if id(obj) in memo:
923985
# Subtle. d was not in memo when we entered save_tuple(), so
@@ -947,34 +1009,55 @@ def save_list(self, obj):
9471009
self.write(MARK + LIST)
9481010

9491011
self.memoize(obj)
950-
self._batch_appends(obj)
1012+
self._batch_appends(obj, obj)
9511013

9521014
dispatch[list] = save_list
9531015

9541016
_BATCHSIZE = 1000
9551017

956-
def _batch_appends(self, items):
1018+
def _batch_appends(self, items, obj=None):
9571019
# Helper to batch up APPENDS sequences
9581020
save = self.save
9591021
write = self.write
9601022

9611023
if not self.bin:
962-
for x in items:
963-
save(x)
1024+
for i, x in enumerate(items):
1025+
try:
1026+
save(x)
1027+
except BaseException as exc:
1028+
if obj is not None:
1029+
exc.add_note(
1030+
f'when serializing {_T(obj)} item {i}')
1031+
raise
9641032
write(APPEND)
9651033
return
9661034

1035+
i = 0
9671036
it = iter(items)
9681037
while True:
9691038
tmp = list(islice(it, self._BATCHSIZE))
9701039
n = len(tmp)
9711040
if n > 1:
9721041
write(MARK)
9731042
for x in tmp:
974-
save(x)
1043+
try:
1044+
save(x)
1045+
except BaseException as exc:
1046+
if obj is not None:
1047+
exc.add_note(
1048+
f'when serializing {_T(obj)} item {i}')
1049+
raise
1050+
i += 1
9751051
write(APPENDS)
9761052
elif n:
977-
save(tmp[0])
1053+
try:
1054+
save(tmp[0])
1055+
except BaseException as exc:
1056+
if obj is not None:
1057+
exc.add_note(
1058+
f'when serializing {_T(obj)} item {i}')
1059+
raise
1060+
i += 1
9781061
write(APPEND)
9791062
# else tmp is empty, and we're done
9801063
if n < self._BATCHSIZE:
@@ -987,19 +1070,25 @@ def save_dict(self, obj):
9871070
self.write(MARK + DICT)
9881071

9891072
self.memoize(obj)
990-
self._batch_setitems(obj.items())
1073+
self._batch_setitems(obj.items(), obj)
9911074

9921075
dispatch[dict] = save_dict
9931076

994-
def _batch_setitems(self, items):
1077+
def _batch_setitems(self, items, obj=None):
9951078
# Helper to batch up SETITEMS sequences; proto >= 1 only
9961079
save = self.save
9971080
write = self.write
9981081

9991082
if not self.bin:
10001083
for k, v in items:
10011084
save(k)
1002-
save(v)
1085+
try:
1086+
save(v)
1087+
except BaseException as exc:
1088+
if obj is not None:
1089+
exc.add_note(
1090+
f'when serializing {_T(obj)} item {k!r}')
1091+
raise
10031092
write(SETITEM)
10041093
return
10051094

@@ -1011,12 +1100,24 @@ def _batch_setitems(self, items):
10111100
write(MARK)
10121101
for k, v in tmp:
10131102
save(k)
1014-
save(v)
1103+
try:
1104+
save(v)
1105+
except BaseException as exc:
1106+
if obj is not None:
1107+
exc.add_note(
1108+
f'when serializing {_T(obj)} item {k!r}')
1109+
raise
10151110
write(SETITEMS)
10161111
elif n:
10171112
k, v = tmp[0]
10181113
save(k)
1019-
save(v)
1114+
try:
1115+
save(v)
1116+
except BaseException as exc:
1117+
if obj is not None:
1118+
exc.add_note(
1119+
f'when serializing {_T(obj)} item {k!r}')
1120+
raise
10201121
write(SETITEM)
10211122
# else tmp is empty, and we're done
10221123
if n < self._BATCHSIZE:
@@ -1040,7 +1141,12 @@ def save_set(self, obj):
10401141
if n > 0:
10411142
write(MARK)
10421143
for item in batch:
1043-
save(item)
1144+
try:
1145+
save(item)
1146+
except BaseException as exc:
1147+
exc.add_note(
1148+
f'when serializing {_T(obj)} element')
1149+
raise
10441150
write(ADDITEMS)
10451151
if n < self._BATCHSIZE:
10461152
return
@@ -1056,7 +1162,12 @@ def save_frozenset(self, obj):
10561162

10571163
write(MARK)
10581164
for item in obj:
1059-
save(item)
1165+
try:
1166+
save(item)
1167+
except BaseException as exc:
1168+
exc.add_note(
1169+
f'when serializing {_T(obj)} element')
1170+
raise
10601171

10611172
if id(obj) in self.memo:
10621173
# If the object is already in the memo, this means it is

0 commit comments

Comments
 (0)