Skip to content

Commit a22a9a9

Browse files
dguidoclaudethomas-chauchefoin-tob
authored
Fix cyclic AST recursion DoS (issue #196) (#213)
* Fix cyclic AST recursion DoS (issue #196) Cyclic pickle structures (e.g., `L = []; L.append(L)`) caused RecursionError when fickling analyzed them. This fix detects cycles at mutation points (Append, Appends, SetItem, SetItems, AddItems) and replaces self-references with `<cyclic-ref>` placeholder AST nodes. Changes: - Add `_has_cycle` flag to Interpreter to track cycle detection - Modify mutation operations to check if value being added `is` the container - Add `has_cycles` property to Pickled class for downstream awareness - Update tests to verify the fix works correctly Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * Restore cycle detection in ASTProperties visitor This was erroneously added and later removed in PR #210. The visitor override tracks visited nodes by id to prevent infinite recursion when traversing cyclic AST structures created via MEMOIZE + GET opcodes. Co-Authored-By: Dan Guido <dan@trailofbits.com> Co-Authored-By: Claude Code <noreply@anthropic.com> * Merge and uniformize tests * Add tests for all cycle detection corner cases Cover dict value cycles, dict key cycles, and set cycles to ensure the cycle detection feature handles all supported mutation opcodes (SetItem, AddItems). Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * Fix AddItems to peek instead of pop the set Match Python's pickle implementation which uses stack[-1] to peek at the set without removing it from the stack. Ref: https://github.com/python/cpython/blob/6ea3f8cd7ff4ff9769ae276dabc0753e09dca998/Lib/pickle.py#L1840 Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * Use Python-style repr for cyclic references Match Python's repr output for cyclic structures: - List cycles: [[...]] instead of [<cyclic-ref>] - Dict cycles: {'key': {...}} instead of {'key': <cyclic-ref>} - Set cycles: {{...}} instead of {<cyclic-ref>} This makes fickling's AST output directly comparable to what Python displays when printing cyclic data structures. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * Raise InterpretationError for impossible cyclic structures Match Python's behavior for unhashable cyclic references: - Dict key cycles: raise InterpretationError (Python raises TypeError) - Set cycles: raise InterpretationError (Python raises TypeError) - Dict value cycles: still allowed with {...} placeholder - List cycles: still allowed with [...] placeholder This distinguishes between valid cyclic structures that Python can represent (lists containing themselves, dicts with self-referencing values) and impossible structures that would fail at runtime. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com> Co-authored-by: Thomas Chauchefoin <thomas.chauchefoin@trailofbits.com>
1 parent 19e828d commit a22a9a9

File tree

2 files changed

+141
-4
lines changed

2 files changed

+141
-4
lines changed

fickling/fickle.py

Lines changed: 52 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -528,6 +528,19 @@ def __init__(self):
528528
self.calls: list[ast.Call] = []
529529
self.non_setstate_calls: list[ast.Call] = []
530530
self.likely_safe_imports: set[str] = set()
531+
self._visited: set[int] = set() # Track visited nodes by id to detect cycles
532+
533+
def visit(self, node: ast.AST) -> Any:
534+
"""Override visit to detect and skip cycles in the AST.
535+
536+
Pickle files can create cyclic AST structures via MEMOIZE + GET opcodes.
537+
Without cycle detection, visiting such structures causes infinite recursion.
538+
"""
539+
node_id = id(node)
540+
if node_id in self._visited:
541+
return None # Skip already-visited nodes
542+
self._visited.add(node_id)
543+
return super().visit(node)
531544

532545
def _process_import(self, node: ast.Import | ast.ImportFrom):
533546
self.imports.append(node)
@@ -572,6 +585,8 @@ def __init__(self, opcodes: Iterable[Opcode], has_invalid_opcode: bool = False):
572585
# Whether the pickled sequence was interrupted because of
573586
# an invalid opcode
574587
self._has_invalid_opcode: bool = has_invalid_opcode
588+
# Whether the pickle contains cyclic references
589+
self._has_cycles: bool = False
575590
self._has_interpretation_error: bool = False
576591

577592
def __len__(self) -> int:
@@ -891,6 +906,7 @@ def has_invalid_opcode(self) -> bool:
891906

892907
@property
893908
def has_interpretation_error(self) -> bool:
909+
_ = self.ast # Ensure interpretation ran
894910
return self._has_interpretation_error
895911

896912
@staticmethod
@@ -1036,7 +1052,9 @@ def non_standard_imports(self) -> Iterator[ast.Import | ast.ImportFrom]:
10361052
def ast(self) -> ast.Module:
10371053
if self._ast is None:
10381054
try:
1039-
self._ast = Interpreter.interpret(self)
1055+
interpreter = Interpreter(self)
1056+
self._ast = interpreter.to_ast()
1057+
self._has_cycles = interpreter._has_cycle
10401058
except InterpretationError as e:
10411059
self._has_interpretation_error = True
10421060
sys.stderr.write(
@@ -1046,6 +1064,12 @@ def ast(self) -> ast.Module:
10461064
self._ast = ast.Module(body=[], type_ignores=[])
10471065
return self._ast
10481066

1067+
@property
1068+
def has_cycles(self) -> bool:
1069+
"""Check if the pickle contains cyclic references."""
1070+
_ = self.ast # Ensure interpretation ran
1071+
return self._has_cycles
1072+
10491073
@property
10501074
def nb_opcodes(self) -> int:
10511075
return len(self._opcodes)
@@ -1130,6 +1154,7 @@ def __init__(
11301154
self._module: ast.Module | None = None
11311155
self._var_counter: int = first_variable_id
11321156
self._opcodes: Iterator[Opcode] = iter(pickled)
1157+
self._has_cycle: bool = False
11331158

11341159
@property
11351160
def next_variable_id(self) -> int:
@@ -1477,11 +1502,15 @@ def run(self, interpreter: Interpreter):
14771502
raise InterpretationError("Exhausted the stack while searching for a MarkObject!")
14781503
if not interpreter.stack:
14791504
raise ValueError("Stack was empty; expected a pyset")
1480-
pyset = interpreter.stack.pop()
1505+
pyset = interpreter.stack[-1]
14811506
if not isinstance(pyset, ast.Set):
14821507
raise ValueError(
14831508
f"{pyset!r} was expected to be a set-like object with an `add` function"
14841509
)
1510+
# Check for cyclic references - sets cannot contain themselves (unhashable)
1511+
for elem in to_add:
1512+
if elem is pyset:
1513+
raise InterpretationError("Set cannot contain itself (unhashable type)")
14851514
pyset.elts.extend(reversed(to_add))
14861515

14871516

@@ -1760,11 +1789,17 @@ def create(memo_id: int) -> Get:
17601789
class SetItems(StackSliceOpcode):
17611790
name = "SETITEMS"
17621791

1763-
def run(self, interpreter: Interpreter, stack_slice: List[ast.expr]):
1792+
def run(self, interpreter: Interpreter, stack_slice: list[ast.expr]):
17641793
pydict = interpreter.stack.pop()
17651794
update_dict_keys = []
17661795
update_dict_values = []
17671796
for key, value in zip(stack_slice[::2], stack_slice[1::2], strict=False):
1797+
# Check for cyclic references
1798+
if key is pydict:
1799+
raise InterpretationError("Dict cannot use itself as key (unhashable type)")
1800+
if value is pydict:
1801+
value = ast.Set(elts=[ast.Constant(value=...)])
1802+
interpreter._has_cycle = True
17681803
update_dict_keys.append(key)
17691804
update_dict_values.append(value)
17701805
if isinstance(pydict, ast.Dict) and not pydict.keys:
@@ -1792,6 +1827,12 @@ def run(self, interpreter: Interpreter):
17921827
value = interpreter.stack.pop()
17931828
key = interpreter.stack.pop()
17941829
pydict = interpreter.stack.pop()
1830+
# Check for cyclic references
1831+
if key is pydict:
1832+
raise InterpretationError("Dict cannot use itself as key (unhashable type)")
1833+
if value is pydict:
1834+
value = ast.Set(elts=[ast.Constant(value=...)])
1835+
interpreter._has_cycle = True
17951836
if isinstance(pydict, ast.Dict) and not pydict.keys:
17961837
# the dict is empty, so add a new one
17971838
interpreter.stack.append(ast.Dict(keys=[key], values=[value]))
@@ -1871,6 +1912,9 @@ def run(self, interpreter: Interpreter):
18711912
value = interpreter.stack.pop()
18721913
list_obj = interpreter.stack[-1]
18731914
if isinstance(list_obj, ast.List):
1915+
if value is list_obj:
1916+
value = ast.List(elts=[ast.Constant(value=...)], ctx=ast.Load())
1917+
interpreter._has_cycle = True
18741918
list_obj.elts.append(value)
18751919
else:
18761920
raise ValueError(f"Expected a list on the stack, but instead found {list_obj!r}")
@@ -1879,9 +1923,13 @@ def run(self, interpreter: Interpreter):
18791923
class Appends(StackSliceOpcode):
18801924
name = "APPENDS"
18811925

1882-
def run(self, interpreter: Interpreter, stack_slice: List[ast.expr]):
1926+
def run(self, interpreter: Interpreter, stack_slice: list[ast.expr]):
18831927
list_obj = interpreter.stack[-1]
18841928
if isinstance(list_obj, ast.List):
1929+
for i, elem in enumerate(stack_slice):
1930+
if elem is list_obj:
1931+
stack_slice[i] = ast.List(elts=[ast.Constant(value=...)], ctx=ast.Load())
1932+
interpreter._has_cycle = True
18851933
list_obj.elts.extend(stack_slice)
18861934
else:
18871935
raise ValueError(f"Expected a list on the stack, but instead found {list_obj!r}")

test/test_crashes.py

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,17 +10,24 @@
1010

1111
from fickling.analysis import Severity, check_safety
1212
from fickling.fickle import (
13+
AddItems,
14+
Append,
1315
BinGet,
1416
BinInt1,
1517
BinPut,
1618
BinUnicode,
19+
EmptyDict,
20+
EmptyList,
21+
EmptySet,
22+
Get,
1723
Global,
1824
Mark,
1925
Memoize,
2026
Pickled,
2127
Pop,
2228
Proto,
2329
Reduce,
30+
SetItem,
2431
ShortBinUnicode,
2532
StackGlobal,
2633
Stop,
@@ -164,3 +171,85 @@ def test_missing_mark_before_tuple(self):
164171
# Safety check should flag it
165172
results = check_safety(loaded)
166173
self.assertGreater(results.severity, Severity.LIKELY_SAFE)
174+
175+
def test_cyclic_pickle(self):
176+
"""Reproduces https://github.com/trailofbits/fickling/issues/196"""
177+
# List with itself as value: L = []; L.append(L)
178+
pickled = Pickled(
179+
[
180+
Proto(2),
181+
EmptyList(),
182+
Memoize(),
183+
Get(0),
184+
Append(),
185+
Stop(),
186+
]
187+
)
188+
189+
# Should detect cycles
190+
self.assertTrue(pickled.has_cycles)
191+
192+
# Should complete without RecursionError
193+
result = check_safety(pickled)
194+
self.assertIsNotNone(result)
195+
196+
# Cyclic reference should be replaced with placeholders
197+
code = unparse(pickled.ast)
198+
self.assertEqual("result = [[...]]", code)
199+
200+
# Dict with itself as value: d = {}; d["self"] = d
201+
dict_value_cycle = Pickled(
202+
[
203+
Proto(2),
204+
EmptyDict(),
205+
Memoize(),
206+
ShortBinUnicode("self"),
207+
Get(0),
208+
SetItem(),
209+
Stop(),
210+
]
211+
)
212+
self.assertTrue(dict_value_cycle.has_cycles)
213+
self.assertEqual("result = {'self': {...}}", unparse(dict_value_cycle.ast))
214+
215+
def test_impossible_cyclic_pickle(self):
216+
"""Test that impossible cyclic structures raise InterpretationError."""
217+
# Dict with itself as key: d = {}; d[d] = "value"
218+
# Python raises: TypeError: unhashable type: 'dict'
219+
dict_key_cycle = Pickled(
220+
[
221+
Proto(2),
222+
EmptyDict(),
223+
Memoize(),
224+
Get(0),
225+
ShortBinUnicode("value"),
226+
SetItem(),
227+
Stop(),
228+
]
229+
)
230+
# Should flag as having interpretation error
231+
self.assertTrue(dict_key_cycle.has_interpretation_error)
232+
233+
# Safety check should flag it
234+
results = check_safety(dict_key_cycle)
235+
self.assertGreater(results.severity, Severity.LIKELY_SAFE)
236+
237+
# Set containing itself: s = set(); s.add(s)
238+
# Python raises: TypeError: unhashable type: 'set'
239+
set_cycle = Pickled(
240+
[
241+
Proto(4),
242+
EmptySet(),
243+
Memoize(),
244+
Mark(),
245+
Get(0),
246+
AddItems(),
247+
Stop(),
248+
]
249+
)
250+
# Should flag as having interpretation error
251+
self.assertTrue(set_cycle.has_interpretation_error)
252+
253+
# Safety check should flag it
254+
results = check_safety(set_cycle)
255+
self.assertGreater(results.severity, Severity.LIKELY_SAFE)

0 commit comments

Comments
 (0)