Skip to content

Commit e5e34bb

Browse files
dguidoclaudethomas-chauchefoin-tobgithub-code-quality[bot]
authored
Add DoS protection against expansion attacks (Billion Laughs style) (#211)
* Add DoS protection against expansion attacks (Billion Laughs style) Implement defense in depth against exponential expansion attacks that could cause Fickling to hang or consume excessive memory: 1. Static pattern detection via ExpansionAttackAnalysis: - Detects high GET/PUT ratio (>10x suspicious, >50x likely unsafe) - Detects excessive DUP operations (>100 suspicious) 2. Runtime resource limits via InterpreterLimits: - max_opcodes: 1,000,000 - max_stack_depth: 10,000 - max_memo_size: 100,000 - max_get_ratio: 50 (GETs per PUT) 3. New exception types: - ResourceExhaustionError for limit violations - ExpansionAttackError for expansion attack detection 4. Updated opcode classes to track GET/PUT operations: - BinGet, LongBinGet, Get call track_get() - BinPut, Put, LongBinPut, Memoize call track_put() 5. AnalysisContext catches ResourceExhaustionError and returns LIKELY_OVERTLY_MALICIOUS severity instead of propagating exception Fixes #111 Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * Mark test_cyclic_pickle_dos as expected failure The cyclic AST recursion issue (#196) is being fixed separately in PR #213. Mark this test as expected failure so PR #211 CI can pass. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * Fix review issues in DoS protection implementation - Move _check_limits() after opcode.run() so counters are current - Add ResourceExhaustionError handling in Pickled.ast (returns empty AST) - Broaden AnalysisContext.analyze() catch to handle ValueError, IndexError, RecursionError from malformed pickles - Handle put_count == 0 edge case in ExpansionAttackAnalysis - Simplify combined indicators logic (remove redundant condition) - Make InterpreterLimits frozen with __post_init__ validation - Hardcode resource_type in ExpansionAttackError - Use round() instead of int() for ratio in error messages - Add ResourceExhaustionError handling in CLI decompile path - Split resource limit tests per limit type (opcodes, stack, memo) - Add InterpreterLimits validation test - Remove @expectedfailure from test_cyclic_pickle_dos (now handled) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * Fix failing linting * Fix failing linting * Make ExpansionAttackAnalysis thresholds configurable via __init__ Allows callers to override GET/PUT ratio and DUP count thresholds without monkey-patching class attributes. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * Add minimal test cases for nested Billion Laughs expansion patterns Reproduces globalLaughs.pt (DUP-based) and billionLaughsAlt.pkl (memo-based) DoS patterns that evade current flat thresholds by spreading operations across nested LIST layers. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * Make resource exhaustion detection intentional via dedicated analysis Previously detection depended on UnusedVariables accidentally re-triggering the error. Now Pickled sets a flag and a dedicated analysis checks it. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * Potential fix for pull request finding 'Unused import' Co-authored-by: Copilot Autofix powered by AI <223894421+github-code-quality[bot]@users.noreply.github.com> * Potential fix for pull request finding 'Imprecise assert' Co-authored-by: Copilot Autofix powered by AI <223894421+github-code-quality[bot]@users.noreply.github.com> * Potential fix for pull request finding 'Imprecise assert' Co-authored-by: Copilot Autofix powered by AI <223894421+github-code-quality[bot]@users.noreply.github.com> --------- Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com> Co-authored-by: Thomas Chauchefoin <thomas.chauchefoin@trailofbits.com> Co-authored-by: Copilot Autofix powered by AI <223894421+github-code-quality[bot]@users.noreply.github.com>
1 parent 240f61e commit e5e34bb

File tree

6 files changed

+538
-10
lines changed

6 files changed

+538
-10
lines changed

fickling/analysis.py

Lines changed: 141 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,22 @@
77
from collections.abc import Iterable, Iterator
88
from enum import Enum
99

10+
from fickling.exception import ResourceExhaustionError
1011
from fickling.fickle import (
1112
BUILTIN_MODULE_NAMES,
1213
SAFE_BUILTINS,
14+
BinGet,
15+
BinPut,
16+
Dup,
17+
Get,
1318
InterpretationError,
1419
Interpreter,
20+
LongBinGet,
21+
LongBinPut,
22+
Memoize,
1523
Pickled,
1624
Proto,
25+
Put,
1726
)
1827

1928

@@ -35,7 +44,29 @@ def __init__(self, pickled: Pickled):
3544
self.results_by_analysis: dict[type[Analysis], list[AnalysisResult]] = defaultdict(list)
3645

3746
def analyze(self, analysis: Analysis) -> list[AnalysisResult]:
38-
results = list(analysis.analyze(self))
47+
try:
48+
results = list(analysis.analyze(self))
49+
except ResourceExhaustionError as e:
50+
# Resource limits exceeded - this is a DoS attack indicator
51+
results = [
52+
AnalysisResult(
53+
Severity.LIKELY_OVERTLY_MALICIOUS,
54+
f"Resource exhaustion detected during analysis: {e}; "
55+
f"this is indicative of an expansion attack (Billion Laughs style)",
56+
"ResourceExhaustion",
57+
trigger=f"{e.resource_type}: {e.actual}",
58+
)
59+
]
60+
except (ValueError, IndexError, RecursionError) as e:
61+
# Malformed pickle caused an interpretation error
62+
results = [
63+
AnalysisResult(
64+
Severity.LIKELY_UNSAFE,
65+
f"The pickle file has malformed opcode sequences ({type(e).__name__}: {e}); "
66+
f"it is either corrupted or attempting to bypass the pickle security analysis",
67+
"InterpretationError",
68+
)
69+
]
3970
if not results:
4071
self.results_by_analysis[type(analysis)].append(AnalysisResult(Severity.LIKELY_SAFE))
4172
else:
@@ -210,6 +241,17 @@ def analyze(self, context: AnalysisContext) -> Iterator[AnalysisResult]:
210241
)
211242

212243

244+
class ResourceExhaustionAnalysis(Analysis):
245+
def analyze(self, context: AnalysisContext) -> Iterator[AnalysisResult]:
246+
if context.pickled.has_resource_exhaustion:
247+
yield AnalysisResult(
248+
Severity.LIKELY_OVERTLY_MALICIOUS,
249+
"Resource limits were exceeded during interpretation; "
250+
"this is indicative of an expansion attack (Billion Laughs style)",
251+
"ResourceExhaustion",
252+
)
253+
254+
213255
class NonStandardImports(Analysis):
214256
def analyze(self, context: AnalysisContext) -> Iterator[AnalysisResult]:
215257
for node in context.pickled.non_standard_imports():
@@ -401,8 +443,8 @@ def analyze(self, context: AnalysisContext) -> Iterator[AnalysisResult]:
401443
interpreter = Interpreter(context.pickled)
402444
try:
403445
unused = interpreter.unused_assignments()
404-
except InterpretationError:
405-
# Malformed pickle - InterpretationErrorAnalysis will report this
446+
except (InterpretationError, ResourceExhaustionError):
447+
# Malformed pickle or resource exhaustion - dedicated analyses will report this
406448
return
407449
for varname, asmt in unused.items():
408450
shortened, _ = context.shorten_code(asmt.value)
@@ -415,6 +457,102 @@ def analyze(self, context: AnalysisContext) -> Iterator[AnalysisResult]:
415457
)
416458

417459

460+
class ExpansionAttackAnalysis(Analysis):
461+
"""Detects potential exponential expansion attacks (Billion Laughs style).
462+
463+
These attacks use:
464+
- High GET/PUT ratio: Many GET operations retrieving memoized values
465+
- Excessive DUP operations: Duplicating stack items repeatedly
466+
"""
467+
468+
# Thresholds for pattern detection
469+
DEFAULT_GET_PUT_RATIO_THRESHOLD = 10 # GETs per PUT that is suspicious
470+
DEFAULT_HIGH_GET_PUT_RATIO_THRESHOLD = 50 # Extremely high ratio
471+
DEFAULT_DUP_COUNT_THRESHOLD = 100 # Number of DUPs that is suspicious
472+
473+
def __init__(
474+
self,
475+
*,
476+
get_put_ratio_threshold: int = DEFAULT_GET_PUT_RATIO_THRESHOLD,
477+
high_get_put_ratio_threshold: int = DEFAULT_HIGH_GET_PUT_RATIO_THRESHOLD,
478+
dup_count_threshold: int = DEFAULT_DUP_COUNT_THRESHOLD,
479+
):
480+
self._get_put_ratio_threshold = get_put_ratio_threshold
481+
self._high_get_put_ratio_threshold = high_get_put_ratio_threshold
482+
self._dup_count_threshold = dup_count_threshold
483+
484+
def analyze(self, context: AnalysisContext) -> Iterator[AnalysisResult]:
485+
get_count = 0
486+
put_count = 0
487+
dup_count = 0
488+
489+
for opcode in context.pickled:
490+
if isinstance(opcode, BinGet | LongBinGet | Get):
491+
get_count += 1
492+
elif isinstance(opcode, BinPut | LongBinPut | Put | Memoize):
493+
put_count += 1
494+
elif isinstance(opcode, Dup):
495+
dup_count += 1
496+
497+
findings: list[AnalysisResult] = []
498+
499+
# Check for high GET/PUT ratio
500+
if put_count > 0:
501+
ratio = get_count / put_count
502+
if ratio > self._high_get_put_ratio_threshold:
503+
findings.append(
504+
AnalysisResult(
505+
Severity.LIKELY_UNSAFE,
506+
f"Extremely high GET/PUT ratio ({ratio:.1f}:1) detected; "
507+
f"this pattern is indicative of an exponential expansion attack "
508+
f"(Billion Laughs style) that could cause DoS",
509+
"ExpansionAttackAnalysis",
510+
trigger=f"GET/PUT ratio: {ratio:.1f}:1",
511+
)
512+
)
513+
elif ratio > self._get_put_ratio_threshold:
514+
findings.append(
515+
AnalysisResult(
516+
Severity.SUSPICIOUS,
517+
f"High GET/PUT ratio ({ratio:.1f}:1) detected; "
518+
f"this may indicate an expansion attack pattern",
519+
"ExpansionAttackAnalysis",
520+
trigger=f"GET/PUT ratio: {ratio:.1f}:1",
521+
)
522+
)
523+
elif get_count > self._get_put_ratio_threshold:
524+
# GETs with no PUTs is inherently malformed/malicious
525+
findings.append(
526+
AnalysisResult(
527+
Severity.LIKELY_UNSAFE,
528+
f"GET operations ({get_count}) with no PUT operations detected; "
529+
f"this is indicative of a malformed or malicious pickle",
530+
"ExpansionAttackAnalysis",
531+
trigger=f"GET count: {get_count}, PUT count: 0",
532+
)
533+
)
534+
535+
# Check for excessive DUP operations
536+
if dup_count > self._dup_count_threshold:
537+
findings.append(
538+
AnalysisResult(
539+
Severity.SUSPICIOUS,
540+
f"Excessive DUP operations ({dup_count}) detected; "
541+
f"this may indicate a stack duplication attack",
542+
"ExpansionAttackAnalysis",
543+
trigger=f"DUP count: {dup_count}",
544+
)
545+
)
546+
547+
# Multiple indicators together are more severe
548+
if len(findings) > 1:
549+
for finding in findings:
550+
if finding.severity < Severity.LIKELY_UNSAFE:
551+
finding.severity = Severity.LIKELY_UNSAFE
552+
553+
yield from findings
554+
555+
418556
class AnalysisResults:
419557
def __init__(self, pickled: Pickled, results: Iterable[AnalysisResult]):
420558
self.pickled: Pickled = pickled

fickling/cli.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from . import __version__, fickle, tracing
88
from .analysis import Severity, check_safety
99
from .constants import EXIT_CLEAN, EXIT_ERROR, EXIT_UNSAFE
10+
from .exception import ResourceExhaustionError
1011

1112
DEFAULT_JSON_OUTPUT_FILE = "safety_results.json"
1213

@@ -183,11 +184,19 @@ def main(argv: list[str] | None = None) -> int:
183184
interpreter = fickle.Interpreter(
184185
pickled, first_variable_id=var_id, result_variable=f"result{i}"
185186
)
186-
if args.trace:
187-
trace = tracing.Trace(interpreter)
188-
print(unparse(trace.run()))
189-
else:
190-
print(unparse(interpreter.to_ast()))
187+
try:
188+
if args.trace:
189+
trace = tracing.Trace(interpreter)
190+
print(unparse(trace.run()))
191+
else:
192+
print(unparse(interpreter.to_ast()))
193+
except ResourceExhaustionError as e:
194+
sys.stderr.write(
195+
f"Error: {e}\n"
196+
"This pickle file may contain an expansion attack. "
197+
"Use --check-safety to analyze it.\n"
198+
)
199+
return 1
191200
var_id = interpreter.next_variable_id
192201
else:
193202
pickled = fickle.Pickled(

fickling/exception.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,22 @@ def __init__(self, msg):
1515

1616
def __str__(self):
1717
return self.msg
18+
19+
20+
class ResourceExhaustionError(Exception):
21+
"""Raised when resource limits are exceeded during analysis."""
22+
23+
def __init__(self, resource_type: str, limit: int, actual: int):
24+
self.resource_type = resource_type
25+
self.limit = limit
26+
self.actual = actual
27+
super().__init__(
28+
f"Resource limit exceeded: {resource_type} (limit={limit}, actual={actual})"
29+
)
30+
31+
32+
class ExpansionAttackError(ResourceExhaustionError):
33+
"""Raised when exponential expansion attack (Billion Laughs style) is detected."""
34+
35+
def __init__(self, limit: int, actual: int):
36+
super().__init__("get_ratio", limit, actual)

0 commit comments

Comments
 (0)