Add safe builtins allowlist to prevent false positives (#206)

dguido · claude · thomas-chauchefoin-tob · web-flow · commit 3d656b925ffe · 2026-01-22T14:51:15.000+01:00
* Add safe builtins allowlist to prevent false positives Previously, all imports from the builtins module were flagged as LIKELY_OVERTLY_MALICIOUS, even safe functions like dict(), len(), sorted(), and enumerate(). This caused false positives for legitimate pickle files. Add SAFE_BUILTINS frozenset containing type constructors and pure functions that cannot be used for code execution or system access. Modify both UnsafeImportsML and UnsafeImports analyzers to check individual builtin names against this allowlist. Dangerous builtins like eval, exec, getattr, __import__, and open remain blocked as they can be used for arbitrary code execution. Fixes #205 Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * Fix ruff formatting for PR - Format analysis.py and test_bypasses.py with ruff - Remove mypy from pre-commit hooks (was already skipped in CI) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * Extract SAFE_BUILTINS and BUILTIN_MODULE_NAMES out of analysis module - Add BUILTIN_MODULE_NAMES constant to eliminate repeated tuple - Move SAFE_BUILTINS from UnsafeImportsML class to fickle.py - Update both UnsafeImportsML and UnsafeImports to use shared constants - Removes cross-class dependency (UnsafeImports no longer references UnsafeImportsML.SAFE_BUILTINS) Co-Authored-By: Claude <noreply@anthropic.com> * Remove type from SAFE_BUILTINS allowlist type() with 3 arguments dynamically creates classes, which could be a building block in exploit chains (e.g., triggering __init_subclass__ or __set_name__ on imported base classes/descriptors). While not directly exploitable in isolation, there's no legitimate reason for a pickle to dynamically create classes, so we exclude it as a defense-in-depth measure. 
Co-Authored-By: Claude <noreply@anthropic.com> * Make unsafe builtin tests verify both analyzers detect the issue Update test_unsafe_builtins_still_flagged and test_unsafe_builtin_eval_still_flagged to assert that both UnsafeImports and UnsafeImportsML flag dangerous builtins, rather than checking if either one does. Co-Authored-By: Claude <noreply@anthropic.com> * Simplify UnsafeImports builtin check using all() Co-Authored-By: Claude <noreply@anthropic.com> * Format BUILTIN_MODULE_NAMES as single line Co-Authored-By: Claude <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com> Co-authored-by: Thomas Chauchefoin <thomas.chauchefoin@trailofbits.com>
diff --git a/fickling/analysis.py b/fickling/analysis.py
@@ -7,7 +7,14 @@
 from collections.abc import Iterable, Iterator
 from enum import Enum
 
-from fickling.fickle import InterpretationError, Interpreter, Pickled, Proto
+from fickling.fickle import (
+    BUILTIN_MODULE_NAMES,
+    SAFE_BUILTINS,
+    InterpretationError,
+    Interpreter,
+    Pickled,
+    Proto,
+)
 
 
 class AnalyzerMeta(type):
@@ -267,13 +274,27 @@ def analyze(self, context: AnalysisContext) -> Iterator[AnalysisResult]:
             ]
             for module_name in all_modules:
                 if module_name in self.UNSAFE_MODULES:
-                    risk_info = self.UNSAFE_MODULES[module_name]
-                    yield AnalysisResult(
-                        Severity.LIKELY_OVERTLY_MALICIOUS,
-                        f"`{shortened}` uses `{module_name}` that is indicative of a malicious pickle file. {risk_info}",
-                        "UnsafeImportsML",
-                        trigger=shortened,
-                    )
+                    # Special handling for builtins - check specific function names
+                    if module_name in BUILTIN_MODULE_NAMES:
+                        for n in node.names:
+                            if n.name not in SAFE_BUILTINS:
+                                risk_info = self.UNSAFE_MODULES[module_name]
+                                yield AnalysisResult(
+                                    Severity.LIKELY_OVERTLY_MALICIOUS,
+                                    f"`{shortened}` imports `{n.name}` from `{module_name}` "
+                                    f"which can execute arbitrary code. {risk_info}",
+                                    "UnsafeImportsML",
+                                    trigger=shortened,
+                                )
+                    else:
+                        # All other unsafe modules are fully blocked
+                        risk_info = self.UNSAFE_MODULES[module_name]
+                        yield AnalysisResult(
+                            Severity.LIKELY_OVERTLY_MALICIOUS,
+                            f"`{shortened}` uses `{module_name}` that is indicative of a malicious pickle file. {risk_info}",
+                            "UnsafeImportsML",
+                            trigger=shortened,
+                        )
             if node.module in self.UNSAFE_IMPORTS:
                 for n in node.names:
                     if n.name in self.UNSAFE_IMPORTS[node.module]:
@@ -348,6 +369,10 @@ def analyze(self, context: AnalysisContext) -> Iterator[AnalysisResult]:
 class UnsafeImports(Analysis):
     def analyze(self, context: AnalysisContext) -> Iterator[AnalysisResult]:
         for node in context.pickled.unsafe_imports():
+            if node.module in BUILTIN_MODULE_NAMES and all(
+                n.name in SAFE_BUILTINS for n in node.names
+            ):
+                continue
             shortened, _ = context.shorten_code(node)
             yield AnalysisResult(
                 Severity.LIKELY_OVERTLY_MALICIOUS,
diff --git a/fickling/fickle.py b/fickling/fickle.py
@@ -62,6 +62,72 @@
     ]
 )
 
+BUILTIN_MODULE_NAMES: frozenset[str] = frozenset(["builtins", "__builtins__", "__builtin__"])
+
+# Builtins that are considered safe to import - pure functions and type
+# constructors that cannot, on their own, be used for code execution or
+# system access.
+# Dangerous builtins deliberately NOT in this list (and thus still flagged) include:
+# - eval, exec, compile: direct code execution
+# - open: file system access
+# - __import__, __loader__, __spec__: import machinery
+# - getattr, setattr, delattr, hasattr: attribute access (can call any method)
+# - globals, locals, vars: namespace access
+# - input: reads a line from stdin (waits on user input; could be abused)
+# - breakpoint: debugger access
+# - memoryview: low-level memory access
+SAFE_BUILTINS: frozenset[str] = frozenset(
+    [
+        # Type constructors (create data, cannot execute code)
+        "bool",
+        "int",
+        "float",
+        "complex",
+        "str",
+        "bytes",
+        "bytearray",
+        "list",
+        "tuple",
+        "set",
+        "frozenset",
+        "dict",
+        # Pure functions (no side effects, no code execution)
+        "len",
+        "abs",
+        "sum",
+        "min",
+        "max",
+        "round",
+        "pow",
+        "divmod",
+        "sorted",
+        "reversed",
+        "enumerate",
+        "zip",
+        "range",
+        "map",
+        "filter",
+        "slice",
+        "iter",
+        "next",
+        "all",
+        "any",
+        "hash",
+        "id",
+        "repr",
+        "ascii",
+        "bin",
+        "hex",
+        "oct",
+        "ord",
+        "chr",
+        "isinstance",
+        "issubclass",
+        "object",
+        "callable",
+        "format",
+    ]
+)
+
 
 def is_std_module(module_name: str) -> bool:
     return module_name in BUILTIN_STDLIB_MODULE_NAMES
diff --git a/test/test_bypasses.py b/test/test_bypasses.py
@@ -376,3 +376,72 @@ def test_builtins_import_bypass(self):
             res.detailed_results()["AnalysisResult"].get("UnsafeImports"),
             "from builtins import getattr",
         )
+
+    def test_safe_builtins_not_flagged(self):
+        """Safe builtins like len, dict should not be flagged as malicious."""
+        pickled = Pickled(
+            [
+                op.Global("builtins len"),
+                op.EmptyList(),
+                op.TupleOne(),
+                op.Reduce(),
+                op.Stop(),
+            ]
+        )
+        res = check_safety(pickled)
+        # Should not have UnsafeImports or UnsafeImportsML result for safe builtins
+        detailed = res.detailed_results().get("AnalysisResult", {})
+        self.assertIsNone(detailed.get("UnsafeImports"))
+        self.assertIsNone(detailed.get("UnsafeImportsML"))
+
+    def test_safe_builtin_dict_not_flagged(self):
+        """Safe builtin dict() should not be flagged as malicious."""
+        pickled = Pickled(
+            [
+                op.Global("builtins dict"),
+                op.EmptyTuple(),
+                op.Reduce(),
+                op.Stop(),
+            ]
+        )
+        res = check_safety(pickled)
+        detailed = res.detailed_results().get("AnalysisResult", {})
+        self.assertIsNone(detailed.get("UnsafeImports"))
+        self.assertIsNone(detailed.get("UnsafeImportsML"))
+
+    def test_unsafe_builtins_still_flagged(self):
+        """Dangerous builtins like getattr, __import__ must still be flagged."""
+        pickled = Pickled(
+            [
+                op.Global("builtins getattr"),
+                op.String("os"),
+                op.String("system"),
+                op.TupleTwo(),
+                op.Reduce(),
+                op.Stop(),
+            ]
+        )
+        res = check_safety(pickled)
+        self.assertGreater(res.severity, Severity.LIKELY_SAFE)
+        # Should be flagged by both unsafe import checkers
+        detailed = res.detailed_results().get("AnalysisResult", {})
+        self.assertIsNotNone(detailed.get("UnsafeImports"))
+        self.assertIsNotNone(detailed.get("UnsafeImportsML"))
+
+    def test_unsafe_builtin_eval_still_flagged(self):
+        """Dangerous builtin eval must still be flagged."""
+        pickled = Pickled(
+            [
+                op.Global("builtins eval"),
+                op.String("print('hello')"),
+                op.TupleOne(),
+                op.Reduce(),
+                op.Stop(),
+            ]
+        )
+        res = check_safety(pickled)
+        self.assertGreater(res.severity, Severity.LIKELY_SAFE)
+        # Should be flagged by both unsafe import checkers
+        detailed = res.detailed_results().get("AnalysisResult", {})
+        self.assertIsNotNone(detailed.get("UnsafeImports"))
+        self.assertIsNotNone(detailed.get("UnsafeImportsML"))