Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions requirements_extras.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# Add packages needed to generate tests
-r requirements.txt
cloudpickle==3.1.1
torch==2.8.0
10 changes: 10 additions & 0 deletions src/picklescan/scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,16 @@ def __str__(self) -> str:
"open",
"breakpoint",
},
# cloudpickle can reconstruct arbitrary callables via CodeType, enabling code execution
"cloudpickle.cloudpickle": {
"_builtin_type", # Used to access CodeType for arbitrary code construction
"_make_function", # Creates functions from code objects
"_function_setstate", # Sets internal function state
"_make_cell", # Creates closure cells
"_make_empty_cell", # Creates empty closure cells
"subimport", # Imports arbitrary modules
},
"types": {"CodeType"}, # Can construct arbitrary code objects for execution
"aiohttp": "*",
"asyncio": "*",
"bdb": "*",
Expand Down
Binary file added tests/data2/cloudpickle_codeinjection.pkl
Binary file not shown.
1 change: 1 addition & 0 deletions tests/data2/types_CodeType.pkl
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
€Œtypes”ŒCodeType”“”)R.
48 changes: 48 additions & 0 deletions tests/init_data_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,33 @@ def reduce_urllib_request_urlopen():
return urllib.request.urlopen, ("https://example.invalid",)


def initialize_cloudpickle_exploit_file(path: str):
    """Write a cloudpickle fixture whose payload is a by-value serialized function.

    The pickled object is an ``EvilClass`` instance whose ``__reduce__`` returns a
    locally defined function together with a string payload, so the resulting
    file embeds that function rather than a reference to an importable name.
    The payload string would run a harmless ``echo`` through ``os.system`` if
    the file were ever unpickled; the fixture is meant to be scanned, not loaded.

    Args:
        path: Destination of the pickle file. If something already exists at
            this path the function prints a notice and returns without
            touching it.
    """
    # Guard first: reusing an existing fixture must not require the optional
    # cloudpickle dependency to be installed.
    if os.path.exists(path):
        print(f"File {path} already exists, skipping initialization.")
        return

    import cloudpickle

    # Keep this import function-local: `_obfuscated_eval` closes over the name,
    # so the serialized function carries a closure cell holding the module.
    # NOTE(review): presumably this closure is why the scanner test expects
    # `_make_cell`, `_make_empty_cell` and `subimport` -- confirm before hoisting.
    import builtins

    class EvilClass:
        @staticmethod
        def _obfuscated_eval(payload):
            # Looks up `eval` by string name instead of referencing it directly.
            getattr(builtins, "eval")(payload)

        def __reduce__(self):
            payload = "__import__('os').system('echo \"successful attack\"')"
            return self._obfuscated_eval, (payload,)

    with open(path, mode="wb") as f:
        cloudpickle.dump(EvilClass(), f)
    print(f"Initialized file {path}.")


def initialize_pickle_file(path: str, obj: Any, version: int):
if os.path.exists(path):
print(f"File {path} already exists, skipping initialization.")
Expand Down Expand Up @@ -816,6 +843,27 @@ def initialize_pickle_files():
initialize_pickle_file_from_reduce("io_FileIO.pkl", reduce_io_FileIO)
initialize_pickle_file_from_reduce("urllib_request_urlopen.pkl", reduce_urllib_request_urlopen)

# types.CodeType can construct arbitrary code objects - using raw opcodes since CodeType can't be pickled directly
initialize_data_file(
f"{_root_path}/data2/types_CodeType.pkl",
b"".join(
[
pickle.PROTO + b"\x04", # Protocol 4
pickle.SHORT_BINUNICODE + bytes([5]) + b"types",
pickle.MEMOIZE,
pickle.SHORT_BINUNICODE + bytes([8]) + b"CodeType",
pickle.MEMOIZE,
pickle.STACK_GLOBAL,
pickle.MEMOIZE,
pickle.EMPTY_TUPLE,
pickle.REDUCE, # types.CodeType() - will fail but scanner should detect it
pickle.STOP,
]
),
)

initialize_cloudpickle_exploit_file(f"{_root_path}/data2/cloudpickle_codeinjection.pkl")

initialize_not_a_pickle_file(f"{_root_path}/data/not_a_pickle.bin")


Expand Down
14 changes: 14 additions & 0 deletions tests/test_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,20 @@ def test_scan_file_path():
)
assert_scan("io_FileIO.pkl", [Global("_io", "FileIO", SafetyLevel.Dangerous)])
assert_scan("urllib_request_urlopen.pkl", [Global("urllib.request", "urlopen", SafetyLevel.Dangerous)])
# types.CodeType can construct arbitrary code objects for execution
assert_scan("types_CodeType.pkl", [Global("types", "CodeType", SafetyLevel.Dangerous)])
# cloudpickle uses _make_function and _builtin_type with CodeType to reconstruct arbitrary callables
assert_scan(
"cloudpickle_codeinjection.pkl",
[
Global("cloudpickle.cloudpickle", "_function_setstate", SafetyLevel.Dangerous),
Global("cloudpickle.cloudpickle", "_builtin_type", SafetyLevel.Dangerous),
Global("cloudpickle.cloudpickle", "_make_function", SafetyLevel.Dangerous),
Global("cloudpickle.cloudpickle", "_make_cell", SafetyLevel.Dangerous),
Global("cloudpickle.cloudpickle", "_make_empty_cell", SafetyLevel.Dangerous),
Global("cloudpickle.cloudpickle", "subimport", SafetyLevel.Dangerous),
],
)


def test_scan_file_path_npz():
Expand Down
Loading