Skip to content

Commit 173c8f2

Browse files
authored
1 parent 87629cb commit 173c8f2

File tree

6 files changed

+74
-0
lines changed

6 files changed

+74
-0
lines changed

requirements_extras.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
# Add packages needed to generate tests
22
-r requirements.txt
3+
cloudpickle==3.1.1
34
torch==2.8.0

src/picklescan/scanner.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,16 @@ def __str__(self) -> str:
118118
"open",
119119
"breakpoint",
120120
},
121+
# cloudpickle can reconstruct arbitrary callables via CodeType, enabling code execution
122+
"cloudpickle.cloudpickle": {
123+
"_builtin_type", # Used to access CodeType for arbitrary code construction
124+
"_make_function", # Creates functions from code objects
125+
"_function_setstate", # Sets internal function state
126+
"_make_cell", # Creates closure cells
127+
"_make_empty_cell", # Creates empty closure cells
128+
"subimport", # Imports arbitrary modules
129+
},
130+
"types": {"CodeType"}, # Can construct arbitrary code objects for execution
121131
"aiohttp": "*",
122132
"asyncio": "*",
123133
"bdb": "*",
861 Bytes
Binary file not shown.

tests/data2/types_CodeType.pkl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
��types��CodeType���)R.

tests/init_data_files.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,33 @@ def reduce_urllib_request_urlopen():
367367
return urllib.request.urlopen, ("https://example.invalid",)
368368

369369

370+
def initialize_cloudpickle_exploit_file(path: str):
    """Create a pickle file using cloudpickle that embeds arbitrary code via CodeType.

    This reproduces the vulnerability where cloudpickle uses _make_function and
    _builtin_type with CodeType to reconstruct arbitrary callables that can
    execute malicious code.

    The file is only written when ``path`` does not exist yet; an existing
    fixture is left untouched.

    Args:
        path: Destination of the generated pickle file.

    Raises:
        ImportError: If the fixture has to be generated and ``cloudpickle``
            is not installed.
    """
    # Check for an existing fixture *before* importing cloudpickle: it is an
    # extras-only dependency, and an already generated file must not require
    # it just to be skipped.
    if os.path.exists(path):
        print(f"File {path} already exists, skipping initialization.")
        return

    import builtins

    import cloudpickle

    class EvilClass:
        @staticmethod
        def _obfuscated_eval(payload):
            # Resolve eval through getattr so the builtin is looked up by name
            # at call time instead of being referenced directly.
            getattr(builtins, "eval")(payload)

        def __reduce__(self):
            # The reduce callable is a function defined in this local scope;
            # NOTE(review): cloudpickle is expected to serialize it by value
            # (code object included) - confirm against the generated fixture.
            payload = "__import__('os').system('echo \"successful attack\"')"
            return self._obfuscated_eval, (payload,)

    with open(path, mode="wb") as f:
        cloudpickle.dump(EvilClass(), f)
    print(f"Initialized file {path}.")
395+
396+
370397
def initialize_pickle_file(path: str, obj: Any, version: int):
371398
if os.path.exists(path):
372399
print(f"File {path} already exists, skipping initialization.")
@@ -816,6 +843,27 @@ def initialize_pickle_files():
816843
initialize_pickle_file_from_reduce("io_FileIO.pkl", reduce_io_FileIO)
817844
initialize_pickle_file_from_reduce("urllib_request_urlopen.pkl", reduce_urllib_request_urlopen)
818845

846+
# types.CodeType can construct arbitrary code objects - using raw opcodes since CodeType can't be pickled directly
847+
initialize_data_file(
848+
f"{_root_path}/data2/types_CodeType.pkl",
849+
b"".join(
850+
[
851+
pickle.PROTO + b"\x04", # Protocol 4
852+
pickle.SHORT_BINUNICODE + bytes([5]) + b"types",
853+
pickle.MEMOIZE,
854+
pickle.SHORT_BINUNICODE + bytes([8]) + b"CodeType",
855+
pickle.MEMOIZE,
856+
pickle.STACK_GLOBAL,
857+
pickle.MEMOIZE,
858+
pickle.EMPTY_TUPLE,
859+
pickle.REDUCE, # types.CodeType() - will fail but scanner should detect it
860+
pickle.STOP,
861+
]
862+
),
863+
)
864+
865+
initialize_cloudpickle_exploit_file(f"{_root_path}/data2/cloudpickle_codeinjection.pkl")
866+
819867
initialize_not_a_pickle_file(f"{_root_path}/data/not_a_pickle.bin")
820868

821869

tests/test_scanner.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -397,6 +397,20 @@ def test_scan_file_path():
397397
)
398398
assert_scan("io_FileIO.pkl", [Global("_io", "FileIO", SafetyLevel.Dangerous)])
399399
assert_scan("urllib_request_urlopen.pkl", [Global("urllib.request", "urlopen", SafetyLevel.Dangerous)])
400+
# types.CodeType can construct arbitrary code objects for execution
401+
assert_scan("types_CodeType.pkl", [Global("types", "CodeType", SafetyLevel.Dangerous)])
402+
# cloudpickle uses _make_function and _builtin_type with CodeType to reconstruct arbitrary callables
403+
assert_scan(
404+
"cloudpickle_codeinjection.pkl",
405+
[
406+
Global("cloudpickle.cloudpickle", "_function_setstate", SafetyLevel.Dangerous),
407+
Global("cloudpickle.cloudpickle", "_builtin_type", SafetyLevel.Dangerous),
408+
Global("cloudpickle.cloudpickle", "_make_function", SafetyLevel.Dangerous),
409+
Global("cloudpickle.cloudpickle", "_make_cell", SafetyLevel.Dangerous),
410+
Global("cloudpickle.cloudpickle", "_make_empty_cell", SafetyLevel.Dangerous),
411+
Global("cloudpickle.cloudpickle", "subimport", SafetyLevel.Dangerous),
412+
],
413+
)
400414

401415

402416
def test_scan_file_path_npz():

0 commit comments

Comments
 (0)