Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 33 additions & 1 deletion lib/crewai/src/crewai/utilities/file_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,38 @@
from typing_extensions import Unpack


# Allowlist of (module, name) pairs that pickle streams may reference.
# Training artifacts should only contain primitive/container types.
# NOTE(review): `builtins` does not appear to expose a `NoneType` attribute on
# current CPython, so that entry may never resolve -- confirm it is needed.
_SAFE_PICKLE_GLOBALS: frozenset[tuple[str, str]] = frozenset(
    ("builtins", type_name)
    for type_name in (
        "dict",
        "list",
        "set",
        "frozenset",
        "tuple",
        "str",
        "bytes",
        "bytearray",
        "int",
        "float",
        "complex",
        "bool",
        "NoneType",
    )
)


class _RestrictedUnpickler(pickle.Unpickler):
    """Unpickler that only resolves allowlisted globals (CWE-502).

    Any ``(module, name)`` pair missing from ``_SAFE_PICKLE_GLOBALS`` is
    rejected without being looked up.
    """

    def find_class(self, module: str, name: str) -> object:  # type: ignore[override]
        """Resolve a global referenced by the pickle stream.

        Args:
            module: Module name recorded in the pickle stream.
            name: Name of the global inside that module.

        Returns:
            Whatever ``pickle.Unpickler.find_class`` returns for the pair.

        Raises:
            pickle.UnpicklingError: If ``(module, name)`` is not allowlisted.
        """
        requested = (module, name)
        if requested not in _SAFE_PICKLE_GLOBALS:
            raise pickle.UnpicklingError(
                f"Refusing to unpickle '{module}.{name}': global not in allowlist. "
                "Training artifacts must not require arbitrary class imports."
            )
        # Allowlisted: let the stock implementation perform the lookup.
        return super().find_class(module, name)


class LogEntry(TypedDict, total=False):
"""TypedDict for log entry kwargs with optional fields for flexibility."""

Expand Down Expand Up @@ -163,6 +195,6 @@ def load(self) -> Any:
with store_lock(f"file:{os.path.realpath(self.file_path)}"):
try:
with open(self.file_path, "rb") as file:
return pickle.load(file) # noqa: S301
return _RestrictedUnpickler(file).load() # noqa: S301
except (FileNotFoundError, EOFError):
return {}
56 changes: 56 additions & 0 deletions lib/crewai/tests/utilities/test_file_handler.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
import pickle
import unittest
import uuid

Expand Down Expand Up @@ -32,6 +33,37 @@ def test_save_and_load(self):
loaded_data = self.handler.load()
assert loaded_data == data

def test_load_round_trips_training_data_artifact_shape(self):
    """Saving then loading a nested dict of string fields returns an equal value."""
    iteration_record = {
        "initial_output": "Initial output",
        "human_feedback": "Human feedback",
        "improved_output": "Improved output",
    }
    training_data = {"agent_id": {"0": iteration_record}}

    self.handler.save(training_data)
    restored = self.handler.load()

    assert restored == training_data

def test_load_round_trips_trained_agents_artifact_shape(self):
    """Saving then loading a dict holding a list, a float and strings returns an equal value."""
    researcher_entry = {
        "suggestions": [
            "Use precise terminology.",
            "Explain assumptions before giving the answer.",
        ],
        "quality": 8.0,
        "final_summary": "The agent improved after applying feedback.",
    }
    trained_agents = {"researcher": researcher_entry}

    self.handler.save(trained_agents)
    restored = self.handler.load()

    assert restored == trained_agents

def test_load_empty_file(self):
loaded_data = self.handler.load()
assert loaded_data == {}
Expand All @@ -47,3 +79,27 @@ def test_load_corrupted_file(self):

assert str(exc.value) == "pickle data was truncated"
assert "<class '_pickle.UnpicklingError'>" == str(exc.type)

def test_load_rejects_unsafe_pickle_globals(self):
    """Loading a pickle whose ``__reduce__`` targets ``exec`` must be refused.

    The payload would set a uniquely named environment variable if it ran,
    so the variable's absence after ``load()`` shows nothing was executed.
    """
    env_key = f"CREWAI_PICKLE_HANDLER_EXPLOITED_{uuid.uuid4().hex}"
    original_env_value = os.environ.get(env_key)
    payload_source = f"import os; os.environ[{env_key!r}] = '1'"

    class _Exploit:
        def __reduce__(self):
            return (exec, (payload_source,))

    # Write the malicious pickle to self.file_path and force it to disk.
    with open(self.file_path, "wb") as pickle_file:
        pickle.dump(_Exploit(), pickle_file, protocol=pickle.HIGHEST_PROTOCOL)
        pickle_file.flush()
        os.fsync(pickle_file.fileno())

    try:
        with pytest.raises(pickle.UnpicklingError, match="Refusing to unpickle"):
            self.handler.load()

        assert env_key not in os.environ
    finally:
        # Put the environment back exactly as it was before the test.
        if original_env_value is not None:
            os.environ[env_key] = original_env_value
        else:
            os.environ.pop(env_key, None)