Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@
"name": "Python",
"build": {
"dockerfile": "Dockerfile",
"context": ".."
"context": ".."
},
"postStartCommand": "source /home/nonroot/.venv/bin/activate && python3 -m pip install --disable-pip-version-check -e .",
"postAttachCommand": "source /home/nonroot/.venv/bin/activate",
"customizations": {
"vscode": {
"extensions": [
"ms-python.python"
],
"extensions": [ "ms-python.python", "github.copilot-chat" ],
"settings": {
"chat.tools.autoApprove": true,
"chat.tools.terminal.autoApprove": { "/.*/": true },
Expand Down
50 changes: 50 additions & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
This repo contains a security scanner which analyzes Python Pickle files and reports dangerous function calls.

# Code style

After making code changes, lint the code using:
```
black src tests --line-length 140
flake8 src tests --count --show-source
```

# Bug fixes

## Update block-list

The scanner relies on an allow-list called `_safe_globals` and a block-list called `_unsafe_globals` in `src/picklescan/scanner.py`. Those lists need to be updated when bugs or security advisories report detection issues.

To update the block-list, create a sample Pickle file reproducing the issue, add a test for it, verify that the test fails, update the block-list, and verify the test passes.

In more detail:

Step 1: Update and run `tests/init_data_files.py` to create the sample Pickle file.

First create a `reduce_xxx()` function calling the function to add to the block list. For instance:
```python
def reduce_GHSA_4whj_rm5r_c2v8():
import torch.utils.bottleneck.__main__ as bottleneck_main

return bottleneck_main.run_autograd_prof, (_payload, {})
```

The `reduce_xxx()` function must be self-contained: include `import` statements directly in the function and not at the top of the file (i.e. do not follow the typical Python convention). If a package needs to be installed, run `python3 -m pip install <package>==<version>` to install it in the current virtual environment, and add `<package>==<version>` in `requirements_extras.txt` for future reference.

In `initialize_pickle_files()`, serialize the `reduce_xxx()` function to a file:
```python
initialize_pickle_file_from_reduce("GHSA-4whj-rm5r-c2v8.pkl", reduce_GHSA_4whj_rm5r_c2v8)
```

Finally run `python3 tests/init_data_files.py` to create the sample file.

Step 2: add code validating the output of the scanner for the sample file. In `tests/test_scanner.py`, add an assert in `test_scan_file_path()`. For instance:
```python
assert_scan("GHSA-4whj-rm5r-c2v8.pkl", [Global("torch.utils.bottleneck.__main__", "run_autograd_prof", SafetyLevel.Dangerous)])
```

Run the test and verify it fails:
```bash
pytest tests -k test_scan_file_path -vv
```

Step 3: add a new entry in dictionary `_unsafe_globals` of `src/picklescan/scanner.py` and rerun the test to verify it passes.
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ twine==4.0.1
flake8==5.0.4
pytest==7.1.3
pytest-cov==3.0.0
requests==2.31.0
aiohttp==3.9.1
requests==2.32.5
aiohttp==3.13.2
black==22.8.0
numpy>1.24.0,<2.0.0
py7zr==0.22.0
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[metadata]
name = picklescan
version = 0.0.32
version = 0.0.33
author = Matthieu Maitre
author_email = mmaitre314@users.noreply.github.com
description = Security scanner detecting Python Pickle files performing suspicious actions
Expand Down
23 changes: 18 additions & 5 deletions src/picklescan/scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,12 +120,18 @@ def __str__(self) -> str:
"asyncio": "*",
"bdb": "*",
"commands": "*", # Python 2 precursor to subprocess
"ctypes": "*", # Foreign function interface, can load DLLs, call C functions, manipulate raw memory
"functools": "partial", # functools.partial(os.system, "echo pwned")
"httplib": "*", # Includes http.client.HTTPSConnection()
"numpy.f2py": "*", # Multiple unsafe functions (e.g., getlincoef, _eval_length) that call eval on arbitrary strings
"numpy.testing._private.utils": "*", # runstring() in this module is a synonym for exec()
"nt": "*", # Alias for 'os' on Windows. Includes os.system()
"posix": "*", # Alias for 'os' on Linux. Includes os.system()
"operator": "attrgetter", # Ex of code execution: operator.attrgetter("system")(__import__("os"))("echo pwned")
"operator": {
"attrgetter", # Ex of code execution: operator.attrgetter("system")(__import__("os"))("echo pwned")
"itemgetter",
"methodcaller",
},
"os": "*",
"requests.api": "*",
"runpy": "*", # Includes runpy._run_code
Expand All @@ -136,6 +142,7 @@ def __str__(self) -> str:
"sys": "*",
"code": {"InteractiveInterpreter.runcode"},
"cProfile": {"runctx", "run"},
"distutils.file_util": "*", # arbitrary file write via distutils.file_util.write_file()
"doctest": {"debug_script"},
"ensurepip": {"_run_pip"},
"idlelib.autocomplete": {"AutoComplete.get_entity", "AutoComplete.fetch_completions"},
Expand All @@ -149,8 +156,9 @@ def __str__(self) -> str:
"pickle": "*",
"_pickle": "*",
"pip": "*",
"pty": "*", # pty.spawn() allows executing arbitrary commands
"profile": {"Profile.run", "Profile.runctx"},
"pydoc": "pipepager", # pydoc.pipepager('help','echo pwned')
"pydoc": "*", # pydoc.locate can import arbitrary modules, pydoc.pipepager allows command execution
"timeit": "*",
"torch._dynamo.guards": {"GuardBuilder.get"},
"torch._inductor.codecache": "compile_file", # compile_file('', '', ['sh', '-c','$(echo pwned)'])
Expand Down Expand Up @@ -350,9 +358,14 @@ def _build_scan_result_from_raw_globals(
safe_filter = _safe_globals.get(g.module)
unsafe_filter = _unsafe_globals.get(g.module)

# If the module as a whole is marked as dangerous, submodules are also dangerous
if unsafe_filter is None and "." in g.module and _unsafe_globals.get(g.module.split(".")[0]) == "*":
unsafe_filter = "*"
# If any parent module is marked as dangerous with "*", submodules are also dangerous
if unsafe_filter is None and "." in g.module:
module_parts = g.module.split(".")
for i in range(1, len(module_parts)):
parent_module = ".".join(module_parts[:i])
if _unsafe_globals.get(parent_module) == "*":
unsafe_filter = "*"
break

if "unknown" in g.module or "unknown" in g.name:
g.safety = SafetyLevel.Dangerous
Expand Down
Binary file added tests/data2/GHSA-4675-36f9-wf6r.pkl
Binary file not shown.
Binary file added tests/data2/GHSA-84r2-jw7c-4r5q.pkl
Binary file not shown.
Binary file added tests/data2/GHSA-m273-6v24-x4m4.pkl
Binary file not shown.
Binary file added tests/data2/GHSA-r8g5-cgf2-4m4m.pkl
Binary file not shown.
Binary file added tests/data2/GHSA-vqmv-47xg-9wpr.pkl
Binary file not shown.
Loading
Loading