From a98df60d7914f58363079b41746d3ae6418d7f2c Mon Sep 17 00:00:00 2001 From: Louis Mandel Date: Thu, 7 Nov 2024 11:43:41 -0500 Subject: [PATCH] Add bandit security linter and refactor the implementation of --sandbox --- .pre-commit-config.yaml | 6 ++++ bandit.yaml | 9 +++++ src/pdl/pdl.py | 44 +++-------------------- src/pdl/pdl_interpreter.py | 16 ++++++--- src/pdl/pdl_runner.py | 74 +++++++++++++++++++++++--------------- 5 files changed, 77 insertions(+), 72 deletions(-) create mode 100644 bandit.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 02e8055b0..d0b40da46 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -45,6 +45,12 @@ repos: "-sn", # Don't display the score "--rcfile=pylintrc" ] + # run the bandit security linter + - repo: https://github.com/PyCQA/bandit + rev: 1.7.10 + hooks: + - id: bandit + args: [-c, bandit.yaml] # Type check the Python code with MyPy - repo: https://github.com/pre-commit/mirrors-mypy rev: 'v1.11.2' diff --git a/bandit.yaml b/bandit.yaml new file mode 100644 index 000000000..861ec88dd --- /dev/null +++ b/bandit.yaml @@ -0,0 +1,9 @@ +--- +# This is the configuration file for the bandit python static analysis tool +exclude_dirs: + - build +# We are less worried about tests, as they are not a part of the library meant to be used by users +# with untrusted inputs. + - test +skips: + - B101 # allow the use of assert diff --git a/src/pdl/pdl.py b/src/pdl/pdl.py index 23e7e5d78..3f9072a3d 100644 --- a/src/pdl/pdl.py +++ b/src/pdl/pdl.py @@ -1,8 +1,6 @@ import argparse import json import logging -import os -import subprocess import sys from pathlib import Path from typing import Any, Literal, Optional, TypedDict @@ -23,6 +21,7 @@ ) from .pdl_interpreter import InterpreterState, process_prog from .pdl_parser import parse_file, parse_str +from .pdl_runner import exec_docker logger = logging.getLogger(__name__) @@ -231,43 +230,10 @@ def main(): return if args.sandbox: - watsonx_apikey = "WATSONX_APIKEY=" + os.environ["WATSONX_APIKEY"] - watsonx_url = "WATSONX_URL=" + os.environ["WATSONX_URL"] - watsonx_project_id = "WATSONX_PROJECT_ID=" + os.environ["WATSONX_PROJECT_ID"] - replicate_api_token = "REPLICATE_API_TOKEN=" + os.environ["REPLICATE_API_TOKEN"] - - local_dir = os.getcwd() + ":/local" - try: - args = sys.argv[1:] - args.remove("--sandbox") - subprocess.run( - [ - "docker", - "run", - "-v", - local_dir, - "-w", - "/local", - "-e", - watsonx_apikey, - "-e", - watsonx_url, - "-e", - watsonx_project_id, - "-e", - replicate_api_token, - "--rm", - "-it", - "quay.io/project_pdl/pdl", - *args, - ], - check=True, - ) - except Exception: - print( - "An error occured while running docker. Is the docker daemon running?" - ) - return + args = sys.argv[1:] + args.remove("--sandbox") + exec_docker(*args) + assert False # unreachable: exec_docker terminate the execution initial_scope = {} if args.data_file is not None: diff --git a/src/pdl/pdl_interpreter.py b/src/pdl/pdl_interpreter.py index 4d22e9cfd..22344bb70 100644 --- a/src/pdl/pdl_interpreter.py +++ b/src/pdl/pdl_interpreter.py @@ -2,7 +2,7 @@ import logging import re import shlex -import subprocess +import subprocess # nosec import sys import types @@ -926,7 +926,9 @@ def process_expr(scope: ScopeType, expr: Any, loc: LocationType) -> Any: try: if expr.startswith(EXPR_START_STRING) and expr.endswith(EXPR_END_STRING): # `expr` might be a single expression and should not be stringify - env = Environment( + env = Environment( # nosec B701 + # [B701:jinja2_autoescape_false] By default, jinja2 sets autoescape to False. Consider using autoescape=True or use the select_autoescape function to mitigate XSS vulnerabilities. + # This is safe because autoescape is not needed since we do not generate HTML block_start_string="{%%%%%PDL%%%%%%%%%%", block_end_string="%%%%%PDL%%%%%%%%%%}", variable_start_string=EXPR_START_STRING, @@ -1276,14 +1278,20 @@ def step_call_code( def call_python(code: str, scope: dict) -> Any: my_namespace = types.SimpleNamespace(PDL_SESSION=__PDL_SESSION, **scope) - exec(code, my_namespace.__dict__) + exec(code, my_namespace.__dict__) # nosec B102 + # [B102:exec_used] Use of exec detected. + # This is the code that the user asked to execute. It can be executed in a docker container with the option `--sandbox` result = my_namespace.result return result def call_command(code: str) -> str: args = shlex.split(code) - p = subprocess.run(args, capture_output=True, text=True, check=False) + p = subprocess.run( + args, capture_output=True, text=True, check=False, shell=False + ) # nosec B603 + # [B603:subprocess_without_shell_equals_true] subprocess call - check for execution of untrusted input. + # This is the code that the user asked to execute. It can be executed in a docker container with the option `--sandbox` if p.stderr != "": print(p.stderr, file=sys.stderr) if p.returncode != 0: diff --git a/src/pdl/pdl_runner.py b/src/pdl/pdl_runner.py index b0baf7a42..a73e987fc 100644 --- a/src/pdl/pdl_runner.py +++ b/src/pdl/pdl_runner.py @@ -1,36 +1,52 @@ import os -import subprocess +import subprocess # nosec import sys +from shutil import which -WATSONX_APIKEY = "WATSONX_APIKEY=" + os.environ["WATSONX_APIKEY"] -WATSONX_URL = "WATSONX_URL=" + os.environ["WATSONX_URL"] -WATSONX_PROJECT_ID = "WATSONX_PROJECT_ID=" + os.environ["WATSONX_PROJECT_ID"] -LOCAL_DIR = os.getcwd() + ":/local" +def exec_docker(*args): + try: + docker = which("docker") + if docker is None: + print("Error: unable to find the docker command", file=sys.stderr) + sys.exit(1) -def main(): - subprocess.run( - [ - "docker", - "run", - "-v", - LOCAL_DIR, - "-w", - "/local", - "-e", - WATSONX_APIKEY, - "-e", - WATSONX_URL, - "-e", - WATSONX_PROJECT_ID, - "--rm", - "-it", - "pdl-runner", - *sys.argv[1:], - ], - check=True, - ) + watsonx_apikey = "WATSONX_APIKEY=" + os.environ["WATSONX_APIKEY"] + watsonx_url = "WATSONX_URL=" + os.environ["WATSONX_URL"] + watsonx_project_id = "WATSONX_PROJECT_ID=" + os.environ["WATSONX_PROJECT_ID"] + replicate_api_token = "REPLICATE_API_TOKEN=" + os.environ["REPLICATE_API_TOKEN"] + local_dir = os.getcwd() + ":/local" -if __name__ == "__main__": - main() + subprocess.run( # nosec B603 + # [B603:subprocess_without_shell_equals_true] subprocess call - check for execution of untrusted input. + # This is safe since the environment variables are the arguments of the options `-e` and `*args` is explicitly given by the user + [ + docker, + "run", + "-v", + local_dir, + "-w", + "/local", + "-e", + watsonx_apikey, + "-e", + watsonx_url, + "-e", + watsonx_project_id, + "-e", + replicate_api_token, + "--rm", + "-it", + "quay.io/project_pdl/pdl", + *args, + ], + check=True, + ) + sys.exit(0) + except Exception: + print( + "An error occurred while running docker.", + file=sys.stderr, + ) + sys.exit(1)