Skip to content

Commit 8a85b86

Browse files
authored
Merge pull request #159 from python-discord/bytes-output
File system and Binary file sending
2 parents e6e829a + 94e1378 commit 8a85b86

21 files changed

+1231
-97
lines changed

README.md

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@
77

88
Python sandbox runners for executing code in isolation aka snekbox.
99

10+
Supports an in-memory [virtual read/write file system](#memory-file-system) within the sandbox,
11+
allowing text or binary files to be sent and returned.
12+
1013
A client sends Python code to a snekbox, the snekbox executes the code, and finally the results of the execution are returned to the client.
1114

1215
```mermaid
@@ -60,10 +63,26 @@ The main features of the default configuration are:
6063
* Memory limit
6164
* Process count limit
6265
* No networking
63-
* Restricted, read-only filesystem
66+
* Restricted, read-only system filesystem
67+
* Memory-based read-write filesystem mounted as working directory `/home`
6468

6569
NsJail is configured through [`snekbox.cfg`]. It contains the exact values for the items listed above. The configuration format is defined by a [protobuf file][7] which can be referred to for documentation. The command-line options of NsJail can also serve as documentation since they closely follow the config file format.
6670

71+
### Memory File System
72+
73+
On each execution, the host will mount an instance-specific `tmpfs` drive, which is used as a limited read-write folder for the sandboxed code. There is no access to other files or directories on the host container beyond the read-only mounted system folders. Instance file systems are isolated; it is not possible for sandboxed code to access another instance's writeable directory.
74+
75+
The following options for the memory file system are configurable in [gunicorn.conf.py](config/gunicorn.conf.py):
76+
77+
* `memfs_instance_size` Size in bytes for the capacity of each instance file system.
78+
* `memfs_home` Path to the home directory within the instance file system.
79+
* `memfs_output` Path to the output directory within the instance file system.
80+
* `files_limit` Maximum number of valid output files to parse.
81+
* `files_timeout` Maximum time in seconds for output file parsing and encoding.
82+
* `files_pattern` Glob pattern to match files within `output`.
83+
84+
The sandboxed code execution will start with a writeable working directory of `home`. By default, the output folder is also `home`. New files, and uploaded files with a newer last modified time, will be returned to the client on completion.
85+
6786
### Gunicorn
6887

6988
[Gunicorn settings] can be found in [`gunicorn.conf.py`]. In the default configuration, the worker count, the bind address, and the WSGI app URI are likely the only things of any interest. Since it uses the default synchronous workers, the [worker count] effectively determines how many concurrent code evaluations can be performed.

config/snekbox.cfg

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ description: "Execute Python"
33

44
mode: ONCE
55
hostname: "snekbox"
6-
cwd: "/snekbox"
6+
cwd: "/home"
77

88
time_limit: 6
99

@@ -16,10 +16,12 @@ envar: "VECLIB_MAXIMUM_THREADS=5"
1616
envar: "NUMEXPR_NUM_THREADS=5"
1717
envar: "PYTHONPATH=/snekbox/user_base/lib/python3.11/site-packages"
1818
envar: "PYTHONIOENCODING=utf-8:strict"
19+
envar: "HOME=home"
1920

2021
keep_caps: false
2122

2223
rlimit_as: 700
24+
rlimit_fsize_type: INF
2325

2426
clone_newnet: true
2527
clone_newuser: true
@@ -108,12 +110,12 @@ cgroup_mem_max: 52428800
108110
cgroup_mem_swap_max: 0
109111
cgroup_mem_mount: "/sys/fs/cgroup/memory"
110112

111-
cgroup_pids_max: 5
113+
cgroup_pids_max: 6
112114
cgroup_pids_mount: "/sys/fs/cgroup/pids"
113115

114116
iface_no_lo: true
115117

116118
exec_bin {
117119
path: "/usr/local/bin/python"
118-
arg: "-Squ"
120+
arg: "-BSqu"
119121
}

docker-compose.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ services:
88
image: ghcr.io/python-discord/snekbox${IMAGE_SUFFIX:--venv:dev}
99
pull_policy: never
1010
ports:
11-
- 8060:8060
11+
- "8060:8060"
1212
init: true
1313
ipc: none
1414
tty: true

snekbox/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from snekbox.nsjail import NsJail # noqa: E402
1313
from snekbox.utils.logging import init_logger, init_sentry # noqa: E402
1414

15-
__all__ = ("NsJail", "SnekAPI")
15+
__all__ = ("NsJail", "SnekAPI", "DEBUG")
1616

1717
init_sentry(__version__)
1818
init_logger(DEBUG)

snekbox/__main__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ def parse_args() -> argparse.Namespace:
3737
def main() -> None:
3838
"""Evaluate Python code through NsJail."""
3939
args = parse_args()
40-
result = NsJail().python3(args.code, nsjail_args=args.nsjail_args, py_args=args.py_args)
40+
result = NsJail().python3(py_args=[*args.py_args, args.code], nsjail_args=args.nsjail_args)
4141
print(result.stdout)
4242

4343
if result.returncode != 0:

snekbox/api/resources/eval.py

Lines changed: 69 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from __future__ import annotations
2+
13
import logging
24

35
import falcon
@@ -7,6 +9,8 @@
79

810
__all__ = ("EvalResource",)
911

12+
from snekbox.snekio import FileAttachment, ParsingError
13+
1014
log = logging.getLogger(__name__)
1115

1216

@@ -25,8 +29,26 @@ class EvalResource:
2529
"properties": {
2630
"input": {"type": "string"},
2731
"args": {"type": "array", "items": {"type": "string"}},
32+
"files": {
33+
"type": "array",
34+
"items": {
35+
"type": "object",
36+
"properties": {
37+
"path": {
38+
"type": "string",
39+
# Disallow starting with / or containing \0 anywhere
40+
"pattern": r"^(?!/)(?!.*\\0).*$",
41+
},
42+
"content": {"type": "string"},
43+
},
44+
"required": ["path"],
45+
},
46+
},
2847
},
29-
"required": ["input"],
48+
"anyOf": [
49+
{"required": ["input"]},
50+
{"required": ["args"]},
51+
],
3052
}
3153

3254
def __init__(self, nsjail: NsJail):
@@ -38,8 +60,11 @@ def on_post(self, req: falcon.Request, resp: falcon.Response) -> None:
3860
Evaluate Python code and return stdout, stderr, and the return code.
3961
4062
A list of arguments for the Python subprocess can be specified as `args`.
41-
Otherwise, the default argument "-c" is used to execute the input code.
42-
The input code is always passed as the last argument to Python.
63+
64+
If `input` is specified, it will be appended as the last argument to `args`,
65+
and `args` will have a default argument of `"-c"`.
66+
67+
Either `input` or `args` must be specified.
4368
4469
The return codes mostly resemble those of a Unix shell. Some noteworthy cases:
4570
@@ -53,33 +78,64 @@ def on_post(self, req: falcon.Request, resp: falcon.Response) -> None:
5378
Request body:
5479
5580
>>> {
56-
... "input": "[i for i in range(1000)]",
57-
... "args": ["-m", "timeit"] # This is optional
81+
... "input": "print('Hello')"
82+
... }
83+
84+
>>> {
85+
... "args": ["-c", "print('Hello')"]
86+
... }
87+
88+
>>> {
89+
... "args": ["main.py"],
90+
... "files": [
91+
... {
92+
... "path": "main.py",
93+
... "content": "SGVsbG8...=" # Base64
94+
... }
95+
... ]
5896
... }
5997
6098
Response format:
6199
62100
>>> {
63-
... "stdout": "10000 loops, best of 5: 23.8 usec per loop\n",
64-
... "returncode": 0
101+
... "stdout": "10000 loops, best of 5: 23.8 usec per loop",
102+
... "returncode": 0,
103+
... "files": [
104+
... {
105+
... "path": "output.png",
106+
... "size": 57344,
107+
... "content": "eJzzSM3...=" # Base64
108+
... }
109+
... ]
65110
... }
66111
67112
Status codes:
68113
69114
- 200
70115
Successful evaluation; not indicative that the input code itself works
71116
- 400
72-
Input's JSON schema is invalid
117+
Input JSON schema is invalid
73118
- 415
74119
Unsupported content type; only application/JSON is supported
75120
"""
76-
code = req.media["input"]
77-
args = req.media.get("args", ("-c",))
78-
121+
body: dict[str, str | list[str] | list[dict[str, str]]] = req.media
122+
# If `input` is supplied, default `args` to `-c`
123+
if "input" in body:
124+
body.setdefault("args", ["-c"])
125+
body["args"].append(body["input"])
79126
try:
80-
result = self.nsjail.python3(code, py_args=args)
127+
result = self.nsjail.python3(
128+
py_args=body["args"],
129+
files=[FileAttachment.from_dict(file) for file in body.get("files", [])],
130+
)
131+
except ParsingError as e:
132+
raise falcon.HTTPBadRequest(title="Request file is invalid", description=str(e))
81133
except Exception:
82134
log.exception("An exception occurred while trying to process the request")
83135
raise falcon.HTTPInternalServerError
84136

85-
resp.media = {"stdout": result.stdout, "returncode": result.returncode}
137+
resp.media = {
138+
"stdout": result.stdout,
139+
"returncode": result.returncode,
140+
"files": [f.as_dict for f in result.files],
141+
}

snekbox/filesystem.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
"""Mounts and unmounts filesystems."""
2+
from __future__ import annotations
3+
4+
import ctypes
5+
import os
6+
from ctypes.util import find_library
7+
from enum import IntEnum
8+
from pathlib import Path
9+
10+
__all__ = ("mount", "unmount", "Size", "UnmountFlags")
11+
12+
libc = ctypes.CDLL(find_library("c"), use_errno=True)
13+
libc.mount.argtypes = (
14+
ctypes.c_char_p,
15+
ctypes.c_char_p,
16+
ctypes.c_char_p,
17+
ctypes.c_ulong,
18+
ctypes.c_char_p,
19+
)
20+
libc.umount2.argtypes = (ctypes.c_char_p, ctypes.c_int)
21+
22+
23+
class Size(IntEnum):
    """Binary (power-of-two) multipliers for converting a unit count into bytes."""

    # Each unit is 2**10 times the previous one.
    KiB = 1 << 10
    MiB = 1 << 20
    GiB = 1 << 30
    TiB = 1 << 40
30+
31+
32+
class UnmountFlags(IntEnum):
    """Flag values accepted by the `flags` argument of umount2."""

    # Each flag occupies its own bit.
    MNT_FORCE = 1 << 0
    MNT_DETACH = 1 << 1
    MNT_EXPIRE = 1 << 2
    UMOUNT_NOFOLLOW = 1 << 3
39+
40+
41+
def mount(source: Path | str, target: Path | str, fs: str, **options: str | int) -> None:
    """
    Mount a filesystem.

    https://man7.org/linux/man-pages/man2/mount.2.html

    Args:
        source: Source directory or device.
        target: Target directory.
        fs: Filesystem type.
        **options: Filesystem-specific mount options. Each keyword is rendered
            as `key=value` and the pairs are passed to the kernel as a single
            comma-separated string.

    Raises:
        OSError: If `target` is already a mount point, or on any mount error.
    """
    if Path(target).is_mount():
        raise OSError(f"{target} is already a mount point")

    # mount(2) expects filesystem options as one comma-separated string.
    # Joining with spaces only works when a single option is supplied.
    data = ",".join(f"{key}={value}" for key, value in options.items())

    result: int = libc.mount(
        str(source).encode(), str(target).encode(), fs.encode(), 0, data.encode()
    )
    if result < 0:
        # libc was loaded with use_errno=True, so ctypes exposes the call's errno.
        errno = ctypes.get_errno()
        raise OSError(errno, f"Error mounting {target}: {os.strerror(errno)}")
67+
68+
69+
def unmount(target: Path | str, flags: UnmountFlags | int = UnmountFlags.MNT_DETACH) -> None:
    """
    Unmount the filesystem mounted at `target`.

    https://man7.org/linux/man-pages/man2/umount.2.html

    Args:
        target: Directory that is currently a mount point.
        flags: Flags passed to umount2.

    Raises:
        OSError: If `target` is not a mount point, or on any unmount error.
    """
    mount_point = Path(target)
    if not mount_point.is_mount():
        raise OSError(f"{target} is not a mount point")

    status: int = libc.umount2(str(target).encode(), int(flags))
    if status >= 0:
        return

    # A negative status signals failure; report the errno captured by ctypes.
    code = ctypes.get_errno()
    reason = os.strerror(code)
    raise OSError(code, f"Error unmounting {target}: {reason}")

0 commit comments

Comments
 (0)