Skip to content

Commit 40a0c21

Browse files
committed
Add --direct argument and sandbox support to LLMBenchMCPServer
- Added a --direct flag to run without sandboxing (the behavior prior to this commit).
- When --direct is not present, the server re-launches itself inside a Sandbox.jl sandbox.
- The sandbox provides isolation for untrusted benchmark code.
- Improved module loading to support both packages and local .jl files.
- Added helpful error messages for when Sandbox.jl is not available.
- Updated the help text to document the new --direct option.

The default behavior is now to run in a sandbox for security, with --direct as an opt-in flag for running without isolation. This makes the safe way of running untrusted benchmark code the default.
1 parent 889ddba commit 40a0c21

File tree

1 file changed

+141
-2
lines changed

1 file changed

+141
-2
lines changed

src/server.jl

Lines changed: 141 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,97 @@ function run_server_with_revise(server::ClaudeMCPTools.MCPServer, socket_path::S
117117
end
118118
end
119119

"""
    launch_in_sandbox(args::Vector{String})

Re-launch the LLMBenchMCPServer inside a Sandbox.jl sandbox.

Sandbox.jl is loaded lazily by its package UUID. If it cannot be loaded, a
help message (followed by the underlying error) is printed to `stderr` and
`Cint(1)` is returned.

Otherwise the server is started again inside the sandbox with the same `args`
plus `--direct`, so that the inner process does not try to sandbox itself
again. `args` is never mutated.

# Returns
- `Cint(0)` when the sandboxed command ran without throwing.
- `Cint(1)` when Sandbox.jl is unavailable, the Julia installation directory
  cannot be located, or running the sandboxed command throws.
"""
function launch_in_sandbox(args::Vector{String})::Cint
    sandbox_mod = try
        # Try to load Sandbox - it should be available if running within ClaudeBox
        Base.require(Base.PkgId(Base.UUID("a4e034a1-bbed-5493-bc6f-f0a4e1c5e439"), "Sandbox"))
    catch err
        # Sandbox not available, provide helpful error message
        println(stderr, """
        Error: Sandbox.jl is required for sandboxed execution but is not available.

        Options:
        1. Run with --direct flag to execute without sandboxing:
           julia --project -m LLMBenchMCPServer ModuleName --direct

        2. Run from within ClaudeBox environment where Sandbox.jl is available

        3. Install Sandbox.jl (requires BinaryBuilder2 ecosystem):
           ] add Sandbox
        """)
        # Surface the real cause instead of silently discarding it.
        println(stderr, "Underlying error: ", sprint(showerror, err))
        return Cint(1)
    end

    # The package may have been loaded just now, i.e. in a newer world age than
    # the one this function is executing in. Calling its constructors/methods
    # directly from here can fail with "method too new to be called from this
    # world context", so all Sandbox-dependent work goes through invokelatest.
    return Base.invokelatest(_run_in_sandbox, sandbox_mod, args)
end

# Build the sandbox configuration and run the server inside it.
# `Sandbox` is the already-loaded Sandbox.jl module; must be reached through
# `Base.invokelatest` (see `launch_in_sandbox`).
function _run_in_sandbox(Sandbox::Module, args::Vector{String})::Cint
    # Julia installation directory: <julia_dir>/bin/julia
    julia_dir = dirname(dirname(Base.julia_cmd().exec[1]))
    if !isdir(julia_dir)
        # Without this mount the command below (/opt/julia/bin/julia) cannot
        # exist inside the sandbox, so fail early with a clear message.
        println(stderr, "Error: could not locate the Julia installation directory (tried: $julia_dir)")
        return Cint(1)
    end

    # The project directory (where LLMBenchMCPServer is): parent of src/
    project_dir = dirname(dirname(@__FILE__))

    host_platform = Base.BinaryPlatforms.HostPlatform()

    # Minimal Debian rootfs plus the mounts the inner server needs. The value
    # type is inferred from the pairs (all `MountInfo`) rather than forced to
    # `Any`, so the dictionary can match a `Dict{String, MountInfo}` signature.
    # NOTE(review): no Julia depot is mounted; presumably the project's
    # dependencies must be resolvable inside the sandbox some other way - confirm.
    mounts = Dict(
        "/" => Sandbox.MountInfo(
            Sandbox.debian_rootfs(; platform=host_platform),
            Sandbox.MountType.Overlayed,
        ),
        "/workspace" => Sandbox.MountInfo(pwd(), Sandbox.MountType.ReadWrite),
        "/opt/julia" => Sandbox.MountInfo(julia_dir, Sandbox.MountType.ReadOnly),
        "/opt/llmbench" => Sandbox.MountInfo(project_dir, Sandbox.MountType.ReadOnly),
    )

    env = Dict{String, String}(
        "PATH" => "/opt/julia/bin:/usr/local/bin:/usr/bin:/bin",
        "HOME" => "/root",
        "USER" => "root",
        "JULIA_PROJECT" => "/opt/llmbench",
    )

    # Add --direct (once) so the inner server does not recurse into another
    # sandbox; build a new vector so the caller's `args` stays untouched.
    inner_args = "--direct" in args ? copy(args) : vcat(args, "--direct")
    cmd = `/opt/julia/bin/julia --project=/opt/llmbench -m LLMBenchMCPServer $inner_args`

    config = Sandbox.SandboxConfig(
        mounts,
        env;
        stdin=Base.stdin,
        stdout=Base.stdout,
        stderr=Base.stderr,
        pwd="/workspace",
    )

    try
        Sandbox.with_executor() do exe
            run(exe, config, cmd)
        end
    catch err
        println(stderr, "Error running in sandbox: ", sprint(showerror, err))
        return Cint(1)
    end

    return Cint(0)
end
end
210+
120211
"""
121212
LLMBenchServer
122213
@@ -179,6 +270,7 @@ function @main(args)
179270
--revise Load Revise.jl and auto-reload code changes
180271
--no-basic-tools Disable basic tools (bash, str_replace_editor)
181272
--verbose Enable verbose output
273+
--direct Run directly without sandboxing (default: run in sandbox)
182274
--help, -h Show this help message
183275
184276
The specified module should export:
@@ -191,6 +283,7 @@ function @main(args)
191283
Examples:
192284
julia --project -m LLMBenchMCPServer MyBenchmark
193285
julia --project -m LLMBenchMCPServer MyBenchmark --socket
286+
julia --project -m LLMBenchMCPServer MyBenchmark --direct # Run without sandbox
194287
""")
195288
return 0
196289
end
@@ -203,6 +296,7 @@ function @main(args)
203296
use_revise = false
204297
include_basic_tools = true
205298
verbose = false
299+
direct_mode = false # New flag for direct execution
206300

207301
i = 2
208302
while i <= length(args)
@@ -225,11 +319,28 @@ function @main(args)
225319
elseif args[i] == "--verbose"
226320
verbose = true
227321
i += 1
322+
elseif args[i] == "--direct"
323+
direct_mode = true
324+
i += 1
228325
else
229326
println("Warning: Unknown option: $(args[i])")
230327
i += 1
231328
end
232329
end
330+
331+
# If not in direct mode, re-launch ourselves in a sandbox
332+
if !direct_mode
333+
if verbose
334+
println("Launching LLMBenchMCPServer in sandbox...")
335+
println("Note: Sandbox mode requires Sandbox.jl from BinaryBuilder2 ecosystem")
336+
end
337+
return launch_in_sandbox(args)
338+
end
339+
340+
# In direct mode, show a warning if verbose
341+
if verbose && direct_mode
342+
println("Running in DIRECT mode (no sandboxing)")
343+
end
233344

234345
# Load Revise if requested
235346
if use_revise
@@ -253,9 +364,37 @@ function @main(args)
253364

254365
# Load the module
255366
try
256-
# Load the module using Base.require
367+
# Try to load as a module first, then as a file
368+
mod = nothing
257369
mod_symbol = Symbol(module_name)
258-
mod = Base.require(Main, mod_symbol)
370+
371+
# First try to load as a registered package/module
372+
try
373+
mod = Base.require(Main, mod_symbol)
374+
catch
375+
# If that fails, try to load as a local file
376+
if endswith(module_name, ".jl")
377+
# Load file directly
378+
Base.include(Main, module_name)
379+
# Extract module name from file
380+
file_mod_name = basename(module_name)[1:end-3] # Remove .jl
381+
mod_symbol = Symbol(file_mod_name)
382+
if isdefined(Main, mod_symbol)
383+
mod = getfield(Main, mod_symbol)
384+
end
385+
elseif isfile(module_name * ".jl")
386+
# Try adding .jl extension
387+
Base.include(Main, module_name * ".jl")
388+
mod_symbol = Symbol(module_name)
389+
if isdefined(Main, mod_symbol)
390+
mod = getfield(Main, mod_symbol)
391+
end
392+
end
393+
end
394+
395+
if mod === nothing
396+
throw(ArgumentError("Could not load module $module_name"))
397+
end
259398

260399
# Extract functions
261400
setup_fn = nothing

0 commit comments

Comments
 (0)