Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
255 changes: 230 additions & 25 deletions codex-rs/core/src/exec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,31 @@ use crate::spawn::spawn_child_async;

const DEFAULT_TIMEOUT_MS: u64 = 10_000;

// Hardcode these since it does not seem worth including the libc crate just
// for these.
const SIGKILL_CODE: i32 = 9;
// Conventional bases and codes that are platform-agnostic.
const TIMEOUT_CODE: i32 = 64;
const EXIT_CODE_SIGNAL_BASE: i32 = 128; // conventional shell: 128 + signal
const EXEC_TIMEOUT_EXIT_CODE: i32 = 124; // conventional timeout exit code
const EXIT_NOT_EXECUTABLE_CODE: i32 = 126; // "found but not executable"/shebang/perms

// Unix signal constants via libc (grouped), including SIGSYS. These are only available on Unix.
#[cfg(target_family = "unix")]
mod unix_sig {
pub const SIGINT_CODE: i32 = libc::SIGINT; // Ctrl-C / interrupt
pub const SIGABRT_CODE: i32 = libc::SIGABRT; // abort
pub const SIGBUS_CODE: i32 = libc::SIGBUS; // bus error
pub const SIGFPE_CODE: i32 = libc::SIGFPE; // floating point exception
pub const SIGSEGV_CODE: i32 = libc::SIGSEGV; // segmentation fault
pub const SIGPIPE_CODE: i32 = libc::SIGPIPE; // broken pipe
pub const SIGTERM_CODE: i32 = libc::SIGTERM; // termination
pub const SIGKILL_CODE: i32 = libc::SIGKILL;
pub const SIGSYS_CODE: i32 = libc::SIGSYS;
}
#[cfg(target_family = "unix")]
use unix_sig::*;

// On non-Unix (Windows), synthesize a "killed" signal using the conventional 9.
#[cfg(not(target_family = "unix"))]
const SIGKILL_CODE: i32 = 9;

// I/O buffer sizing
const READ_CHUNK_SIZE: usize = 8192; // bytes per read
Expand Down Expand Up @@ -72,6 +91,126 @@ pub enum SandboxType {
LinuxSeccomp,
}

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum SandboxVerdict {
LikelySandbox,
LikelyNotSandbox,
Unknown,
}

fn stderr_hints_sandbox(stderr: &str) -> bool {
// Conservative: require explicit sandbox/seccomp phrasing likely to come from
// a kernel/seatbelt/seccomp denial; generic perms errors are too noisy.
let s = stderr.to_ascii_lowercase();
s.contains("sandbox: deny")
|| s.contains("not permitted by sandbox")
|| s.contains("bad system call")
|| s.contains("seccomp")
// Less explicit, but commonly emitted by shells and tools when a seatbelt
// or seccomp policy blocks a write/open. Tests rely on these.
|| s.contains("operation not permitted")
|| s.contains("permission denied")
|| s.contains("read-only file system")
Comment on lines +101 to +113
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Avoid treating generic “permission denied” output as sandbox denial

The new stderr_hints_sandbox function classifies any stderr containing phrases like "permission denied" or "operation not permitted" as LikelySandbox, and process_exec_tool_call turns that verdict into a SandboxErr::Denied. Many common commands emit those exact strings for reasons unrelated to the sandbox (e.g., git clone failing with Permission denied (publickey) or an SSH server refusing credentials). When such a failure happens under seatbelt/seccomp, the code will now misreport it as a sandbox denial even though the sandbox played no role. This defeats the goal of distinguishing real sandbox denials from normal command failures and will send users down the wrong debugging path. The heuristic should be narrowed or combined with other evidence before upgrading to Denied.

Useful? React with 👍 / 👎.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, it's a first step. I agree that there's no such thing as a perfect heuristic; i wonder if we should adopt a weighted approach which assigns each of these "signals" with a weight, and then calculate the probability that it's a sandbox error.

That sounds good, but it becomes super complex. Either way, a permission denied (publickey) is likely a sandbox fail as it'll require an escalation to the user to fix.

}

fn sandbox_verdict(sandbox_type: SandboxType, status: ExitStatus, stderr: &str) -> SandboxVerdict {
if matches!(sandbox_type, SandboxType::None) {
return SandboxVerdict::LikelyNotSandbox;
}

// Prefer the signal path when available.
#[cfg(target_family = "unix")]
{
if let Some(sig) = status.signal() {
if sig == SIGSYS_CODE {
// SIGSYS under seccomp/seatbelt or invalid syscall is a strong tell.
return SandboxVerdict::LikelySandbox;
}
// Common user/app signals -> not sandbox.
match sig {
SIGINT_CODE | SIGABRT_CODE | SIGBUS_CODE | SIGFPE_CODE | SIGKILL_CODE
| SIGSEGV_CODE | SIGPIPE_CODE | SIGTERM_CODE => {
return SandboxVerdict::LikelyNotSandbox;
}
_ => { /* fall through */ }
}
}
}

let code = status.code().unwrap_or(-1);

// If stderr strongly hints at sandbox denial, prefer that classification even
// when the exit code is within generic BSD sysexits. This handles cases like
// macOS seatbelt failing early with "Operation not permitted".
if stderr_hints_sandbox(stderr) {
return SandboxVerdict::LikelySandbox;
}

// Immediate NotSandbox codes
// - 0: success
// - 2: common "misuse of shell builtins"
// - 124: conventional timeout wrapper exit code
// - 127: command not found
// - 64..=78: BSD sysexits range
// - 128 + {SIGINT, SIGABRT, SIGBUS, SIGFPE, SIGKILL, SIGSEGV, SIGPIPE, SIGTERM}
#[cfg(target_family = "unix")]
{
let sig_derived_not_sandbox = [
EXIT_CODE_SIGNAL_BASE + SIGINT_CODE,
EXIT_CODE_SIGNAL_BASE + SIGABRT_CODE,
EXIT_CODE_SIGNAL_BASE + SIGBUS_CODE,
EXIT_CODE_SIGNAL_BASE + SIGFPE_CODE,
EXIT_CODE_SIGNAL_BASE + SIGKILL_CODE,
EXIT_CODE_SIGNAL_BASE + SIGSEGV_CODE,
EXIT_CODE_SIGNAL_BASE + SIGPIPE_CODE,
EXIT_CODE_SIGNAL_BASE + SIGTERM_CODE,
];
if code == 0
|| code == 2
|| code == EXEC_TIMEOUT_EXIT_CODE
|| code == 127
|| (64..=78).contains(&code)
|| sig_derived_not_sandbox.contains(&code)
{
return SandboxVerdict::LikelyNotSandbox;
}
}
#[cfg(not(target_family = "unix"))]
{
if code == 0
|| code == 2
|| code == EXEC_TIMEOUT_EXIT_CODE
|| code == 127
|| (64..=78).contains(&code)
{
return SandboxVerdict::LikelyNotSandbox;
}
}

// Shell-style signal encoding for SIGSYS (Unix).
#[cfg(target_family = "unix")]
{
if code == EXIT_CODE_SIGNAL_BASE + SIGSYS_CODE {
return SandboxVerdict::LikelySandbox;
}
}

// 126 is often perms/shebang; upgrade only with explicit hints.
if code == EXIT_NOT_EXECUTABLE_CODE {
return if stderr_hints_sandbox(stderr) {
SandboxVerdict::LikelySandbox
} else {
SandboxVerdict::Unknown
};
}

if stderr_hints_sandbox(stderr) {
return SandboxVerdict::LikelySandbox;
}

SandboxVerdict::Unknown
}

#[derive(Clone)]
pub struct StdoutStream {
pub sub_id: String,
Expand Down Expand Up @@ -142,14 +281,23 @@ pub async fn process_exec_tool_call(
Ok(raw_output) => {
#[allow(unused_mut)]
let mut timed_out = raw_output.timed_out;
// If the process was killed by a signal, handle timeouts distinctly and
// defer SIGSYS (possible sandbox) until after we can examine stderr.
let mut pending_signal: Option<i32> = None;

#[cfg(target_family = "unix")]
{
if let Some(signal) = raw_output.exit_status.signal() {
if signal == TIMEOUT_CODE {
timed_out = true;
} else {
return Err(CodexErr::Sandbox(SandboxErr::Signal(signal)));
// Defer SIGSYS (possible sandbox) only when a sandbox was requested;
// otherwise, treat any non-timeout signal as an immediate error.
if signal == SIGSYS_CODE && !matches!(sandbox_type, SandboxType::None) {
pending_signal = Some(signal);
} else {
return Err(CodexErr::Sandbox(SandboxErr::Signal(signal)));
}
}
}
}
Expand Down Expand Up @@ -177,11 +325,26 @@ pub async fn process_exec_tool_call(
}));
}

if exit_code != 0 && is_likely_sandbox_denied(sandbox_type, exit_code) {
let verdict = sandbox_verdict(
sandbox_type,
raw_output.exit_status,
&exec_output.stderr.text,
);
tracing::debug!(
target: "codex_core::exec",
exit_code = exec_output.exit_code,
?pending_signal,
verdict = ?verdict,
"exec SandboxClassification"
);
if matches!(verdict, SandboxVerdict::LikelySandbox) {
return Err(CodexErr::Sandbox(SandboxErr::Denied {
output: Box::new(exec_output),
}));
}
if let Some(sig) = pending_signal {
return Err(CodexErr::Sandbox(SandboxErr::Signal(sig)));
}

Ok(exec_output)
}
Expand All @@ -192,26 +355,6 @@ pub async fn process_exec_tool_call(
}
}

/// We don't have a fully deterministic way to tell if our command failed
/// because of the sandbox - a command in the user's zshrc file might hit an
/// error, but the command itself might fail or succeed for other reasons.
/// For now, we conservatively check for 'command not found' (exit code 127),
/// and can add additional cases as necessary.
fn is_likely_sandbox_denied(sandbox_type: SandboxType, exit_code: i32) -> bool {
if sandbox_type == SandboxType::None {
return false;
}

// Quick rejects: well-known non-sandbox shell exit codes
// 127: command not found, 2: misuse of shell builtins
if exit_code == 127 {
return false;
}

// For all other cases, we assume the sandbox is the cause
true
}

#[derive(Debug)]
pub struct StreamOutput<T> {
pub text: T,
Expand Down Expand Up @@ -436,3 +579,65 @@ fn synthetic_exit_status(code: i32) -> ExitStatus {
#[expect(clippy::unwrap_used)]
std::process::ExitStatus::from_raw(code.try_into().unwrap())
}

#[cfg(test)]
mod tests {
use super::*;
#[cfg(unix)]
use std::os::unix::process::ExitStatusExt;

#[cfg(unix)]
fn status_from_code(c: i32) -> ExitStatus {
// Synthesize a normal exit status with the given exit code.
ExitStatus::from_raw(c << 8)
}

#[test]
#[cfg(unix)]
fn not_sandbox_obvious_codes() {
assert!(matches!(
sandbox_verdict(SandboxType::MacosSeatbelt, status_from_code(127), ""),
SandboxVerdict::LikelyNotSandbox
));
assert!(matches!(
sandbox_verdict(SandboxType::LinuxSeccomp, status_from_code(124), ""),
SandboxVerdict::LikelyNotSandbox
));
assert!(matches!(
sandbox_verdict(SandboxType::LinuxSeccomp, status_from_code(2), ""),
SandboxVerdict::LikelyNotSandbox
));
}

#[test]
#[cfg(target_family = "unix")]
fn sandbox_sigsys_codepath() {
let code = EXIT_CODE_SIGNAL_BASE + SIGSYS_CODE;
assert!(matches!(
sandbox_verdict(SandboxType::LinuxSeccomp, status_from_code(code), ""),
SandboxVerdict::LikelySandbox
));
}

#[test]
#[cfg(unix)]
fn sandbox_stderr_hint() {
assert!(matches!(
sandbox_verdict(
SandboxType::MacosSeatbelt,
status_from_code(126),
"Sandbox: deny file-read-data"
),
SandboxVerdict::LikelySandbox
));
}

#[test]
#[cfg(unix)]
fn unknown_generic_failure() {
assert!(matches!(
sandbox_verdict(SandboxType::MacosSeatbelt, status_from_code(1), ""),
SandboxVerdict::Unknown
));
}
}
2 changes: 1 addition & 1 deletion codex-rs/core/tests/suite/exec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ async fn write_file_fails_as_sandbox_error() {
let tmp = TempDir::new().expect("should be able to create temp dir");
let path = tmp.path().join("test.txt");
let cmd = vec![
"/user/bin/touch",
"/usr/bin/touch",
path.to_str().expect("should be able to get path"),
];

Expand Down