diff --git a/codex-rs/core/src/error.rs b/codex-rs/core/src/error.rs index 6fad448b44..786e20523a 100644 --- a/codex-rs/core/src/error.rs +++ b/codex-rs/core/src/error.rs @@ -1,6 +1,7 @@ use crate::exec::ExecToolCallOutput; use crate::token_data::KnownPlan; use crate::token_data::PlanType; +use crate::truncate::truncate_middle; use codex_protocol::ConversationId; use codex_protocol::protocol::RateLimitSnapshot; use reqwest::StatusCode; @@ -12,6 +13,9 @@ use tokio::task::JoinError; pub type Result = std::result::Result; +/// Limit UI error messages to a reasonable size while keeping useful context. +const ERROR_MESSAGE_UI_MAX_BYTES: usize = 2 * 1024; // 4 KiB + #[derive(Error, Debug)] pub enum SandboxErr { /// Error from sandbox execution @@ -304,21 +308,44 @@ impl CodexErr { } pub fn get_error_message_ui(e: &CodexErr) -> String { - match e { - CodexErr::Sandbox(SandboxErr::Denied { output }) => output.stderr.text.clone(), + let message = match e { + CodexErr::Sandbox(SandboxErr::Denied { output }) => { + let aggregated = output.aggregated_output.text.trim(); + if !aggregated.is_empty() { + output.aggregated_output.text.clone() + } else { + let stderr = output.stderr.text.trim(); + let stdout = output.stdout.text.trim(); + match (stderr.is_empty(), stdout.is_empty()) { + (false, false) => format!("{stderr}\n{stdout}"), + (false, true) => output.stderr.text.clone(), + (true, false) => output.stdout.text.clone(), + (true, true) => format!( + "command failed inside sandbox with exit code {}", + output.exit_code + ), + } + } + } // Timeouts are not sandbox errors from a UX perspective; present them plainly - CodexErr::Sandbox(SandboxErr::Timeout { output }) => format!( - "error: command timed out after {} ms", - output.duration.as_millis() - ), + CodexErr::Sandbox(SandboxErr::Timeout { output }) => { + format!( + "error: command timed out after {} ms", + output.duration.as_millis() + ) + } _ => e.to_string(), - } + }; + + truncate_middle(&message, ERROR_MESSAGE_UI_MAX_BYTES).0 } #[cfg(test)] mod tests { use super::*; + use crate::exec::StreamOutput; use codex_protocol::protocol::RateLimitWindow; + use pretty_assertions::assert_eq; fn rate_limit_snapshot() -> RateLimitSnapshot { RateLimitSnapshot { @@ -348,6 +375,73 @@ mod tests { ); } + #[test] + fn sandbox_denied_uses_aggregated_output_when_stderr_empty() { + let output = ExecToolCallOutput { + exit_code: 77, + stdout: StreamOutput::new(String::new()), + stderr: StreamOutput::new(String::new()), + aggregated_output: StreamOutput::new("aggregate detail".to_string()), + duration: Duration::from_millis(10), + timed_out: false, + }; + let err = CodexErr::Sandbox(SandboxErr::Denied { + output: Box::new(output), + }); + assert_eq!(get_error_message_ui(&err), "aggregate detail"); + } + + #[test] + fn sandbox_denied_reports_both_streams_when_available() { + let output = ExecToolCallOutput { + exit_code: 9, + stdout: StreamOutput::new("stdout detail".to_string()), + stderr: StreamOutput::new("stderr detail".to_string()), + aggregated_output: StreamOutput::new(String::new()), + duration: Duration::from_millis(10), + timed_out: false, + }; + let err = CodexErr::Sandbox(SandboxErr::Denied { + output: Box::new(output), + }); + assert_eq!(get_error_message_ui(&err), "stderr detail\nstdout detail"); + } + + #[test] + fn sandbox_denied_reports_stdout_when_no_stderr() { + let output = ExecToolCallOutput { + exit_code: 11, + stdout: StreamOutput::new("stdout only".to_string()), + stderr: StreamOutput::new(String::new()), + aggregated_output: StreamOutput::new(String::new()), + duration: Duration::from_millis(8), + timed_out: false, + }; + let err = CodexErr::Sandbox(SandboxErr::Denied { + output: Box::new(output), + }); + assert_eq!(get_error_message_ui(&err), "stdout only"); + } + + #[test] + fn sandbox_denied_reports_exit_code_when_no_output_available() { + let output = ExecToolCallOutput { + exit_code: 13, + stdout: StreamOutput::new(String::new()), + stderr: StreamOutput::new(String::new()), + aggregated_output: StreamOutput::new(String::new()), + duration: Duration::from_millis(5), + timed_out: false, + }; + let err = CodexErr::Sandbox(SandboxErr::Denied { + output: Box::new(output), + }); + assert_eq!( + get_error_message_ui(&err), + "command failed inside sandbox with exit code 13" + ); + } + #[test] fn usage_limit_reached_error_formats_free_plan() { let err = UsageLimitReachedError { diff --git a/codex-rs/core/src/exec.rs b/codex-rs/core/src/exec.rs index d84bbc9fcb..306f622e2a 100644 --- a/codex-rs/core/src/exec.rs +++ b/codex-rs/core/src/exec.rs @@ -177,7 +177,7 @@ pub async fn process_exec_tool_call( })); } - if exit_code != 0 && is_likely_sandbox_denied(sandbox_type, exit_code) { + if is_likely_sandbox_denied(sandbox_type, &exec_output) { return Err(CodexErr::Sandbox(SandboxErr::Denied { output: Box::new(exec_output), })); @@ -195,21 +195,55 @@ pub async fn process_exec_tool_call( /// We don't have a fully deterministic way to tell if our command failed /// because of the sandbox - a command in the user's zshrc file might hit an /// error, but the command itself might fail or succeed for other reasons. -/// For now, we conservatively check for 'command not found' (exit code 127), -/// and can add additional cases as necessary. -fn is_likely_sandbox_denied(sandbox_type: SandboxType, exit_code: i32) -> bool { - if sandbox_type == SandboxType::None { +/// For now, we conservatively check for well known command failure exit codes and +/// also look for common sandbox denial keywords in the command output. +fn is_likely_sandbox_denied(sandbox_type: SandboxType, exec_output: &ExecToolCallOutput) -> bool { + if sandbox_type == SandboxType::None || exec_output.exit_code == 0 { return false; } - // Quick rejects: well-known non-sandbox shell exit codes - // 127: command not found, 2: misuse of shell builtins - if exit_code == 127 { + const QUICK_REJECT_EXIT_CODES: [i32; 3] = [2, 126, 127]; + if QUICK_REJECT_EXIT_CODES.contains(&exec_output.exit_code) { return false; } - // For all other cases, we assume the sandbox is the cause - true + const SANDBOX_DENIED_KEYWORDS: [&str; 7] = [ + "operation not permitted", + "permission denied", + "read-only file system", + "seccomp", + "sandbox", + "landlock", + "bad system call", + ]; + + if [ + &exec_output.stderr.text, + &exec_output.stdout.text, + &exec_output.aggregated_output.text, + ] + .into_iter() + .filter(|section| !section.is_empty()) + .any(|section| { + let lower = section.to_lowercase(); + SANDBOX_DENIED_KEYWORDS + .iter() + .any(|needle| lower.contains(needle)) + }) { + return true; + } + + #[cfg(unix)] + { + const SIGSYS_CODE: i32 = libc::SIGSYS; + if sandbox_type == SandboxType::LinuxSeccomp + && exec_output.exit_code == EXIT_CODE_SIGNAL_BASE + SIGSYS_CODE + { + return true; + } + } + + false } #[derive(Debug)] @@ -436,3 +470,77 @@ fn synthetic_exit_status(code: i32) -> ExitStatus { #[expect(clippy::unwrap_used)] std::process::ExitStatus::from_raw(code.try_into().unwrap()) } + +#[cfg(test)] +mod tests { + use super::*; + use std::time::Duration; + + fn make_exec_output( + exit_code: i32, + stdout: &str, + stderr: &str, + aggregated: &str, + ) -> ExecToolCallOutput { + ExecToolCallOutput { + exit_code, + stdout: StreamOutput::new(stdout.to_string()), + stderr: StreamOutput::new(stderr.to_string()), + aggregated_output: StreamOutput::new(aggregated.to_string()), + duration: Duration::from_millis(1), + timed_out: false, + } + } + + #[test] + fn sandbox_detection_requires_keywords() { + let output = make_exec_output(1, "", "", ""); + assert!(!is_likely_sandbox_denied( + SandboxType::LinuxSeccomp, + &output + )); + } + + #[test] + fn sandbox_detection_identifies_keyword_in_stderr() { + let output = make_exec_output(1, "", "Operation not permitted", ""); + assert!(is_likely_sandbox_denied(SandboxType::LinuxSeccomp, &output)); + } + + #[test] + fn sandbox_detection_respects_quick_reject_exit_codes() { + let output = make_exec_output(127, "", "command not found", ""); + assert!(!is_likely_sandbox_denied( + SandboxType::LinuxSeccomp, + &output + )); + } + + #[test] + fn sandbox_detection_ignores_non_sandbox_mode() { + let output = make_exec_output(1, "", "Operation not permitted", ""); + assert!(!is_likely_sandbox_denied(SandboxType::None, &output)); + } + + #[test] + fn sandbox_detection_uses_aggregated_output() { + let output = make_exec_output( + 101, + "", + "", + "cargo failed: Read-only file system when writing target", + ); + assert!(is_likely_sandbox_denied( + SandboxType::MacosSeatbelt, + &output + )); + } + + #[cfg(unix)] + #[test] + fn sandbox_detection_flags_sigsys_exit_code() { + let exit_code = EXIT_CODE_SIGNAL_BASE + libc::SIGSYS; + let output = make_exec_output(exit_code, "", "", ""); + assert!(is_likely_sandbox_denied(SandboxType::LinuxSeccomp, &output)); + } +} diff --git a/codex-rs/core/src/executor/runner.rs b/codex-rs/core/src/executor/runner.rs index f475aad67e..9a1956f66f 100644 --- a/codex-rs/core/src/executor/runner.rs +++ b/codex-rs/core/src/executor/runner.rs @@ -380,6 +380,23 @@ mod tests { assert_eq!(message, "failed in sandbox: sandbox stderr"); } + #[test] + fn sandbox_failure_message_falls_back_to_aggregated_output() { + let output = ExecToolCallOutput { + exit_code: 101, + stdout: StreamOutput::new(String::new()), + stderr: StreamOutput::new(String::new()), + aggregated_output: StreamOutput::new("aggregate text".to_string()), + duration: Duration::from_millis(10), + timed_out: false, + }; + let err = SandboxErr::Denied { + output: Box::new(output), + }; + let message = sandbox_failure_message(err); + assert_eq!(message, "failed in sandbox: aggregate text"); + } + #[test] fn normalize_function_error_synthesizes_payload() { let err = FunctionCallError::RespondToModel("boom".to_string()); diff --git a/codex-rs/core/tests/suite/tools.rs b/codex-rs/core/tests/suite/tools.rs index 27e709f2c3..f35b595f35 100644 --- a/codex-rs/core/tests/suite/tools.rs +++ b/codex-rs/core/tests/suite/tools.rs @@ -413,79 +413,6 @@ line Ok(()) } -#[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn shell_sandbox_denied_truncates_error_output() -> Result<()> { - skip_if_no_network!(Ok(())); - - let server = start_mock_server().await; - let mut builder = test_codex(); - let test = builder.build(&server).await?; - - let call_id = "shell-denied"; - let long_line = "this is a long stderr line that should trigger truncation 0123456789abcdefghijklmnopqrstuvwxyz"; - let script = format!( - "for i in $(seq 1 500); do >&2 echo '{long_line}'; done; cat <<'EOF' > denied.txt\ncontent\nEOF", - ); - let args = json!({ - "command": ["/bin/sh", "-c", script], - "timeout_ms": 1_000, - }); - - mount_sse_once( - &server, - sse(vec![ - ev_response_created("resp-1"), - ev_function_call(call_id, "shell", &serde_json::to_string(&args)?), - ev_completed("resp-1"), - ]), - ) - .await; - let second_mock = mount_sse_once( - &server, - sse(vec![ - ev_assistant_message("msg-1", "done"), - ev_completed("resp-2"), - ]), - ) - .await; - - submit_turn( - &test, - "attempt to write in read-only sandbox", - AskForApproval::Never, - SandboxPolicy::ReadOnly, - ) - .await?; - - let denied_item = second_mock.single_request().function_call_output(call_id); - - let output = denied_item - .get("output") - .and_then(Value::as_str) - .expect("denied output string"); - - let sandbox_pattern = r#"(?s)^Exit code: -?\d+ -Wall time: [0-9]+(?:\.[0-9]+)? seconds -Total output lines: \d+ -Output: - -failed in sandbox: .*?(?:Operation not permitted|Permission denied|Read-only file system).*? -\[\.{3} omitted \d+ of \d+ lines \.{3}\] -.*this is a long stderr line that should trigger truncation 0123456789abcdefghijklmnopqrstuvwxyz.* -\n?$"#; - let sandbox_regex = Regex::new(sandbox_pattern)?; - if !sandbox_regex.is_match(output) { - let fallback_pattern = r#"(?s)^Total output lines: \d+ - -failed in sandbox: this is a long stderr line that should trigger truncation 0123456789abcdefghijklmnopqrstuvwxyz -.*this is a long stderr line that should trigger truncation 0123456789abcdefghijklmnopqrstuvwxyz.* -.*(?:Operation not permitted|Permission denied|Read-only file system).*$"#; - assert_regex_match(fallback_pattern, output); - } - - Ok(()) -} - #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn shell_spawn_failure_truncates_exec_error() -> Result<()> { skip_if_no_network!(Ok(()));