Skip to content

Commit b2d81a7

Browse files
authored
Make output assertions more explicit (#4784)
Match using precise regexes.
1 parent 77a8b7f commit b2d81a7

File tree

6 files changed

+95
-81
lines changed

6 files changed

+95
-81
lines changed

codex-rs/Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

codex-rs/core/tests/common/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ path = "lib.rs"
1010
anyhow = { workspace = true }
1111
assert_cmd = { workspace = true }
1212
codex-core = { workspace = true }
13+
regex-lite = { workspace = true }
1314
serde_json = { workspace = true }
1415
tempfile = { workspace = true }
1516
tokio = { workspace = true, features = ["time"] }

codex-rs/core/tests/common/lib.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ use codex_core::CodexConversation;
66
use codex_core::config::Config;
77
use codex_core::config::ConfigOverrides;
88
use codex_core::config::ConfigToml;
9+
use regex_lite::Regex;
910

1011
#[cfg(target_os = "linux")]
1112
use assert_cmd::cargo::cargo_bin;
@@ -14,6 +15,16 @@ pub mod responses;
1415
pub mod test_codex;
1516
pub mod test_codex_exec;
1617

18+
#[track_caller]
19+
pub fn assert_regex_match<'s>(pattern: &str, actual: &'s str) -> regex_lite::Captures<'s> {
20+
let regex = Regex::new(pattern).unwrap_or_else(|err| {
21+
panic!("failed to compile regex {pattern:?}: {err}");
22+
});
23+
regex
24+
.captures(actual)
25+
.unwrap_or_else(|| panic!("regex {pattern:?} did not match {actual:?}"))
26+
}
27+
1728
/// Returns a default `Config` whose on-disk state is confined to the provided
1829
/// temporary directory. Using a per-test directory keeps tests hermetic and
1930
/// avoids clobbering a developer’s real `~/.codex`.

codex-rs/core/tests/suite/shell_serialization.rs

Lines changed: 29 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ use codex_core::protocol::InputItem;
88
use codex_core::protocol::Op;
99
use codex_core::protocol::SandboxPolicy;
1010
use codex_protocol::config_types::ReasoningSummary;
11+
use core_test_support::assert_regex_match;
1112
use core_test_support::responses::ev_assistant_message;
1213
use core_test_support::responses::ev_completed;
1314
use core_test_support::responses::ev_function_call;
@@ -131,10 +132,7 @@ async fn shell_output_stays_json_without_freeform_apply_patch() -> Result<()> {
131132
.get("output")
132133
.and_then(Value::as_str)
133134
.unwrap_or_default();
134-
assert!(
135-
stdout.contains("shell json"),
136-
"expected stdout to include command output, got {stdout:?}"
137-
);
135+
assert_regex_match(r"(?s)^shell json\n?$", stdout);
138136

139137
Ok(())
140138
}
@@ -190,18 +188,12 @@ async fn shell_output_is_structured_with_freeform_apply_patch() -> Result<()> {
190188
serde_json::from_str::<Value>(output).is_err(),
191189
"expected structured shell output to be plain text",
192190
);
193-
assert!(
194-
output.starts_with("Exit code: 0\n"),
195-
"expected exit code prefix, got {output:?}",
196-
);
197-
assert!(
198-
output.contains("\nOutput:\n"),
199-
"expected Output section, got {output:?}"
200-
);
201-
assert!(
202-
output.contains("freeform shell"),
203-
"expected stdout content, got {output:?}"
204-
);
191+
let expected_pattern = r"(?s)^Exit code: 0
192+
Wall time: [0-9]+(?:\.[0-9]+)? seconds
193+
Output:
194+
freeform shell
195+
?$";
196+
assert_regex_match(expected_pattern, output);
205197

206198
Ok(())
207199
}
@@ -259,18 +251,27 @@ async fn shell_output_reserializes_truncated_content() -> Result<()> {
259251
serde_json::from_str::<Value>(output).is_err(),
260252
"expected truncated shell output to be plain text",
261253
);
262-
assert!(
263-
output.starts_with("Exit code: 0\n"),
264-
"expected exit code prefix, got {output:?}",
265-
);
266-
assert!(
267-
output.lines().any(|line| line == "Total output lines: 400"),
268-
"expected total output lines marker, got {output:?}",
269-
);
270-
assert!(
271-
output.contains("[... omitted"),
272-
"expected truncated marker, got {output:?}",
273-
);
254+
let truncated_pattern = r#"(?s)^Exit code: 0
255+
Wall time: [0-9]+(?:\.[0-9]+)? seconds
256+
Total output lines: 400
257+
Output:
258+
Total output lines: 400
259+
260+
1
261+
2
262+
3
263+
4
264+
5
265+
6
266+
.*\[\.{3} omitted \d+ of 400 lines \.{3}\]
267+
268+
.*\n396
269+
397
270+
398
271+
399
272+
400
273+
$"#;
274+
assert_regex_match(truncated_pattern, output);
274275

275276
Ok(())
276277
}

codex-rs/core/tests/suite/tool_harness.rs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ use codex_core::protocol::Op;
99
use codex_core::protocol::SandboxPolicy;
1010
use codex_protocol::config_types::ReasoningSummary;
1111
use codex_protocol::plan_tool::StepStatus;
12+
use core_test_support::assert_regex_match;
1213
use core_test_support::responses;
1314
use core_test_support::responses::ev_apply_patch_function_call;
1415
use core_test_support::responses::ev_assistant_message;
@@ -116,10 +117,7 @@ async fn shell_tool_executes_command_and_streams_output() -> anyhow::Result<()>
116117
let exec_output: Value = serde_json::from_str(output_text)?;
117118
assert_eq!(exec_output["metadata"]["exit_code"], 0);
118119
let stdout = exec_output["output"].as_str().expect("stdout field");
119-
assert!(
120-
stdout.contains("tool harness"),
121-
"expected stdout to contain command output, got {stdout:?}"
122-
);
120+
assert_regex_match(r"(?s)^tool harness\n?$", stdout);
123121

124122
Ok(())
125123
}

codex-rs/core/tests/suite/tools.rs

Lines changed: 51 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ use codex_core::protocol::InputItem;
99
use codex_core::protocol::Op;
1010
use codex_core::protocol::SandboxPolicy;
1111
use codex_protocol::config_types::ReasoningSummary;
12+
use core_test_support::assert_regex_match;
1213
use core_test_support::responses::ev_assistant_message;
1314
use core_test_support::responses::ev_completed;
1415
use core_test_support::responses::ev_custom_tool_call;
@@ -21,6 +22,7 @@ use core_test_support::skip_if_no_network;
2122
use core_test_support::test_codex::TestCodex;
2223
use core_test_support::test_codex::test_codex;
2324
use core_test_support::wait_for_event;
25+
use regex_lite::Regex;
2426
use serde_json::Value;
2527
use serde_json::json;
2628
use wiremock::Request;
@@ -254,10 +256,8 @@ async fn shell_escalated_permissions_rejected_then_ok() -> Result<()> {
254256
"expected exit code 0 after rerunning without escalation",
255257
);
256258
let stdout = output_json["output"].as_str().unwrap_or_default();
257-
assert!(
258-
stdout.contains("shell ok"),
259-
"expected stdout to include command output, got {stdout:?}"
260-
);
259+
let stdout_pattern = r"(?s)^shell ok\n?$";
260+
assert_regex_match(stdout_pattern, stdout);
261261

262262
Ok(())
263263
}
@@ -437,30 +437,24 @@ async fn shell_timeout_includes_timeout_prefix_and_metadata() -> Result<()> {
437437
);
438438

439439
let stdout = output_json["output"].as_str().unwrap_or_default();
440-
assert!(
441-
stdout.contains("command timed out after "),
442-
"expected timeout prefix, got {stdout:?}"
443-
);
444-
let third_line = stdout.lines().nth(2).unwrap_or_default();
445-
let duration_ms = third_line
446-
.strip_prefix("command timed out after ")
447-
.and_then(|line| line.strip_suffix(" milliseconds"))
448-
.and_then(|value| value.parse::<u64>().ok())
440+
let timeout_pattern = r"(?s)^Total output lines: \d+
441+
442+
command timed out after (?P<ms>\d+) milliseconds
443+
line
444+
.*$";
445+
let captures = assert_regex_match(timeout_pattern, stdout);
446+
let duration_ms = captures
447+
.name("ms")
448+
.and_then(|m| m.as_str().parse::<u64>().ok())
449449
.unwrap_or_default();
450450
assert!(
451451
duration_ms >= timeout_ms,
452452
"expected duration >= configured timeout, got {duration_ms} (timeout {timeout_ms})"
453453
);
454454
} else {
455455
// Fallback: accept the signal classification path to deflake the test.
456-
assert!(
457-
output_str.contains("execution error"),
458-
"unexpected non-JSON output: {output_str:?}"
459-
);
460-
assert!(
461-
output_str.contains("Signal(") || output_str.to_lowercase().contains("signal"),
462-
"expected signal classification in error output, got {output_str:?}"
463-
);
456+
let signal_pattern = r"(?is)^execution error:.*signal.*$";
457+
assert_regex_match(signal_pattern, output_str);
464458
}
465459

466460
Ok(())
@@ -518,30 +512,25 @@ async fn shell_sandbox_denied_truncates_error_output() -> Result<()> {
518512
.and_then(Value::as_str)
519513
.expect("denied output string");
520514

521-
assert!(
522-
output.contains("failed in sandbox: "),
523-
"expected sandbox error prefix, got {output:?}"
524-
);
525-
assert!(
526-
output.contains("[... omitted"),
527-
"expected truncated marker, got {output:?}"
528-
);
529-
assert!(
530-
output.contains(long_line),
531-
"expected truncated stderr sample, got {output:?}"
532-
);
533-
// Linux distributions may surface sandbox write failures as different errno messages
534-
// depending on the underlying mechanism (e.g., EPERM, EACCES, or EROFS). Accept a
535-
// small set of common variants to keep this cross-platform.
536-
let denial_markers = [
537-
"Operation not permitted", // EPERM
538-
"Permission denied", // EACCES
539-
"Read-only file system", // EROFS
540-
];
541-
assert!(
542-
denial_markers.iter().any(|m| output.contains(m)),
543-
"expected sandbox denial message, got {output:?}"
544-
);
515+
let sandbox_pattern = r#"(?s)^Exit code: -?\d+
516+
Wall time: [0-9]+(?:\.[0-9]+)? seconds
517+
Total output lines: \d+
518+
Output:
519+
Total output lines: \d+
520+
521+
failed in sandbox: .*?(?:Operation not permitted|Permission denied|Read-only file system).*?
522+
\[\.{3} omitted \d+ of \d+ lines \.{3}\]
523+
.*this is a long stderr line that should trigger truncation 0123456789abcdefghijklmnopqrstuvwxyz.*
524+
\n?$"#;
525+
let sandbox_regex = Regex::new(sandbox_pattern)?;
526+
if !sandbox_regex.is_match(output) {
527+
let fallback_pattern = r#"(?s)^Total output lines: \d+
528+
529+
failed in sandbox: this is a long stderr line that should trigger truncation 0123456789abcdefghijklmnopqrstuvwxyz
530+
.*this is a long stderr line that should trigger truncation 0123456789abcdefghijklmnopqrstuvwxyz.*
531+
.*(?:Operation not permitted|Permission denied|Read-only file system).*$"#;
532+
assert_regex_match(fallback_pattern, output);
533+
}
545534

546535
Ok(())
547536
}
@@ -604,10 +593,23 @@ async fn shell_spawn_failure_truncates_exec_error() -> Result<()> {
604593
.and_then(Value::as_str)
605594
.expect("spawn failure output string");
606595

607-
assert!(
608-
output.contains("execution error:"),
609-
"expected execution error prefix, got {output:?}"
610-
);
596+
let spawn_error_pattern = r#"(?s)^Exit code: -?\d+
597+
Wall time: [0-9]+(?:\.[0-9]+)? seconds
598+
Output:
599+
execution error: .*$"#;
600+
let spawn_truncated_pattern = r#"(?s)^Exit code: -?\d+
601+
Wall time: [0-9]+(?:\.[0-9]+)? seconds
602+
Total output lines: \d+
603+
Output:
604+
Total output lines: \d+
605+
606+
execution error: .*$"#;
607+
let spawn_error_regex = Regex::new(spawn_error_pattern)?;
608+
let spawn_truncated_regex = Regex::new(spawn_truncated_pattern)?;
609+
if !spawn_error_regex.is_match(output) && !spawn_truncated_regex.is_match(output) {
610+
let fallback_pattern = r"(?s)^execution error: .*$";
611+
assert_regex_match(fallback_pattern, output);
612+
}
611613
assert!(output.len() <= 10 * 1024);
612614

613615
Ok(())

0 commit comments

Comments
 (0)