@@ -9,6 +9,7 @@ use codex_core::protocol::InputItem;
9
9
use codex_core:: protocol:: Op ;
10
10
use codex_core:: protocol:: SandboxPolicy ;
11
11
use codex_protocol:: config_types:: ReasoningSummary ;
12
+ use core_test_support:: assert_regex_match;
12
13
use core_test_support:: responses:: ev_assistant_message;
13
14
use core_test_support:: responses:: ev_completed;
14
15
use core_test_support:: responses:: ev_custom_tool_call;
@@ -21,6 +22,7 @@ use core_test_support::skip_if_no_network;
21
22
use core_test_support:: test_codex:: TestCodex ;
22
23
use core_test_support:: test_codex:: test_codex;
23
24
use core_test_support:: wait_for_event;
25
+ use regex_lite:: Regex ;
24
26
use serde_json:: Value ;
25
27
use serde_json:: json;
26
28
use wiremock:: Request ;
@@ -254,10 +256,8 @@ async fn shell_escalated_permissions_rejected_then_ok() -> Result<()> {
254
256
"expected exit code 0 after rerunning without escalation" ,
255
257
) ;
256
258
let stdout = output_json[ "output" ] . as_str ( ) . unwrap_or_default ( ) ;
257
- assert ! (
258
- stdout. contains( "shell ok" ) ,
259
- "expected stdout to include command output, got {stdout:?}"
260
- ) ;
259
+ let stdout_pattern = r"(?s)^shell ok\n?$" ;
260
+ assert_regex_match ( stdout_pattern, stdout) ;
261
261
262
262
Ok ( ( ) )
263
263
}
@@ -437,30 +437,24 @@ async fn shell_timeout_includes_timeout_prefix_and_metadata() -> Result<()> {
437
437
) ;
438
438
439
439
let stdout = output_json[ "output" ] . as_str ( ) . unwrap_or_default ( ) ;
440
- assert ! (
441
- stdout . contains ( "command timed out after " ) ,
442
- "expected timeout prefix, got {stdout:?}"
443
- ) ;
444
- let third_line = stdout . lines ( ) . nth ( 2 ) . unwrap_or_default ( ) ;
445
- let duration_ms = third_line
446
- . strip_prefix ( "command timed out after " )
447
- . and_then ( |line| line . strip_suffix ( " milliseconds" ) )
448
- . and_then ( |value| value . parse :: < u64 > ( ) . ok ( ) )
440
+ let timeout_pattern = r"(?s)^Total output lines: \d+
441
+
442
+ command timed out after (?P<ms>\d+) milliseconds
443
+ line
444
+ .*$" ;
445
+ let captures = assert_regex_match ( timeout_pattern , stdout ) ;
446
+ let duration_ms = captures
447
+ . name ( "ms" )
448
+ . and_then ( |m| m . as_str ( ) . parse :: < u64 > ( ) . ok ( ) )
449
449
. unwrap_or_default ( ) ;
450
450
assert ! (
451
451
duration_ms >= timeout_ms,
452
452
"expected duration >= configured timeout, got {duration_ms} (timeout {timeout_ms})"
453
453
) ;
454
454
} else {
455
455
// Fallback: accept the signal classification path to deflake the test.
456
- assert ! (
457
- output_str. contains( "execution error" ) ,
458
- "unexpected non-JSON output: {output_str:?}"
459
- ) ;
460
- assert ! (
461
- output_str. contains( "Signal(" ) || output_str. to_lowercase( ) . contains( "signal" ) ,
462
- "expected signal classification in error output, got {output_str:?}"
463
- ) ;
456
+ let signal_pattern = r"(?is)^execution error:.*signal.*$" ;
457
+ assert_regex_match ( signal_pattern, output_str) ;
464
458
}
465
459
466
460
Ok ( ( ) )
@@ -518,30 +512,25 @@ async fn shell_sandbox_denied_truncates_error_output() -> Result<()> {
518
512
. and_then ( Value :: as_str)
519
513
. expect ( "denied output string" ) ;
520
514
521
- assert ! (
522
- output. contains( "failed in sandbox: " ) ,
523
- "expected sandbox error prefix, got {output:?}"
524
- ) ;
525
- assert ! (
526
- output. contains( "[... omitted" ) ,
527
- "expected truncated marker, got {output:?}"
528
- ) ;
529
- assert ! (
530
- output. contains( long_line) ,
531
- "expected truncated stderr sample, got {output:?}"
532
- ) ;
533
- // Linux distributions may surface sandbox write failures as different errno messages
534
- // depending on the underlying mechanism (e.g., EPERM, EACCES, or EROFS). Accept a
535
- // small set of common variants to keep this cross-platform.
536
- let denial_markers = [
537
- "Operation not permitted" , // EPERM
538
- "Permission denied" , // EACCES
539
- "Read-only file system" , // EROFS
540
- ] ;
541
- assert ! (
542
- denial_markers. iter( ) . any( |m| output. contains( m) ) ,
543
- "expected sandbox denial message, got {output:?}"
544
- ) ;
515
+ let sandbox_pattern = r#"(?s)^Exit code: -?\d+
516
+ Wall time: [0-9]+(?:\.[0-9]+)? seconds
517
+ Total output lines: \d+
518
+ Output:
519
+ Total output lines: \d+
520
+
521
+ failed in sandbox: .*?(?:Operation not permitted|Permission denied|Read-only file system).*?
522
+ \[\.{3} omitted \d+ of \d+ lines \.{3}\]
523
+ .*this is a long stderr line that should trigger truncation 0123456789abcdefghijklmnopqrstuvwxyz.*
524
+ \n?$"# ;
525
+ let sandbox_regex = Regex :: new ( sandbox_pattern) ?;
526
+ if !sandbox_regex. is_match ( output) {
527
+ let fallback_pattern = r#"(?s)^Total output lines: \d+
528
+
529
+ failed in sandbox: this is a long stderr line that should trigger truncation 0123456789abcdefghijklmnopqrstuvwxyz
530
+ .*this is a long stderr line that should trigger truncation 0123456789abcdefghijklmnopqrstuvwxyz.*
531
+ .*(?:Operation not permitted|Permission denied|Read-only file system).*$"# ;
532
+ assert_regex_match ( fallback_pattern, output) ;
533
+ }
545
534
546
535
Ok ( ( ) )
547
536
}
@@ -604,10 +593,23 @@ async fn shell_spawn_failure_truncates_exec_error() -> Result<()> {
604
593
. and_then ( Value :: as_str)
605
594
. expect ( "spawn failure output string" ) ;
606
595
607
- assert ! (
608
- output. contains( "execution error:" ) ,
609
- "expected execution error prefix, got {output:?}"
610
- ) ;
596
+ let spawn_error_pattern = r#"(?s)^Exit code: -?\d+
597
+ Wall time: [0-9]+(?:\.[0-9]+)? seconds
598
+ Output:
599
+ execution error: .*$"# ;
600
+ let spawn_truncated_pattern = r#"(?s)^Exit code: -?\d+
601
+ Wall time: [0-9]+(?:\.[0-9]+)? seconds
602
+ Total output lines: \d+
603
+ Output:
604
+ Total output lines: \d+
605
+
606
+ execution error: .*$"# ;
607
+ let spawn_error_regex = Regex :: new ( spawn_error_pattern) ?;
608
+ let spawn_truncated_regex = Regex :: new ( spawn_truncated_pattern) ?;
609
+ if !spawn_error_regex. is_match ( output) && !spawn_truncated_regex. is_match ( output) {
610
+ let fallback_pattern = r"(?s)^execution error: .*$" ;
611
+ assert_regex_match ( fallback_pattern, output) ;
612
+ }
611
613
assert ! ( output. len( ) <= 10 * 1024 ) ;
612
614
613
615
Ok ( ( ) )
0 commit comments