Skip to content

Commit a63ec9b

Browse files
committed
test(app-server): add zsh-fork v2 e2e and test-client coverage
1 parent 5c21e7f commit a63ec9b

File tree

3 files changed

+918
-3
lines changed

3 files changed

+918
-3
lines changed

codex-rs/app-server-test-client/src/lib.rs

Lines changed: 202 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use std::collections::VecDeque;
2+
use std::ffi::OsString;
23
use std::fs;
34
use std::fs::OpenOptions;
45
use std::io::BufRead;
@@ -29,6 +30,7 @@ use codex_app_server_protocol::ClientRequest;
2930
use codex_app_server_protocol::CommandExecutionApprovalDecision;
3031
use codex_app_server_protocol::CommandExecutionRequestApprovalParams;
3132
use codex_app_server_protocol::CommandExecutionRequestApprovalResponse;
33+
use codex_app_server_protocol::CommandExecutionStatus;
3234
use codex_app_server_protocol::DynamicToolSpec;
3335
use codex_app_server_protocol::FileChangeApprovalDecision;
3436
use codex_app_server_protocol::FileChangeRequestApprovalParams;
@@ -55,6 +57,7 @@ use codex_app_server_protocol::SendUserMessageParams;
5557
use codex_app_server_protocol::SendUserMessageResponse;
5658
use codex_app_server_protocol::ServerNotification;
5759
use codex_app_server_protocol::ServerRequest;
60+
use codex_app_server_protocol::ThreadItem;
5861
use codex_app_server_protocol::ThreadListParams;
5962
use codex_app_server_protocol::ThreadListResponse;
6063
use codex_app_server_protocol::ThreadResumeParams;
@@ -78,6 +81,30 @@ use tungstenite::stream::MaybeTlsStream;
7881
use url::Url;
7982
use uuid::Uuid;
8083

84+
/// Notification method names that this client passes as
/// `opt_out_notification_methods` in `InitializeCapabilities`.
///
/// The entries are grouped into legacy `codex/event/*` streaming deltas,
/// other legacy `codex/event/*` events, and v2 `item/*` delta notifications.
const NOTIFICATIONS_TO_OPT_OUT: &[&str] = &[
85+
// Legacy codex/event (v1-style) deltas.
86+
"codex/event/agent_message_content_delta",
87+
"codex/event/agent_message_delta",
88+
"codex/event/agent_reasoning_delta",
89+
"codex/event/reasoning_content_delta",
90+
"codex/event/reasoning_raw_content_delta",
91+
"codex/event/exec_command_output_delta",
92+
// Other legacy events.
93+
"codex/event/exec_approval_request",
94+
"codex/event/exec_command_begin",
95+
"codex/event/exec_command_end",
96+
"codex/event/exec_output",
97+
"codex/event/item_started",
98+
"codex/event/item_completed",
99+
// v2 item deltas.
100+
"item/agentMessage/delta",
101+
"item/plan/delta",
102+
"item/commandExecution/outputDelta",
103+
"item/fileChange/outputDelta",
104+
"item/reasoning/summaryTextDelta",
105+
"item/reasoning/textDelta",
106+
];
107+
81108
/// Minimal launcher that initializes the Codex app-server and logs the handshake.
82109
#[derive(Parser)]
83110
#[command(author = "Codex", version, about = "Bootstrap Codex app-server", long_about = None)]
@@ -180,6 +207,18 @@ enum CliCommand {
180207
/// Follow-up user message for the second turn.
181208
follow_up_message: String,
182209
},
210+
/// Trigger zsh-fork multi-subcommand approvals and assert expected approval behavior.
211+
#[command(name = "trigger-zsh-fork-multi-cmd-approval")]
212+
TriggerZshForkMultiCmdApproval {
213+
/// Optional prompt; defaults to an explicit `/usr/bin/true && /usr/bin/true` command.
214+
user_message: Option<String>,
215+
/// Minimum number of command-approval callbacks expected in the turn.
216+
#[arg(long, default_value_t = 2)]
217+
min_approvals: usize,
218+
/// One-based approval index to abort (e.g. --abort-on 2 aborts the second approval).
219+
#[arg(long)]
220+
abort_on: Option<usize>,
221+
},
183222
/// Trigger the ChatGPT login flow and wait for completion.
184223
TestLogin,
185224
/// Fetch the current account rate limits from the Codex app-server.
@@ -265,6 +304,21 @@ pub fn run() -> Result<()> {
265304
&dynamic_tools,
266305
)
267306
}
307+
CliCommand::TriggerZshForkMultiCmdApproval {
308+
user_message,
309+
min_approvals,
310+
abort_on,
311+
} => {
312+
let endpoint = resolve_endpoint(codex_bin, url)?;
313+
trigger_zsh_fork_multi_cmd_approval(
314+
&endpoint,
315+
&config_overrides,
316+
user_message,
317+
min_approvals,
318+
abort_on,
319+
&dynamic_tools,
320+
)
321+
}
268322
CliCommand::TestLogin => {
269323
ensure_dynamic_tools_unused(&dynamic_tools, "test-login")?;
270324
let endpoint = resolve_endpoint(codex_bin, url)?;
@@ -470,6 +524,101 @@ fn send_message_v2_endpoint(
470524
)
471525
}
472526

527+
fn trigger_zsh_fork_multi_cmd_approval(
528+
endpoint: &Endpoint,
529+
config_overrides: &[String],
530+
user_message: Option<String>,
531+
min_approvals: usize,
532+
abort_on: Option<usize>,
533+
dynamic_tools: &Option<Vec<DynamicToolSpec>>,
534+
) -> Result<()> {
535+
if let Some(abort_on) = abort_on
536+
&& abort_on == 0
537+
{
538+
bail!("--abort-on must be >= 1 when provided");
539+
}
540+
541+
let default_prompt = "Run this exact command using shell command execution without rewriting or splitting it: /usr/bin/true && /usr/bin/true";
542+
let message = user_message.unwrap_or_else(|| default_prompt.to_string());
543+
544+
let mut client = CodexClient::connect(endpoint, config_overrides)?;
545+
let initialize = client.initialize()?;
546+
println!("< initialize response: {initialize:?}");
547+
548+
let thread_response = client.thread_start(ThreadStartParams {
549+
dynamic_tools: dynamic_tools.clone(),
550+
..Default::default()
551+
})?;
552+
println!("< thread/start response: {thread_response:?}");
553+
554+
client.command_approval_behavior = match abort_on {
555+
Some(index) => CommandApprovalBehavior::AbortOn(index),
556+
None => CommandApprovalBehavior::AlwaysAccept,
557+
};
558+
client.command_approval_count = 0;
559+
client.command_approval_item_ids.clear();
560+
client.command_execution_statuses.clear();
561+
client.last_turn_status = None;
562+
563+
let mut turn_params = TurnStartParams {
564+
thread_id: thread_response.thread.id.clone(),
565+
input: vec![V2UserInput::Text {
566+
text: message,
567+
text_elements: Vec::new(),
568+
}],
569+
..Default::default()
570+
};
571+
turn_params.approval_policy = Some(AskForApproval::OnRequest);
572+
turn_params.sandbox_policy = Some(SandboxPolicy::ReadOnly {
573+
access: ReadOnlyAccess::FullAccess,
574+
});
575+
576+
let turn_response = client.turn_start(turn_params)?;
577+
println!("< turn/start response: {turn_response:?}");
578+
client.stream_turn(&thread_response.thread.id, &turn_response.turn.id)?;
579+
580+
if client.command_approval_count < min_approvals {
581+
bail!(
582+
"expected at least {min_approvals} command approvals, got {}",
583+
client.command_approval_count
584+
);
585+
}
586+
let mut approvals_per_item = std::collections::BTreeMap::new();
587+
for item_id in &client.command_approval_item_ids {
588+
*approvals_per_item.entry(item_id.clone()).or_insert(0usize) += 1;
589+
}
590+
let max_approvals_for_one_item = approvals_per_item.values().copied().max().unwrap_or(0);
591+
if max_approvals_for_one_item < min_approvals {
592+
bail!(
593+
"expected at least {min_approvals} approvals for one command item, got max {max_approvals_for_one_item} with map {approvals_per_item:?}"
594+
);
595+
}
596+
597+
let last_command_status = client.command_execution_statuses.last();
598+
if abort_on.is_none() {
599+
if last_command_status != Some(&CommandExecutionStatus::Completed) {
600+
bail!("expected completed command execution, got {last_command_status:?}");
601+
}
602+
if client.last_turn_status != Some(TurnStatus::Completed) {
603+
bail!(
604+
"expected completed turn in all-accept flow, got {:?}",
605+
client.last_turn_status
606+
);
607+
}
608+
} else if last_command_status == Some(&CommandExecutionStatus::Completed) {
609+
bail!(
610+
"expected non-completed command execution in mixed approval/decline flow, got {last_command_status:?}"
611+
);
612+
}
613+
614+
println!(
615+
"[zsh-fork multi-approval summary] approvals={}, approvals_per_item={approvals_per_item:?}, command_statuses={:?}, turn_status={:?}",
616+
client.command_approval_count, client.command_execution_statuses, client.last_turn_status
617+
);
618+
619+
Ok(())
620+
}
621+
473622
fn resume_message_v2(
474623
endpoint: &Endpoint,
475624
config_overrides: &[String],
@@ -791,6 +940,17 @@ enum ClientTransport {
791940
/// Client state for one app-server connection, including the bookkeeping
/// used to assert on command-approval behavior during a turn.
struct CodexClient {
792941
// Underlying connection to the app-server.
transport: ClientTransport,
793942
// Queue of notifications; starts empty.
// NOTE(review): the fill/drain sites are outside this view — confirm usage.
pending_notifications: VecDeque<JSONRPCNotification>,
943+
// Chooses Accept vs. Cancel for each command-execution approval request.
command_approval_behavior: CommandApprovalBehavior,
944+
// Incremented once for every command-execution approval request received.
command_approval_count: usize,
945+
// Item id of every command-execution approval request, in arrival order.
command_approval_item_ids: Vec<String>,
946+
// Status of each `ThreadItem::CommandExecution` seen in an item-completed
// notification, in arrival order.
command_execution_statuses: Vec<CommandExecutionStatus>,
947+
// Status taken from the turn-completed notification of the streamed turn;
// `None` until one is seen.
last_turn_status: Option<TurnStatus>,
948+
}
949+
950+
/// How the client answers command-execution approval requests.
#[derive(Debug, Clone, Copy)]
951+
enum CommandApprovalBehavior {
952+
// Answer every approval request with `Accept`.
AlwaysAccept,
953+
// Answer `Cancel` to the request whose one-based ordinal (the running
// approval count) equals this value; answer `Accept` to all others.
AbortOn(usize),
794954
}
795955

796956
impl CodexClient {
@@ -804,6 +964,14 @@ impl CodexClient {
804964
fn spawn_stdio(codex_bin: &Path, config_overrides: &[String]) -> Result<Self> {
805965
let codex_bin_display = codex_bin.display();
806966
let mut cmd = Command::new(codex_bin);
967+
if let Some(codex_bin_parent) = codex_bin.parent() {
968+
let mut path = OsString::from(codex_bin_parent.as_os_str());
969+
if let Some(existing_path) = std::env::var_os("PATH") {
970+
path.push(":");
971+
path.push(existing_path);
972+
}
973+
cmd.env("PATH", path);
974+
}
807975
for override_kv in config_overrides {
808976
cmd.arg("--config").arg(override_kv);
809977
}
@@ -831,6 +999,11 @@ impl CodexClient {
831999
stdout: BufReader::new(stdout),
8321000
},
8331001
pending_notifications: VecDeque::new(),
1002+
command_approval_behavior: CommandApprovalBehavior::AlwaysAccept,
1003+
command_approval_count: 0,
1004+
command_approval_item_ids: Vec::new(),
1005+
command_execution_statuses: Vec::new(),
1006+
last_turn_status: None,
8341007
})
8351008
}
8361009

@@ -847,6 +1020,11 @@ impl CodexClient {
8471020
socket: Box::new(socket),
8481021
},
8491022
pending_notifications: VecDeque::new(),
1023+
command_approval_behavior: CommandApprovalBehavior::AlwaysAccept,
1024+
command_approval_count: 0,
1025+
command_approval_item_ids: Vec::new(),
1026+
command_execution_statuses: Vec::new(),
1027+
last_turn_status: None,
8501028
})
8511029
}
8521030

@@ -862,7 +1040,12 @@ impl CodexClient {
8621040
},
8631041
capabilities: Some(InitializeCapabilities {
8641042
experimental_api: true,
865-
opt_out_notification_methods: None,
1043+
opt_out_notification_methods: Some(
1044+
NOTIFICATIONS_TO_OPT_OUT
1045+
.iter()
1046+
.map(|method| (*method).to_string())
1047+
.collect(),
1048+
),
8661049
}),
8671050
},
8681051
};
@@ -1121,10 +1304,14 @@ impl CodexClient {
11211304
println!("\n< item started: {:?}", payload.item);
11221305
}
11231306
ServerNotification::ItemCompleted(payload) => {
1307+
if let ThreadItem::CommandExecution { status, .. } = payload.item.clone() {
1308+
self.command_execution_statuses.push(status);
1309+
}
11241310
println!("< item completed: {:?}", payload.item);
11251311
}
11261312
ServerNotification::TurnCompleted(payload) => {
11271313
if payload.turn.id == turn_id {
1314+
self.last_turn_status = Some(payload.turn.status.clone());
11281315
println!("\n< turn/completed notification: {:?}", payload.turn.status);
11291316
if payload.turn.status == TurnStatus::Failed
11301317
&& let Some(error) = payload.turn.error
@@ -1313,6 +1500,8 @@ impl CodexClient {
13131500
println!(
13141501
"\n< commandExecution approval requested for thread {thread_id}, turn {turn_id}, item {item_id}, approval {approval_id}"
13151502
);
1503+
self.command_approval_count += 1;
1504+
self.command_approval_item_ids.push(item_id.clone());
13161505
if let Some(reason) = reason.as_deref() {
13171506
println!("< reason: {reason}");
13181507
}
@@ -1331,11 +1520,21 @@ impl CodexClient {
13311520
println!("< proposed execpolicy amendment: {execpolicy_amendment:?}");
13321521
}
13331522

1523+
let decision = match self.command_approval_behavior {
1524+
CommandApprovalBehavior::AlwaysAccept => CommandExecutionApprovalDecision::Accept,
1525+
CommandApprovalBehavior::AbortOn(index) if self.command_approval_count == index => {
1526+
CommandExecutionApprovalDecision::Cancel
1527+
}
1528+
CommandApprovalBehavior::AbortOn(_) => CommandExecutionApprovalDecision::Accept,
1529+
};
13341530
let response = CommandExecutionRequestApprovalResponse {
1335-
decision: CommandExecutionApprovalDecision::Accept,
1531+
decision: decision.clone(),
13361532
};
13371533
self.send_server_request_response(request_id, &response)?;
1338-
println!("< approved commandExecution request for item {item_id}");
1534+
println!(
1535+
"< commandExecution decision for approval #{} on item {item_id}: {:?}",
1536+
self.command_approval_count, decision
1537+
);
13391538
Ok(())
13401539
}
13411540

codex-rs/app-server/tests/suite/v2/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,4 +28,5 @@ mod thread_start;
2828
mod thread_unarchive;
2929
mod turn_interrupt;
3030
mod turn_start;
31+
mod turn_start_zsh_fork;
3132
mod turn_steer;

0 commit comments

Comments
 (0)