Skip to content

Commit 9e784e0

Browse files
authored
fix: background PocketIC output to files (#209)
1 parent 7a5447b commit 9e784e0

File tree

7 files changed

+99
-195
lines changed

7 files changed

+99
-195
lines changed
Lines changed: 15 additions & 166 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,12 @@
1-
use std::{
2-
io::{BufRead, BufReader},
3-
process::{Child, Command, Stdio},
4-
sync::{
5-
Arc,
6-
atomic::{AtomicBool, Ordering},
7-
},
8-
thread,
9-
time::Duration,
10-
};
11-
121
use clap::Args;
13-
use ic_agent::{Agent, AgentError};
2+
use ic_agent::AgentError;
143
use icp::{
154
fs::lock::LockError,
165
identity::manifest::{IdentityList, LoadIdentityManifestError},
176
manifest,
18-
network::{Configuration, NetworkDirectory, RunNetworkError, run_network},
7+
network::{Configuration, RunNetworkError, run_network},
198
project::DEFAULT_LOCAL_NETWORK_NAME,
209
};
21-
use sysinfo::Pid;
2210
use tracing::debug;
2311

2412
use icp::context::Context;
@@ -56,6 +44,12 @@ pub(crate) enum CommandError {
5644
#[error("failed to create network directory")]
5745
CreateNetworkDir { source: icp::fs::Error },
5846

47+
#[error(transparent)]
48+
LoadNetworkDescriptor(#[from] icp::network::directory::LoadNetworkFileError),
49+
50+
#[error("network '{name}' is already running")]
51+
AlreadyRunning { name: String },
52+
5953
#[error("failed to cleanup canister ID store for environment '{env}'")]
6054
CleanupCanisterIdStore {
6155
source: icp::store_id::CleanupError,
@@ -65,9 +59,6 @@ pub(crate) enum CommandError {
6559
#[error(transparent)]
6660
NetworkAccess(#[from] icp::network::AccessError),
6761

68-
#[error("timed out waiting for network to start: {err}")]
69-
Timeout { err: String },
70-
7162
#[error(transparent)]
7263
Identities(#[from] LoadIdentityManifestError),
7364

@@ -109,6 +100,12 @@ pub(crate) async fn exec(ctx: &Context, args: &RunArgs) -> Result<(), CommandErr
109100
nd.ensure_exists()
110101
.map_err(|e| CommandError::CreateNetworkDir { source: e })?;
111102

103+
if nd.load_network_descriptor().await?.is_some() {
104+
return Err(CommandError::AlreadyRunning {
105+
name: args.name.to_owned(),
106+
});
107+
}
108+
112109
// Clean up any existing canister ID mappings of which environment is on this network
113110
for env in p.environments.values() {
114111
if env.network == *network {
@@ -135,154 +132,6 @@ pub(crate) async fn exec(ctx: &Context, args: &RunArgs) -> Result<(), CommandErr
135132
debug!("Project root: {pdir}");
136133
debug!("Network root: {}", nd.network_root);
137134

138-
if args.background {
139-
let mut child = run_in_background()?;
140-
nd.save_background_network_runner_pid(Pid::from(child.id() as usize))
141-
.await?;
142-
relay_child_output_until_healthy(ctx, &mut child, &nd).await?;
143-
} else {
144-
run_network(
145-
cfg, // config
146-
nd, // nd
147-
pdir, // project_root
148-
seed_accounts, // seed_accounts
149-
)
150-
.await?;
151-
}
135+
run_network(cfg, nd, pdir, seed_accounts, args.background).await?;
152136
Ok(())
153137
}
154-
155-
async fn relay_child_output_until_healthy(
156-
ctx: &Context,
157-
child: &mut Child,
158-
nd: &NetworkDirectory,
159-
) -> Result<(), CommandError> {
160-
let stdout = child.stdout.take().expect("Failed to take child stdout");
161-
let stderr = child.stderr.take().expect("Failed to take child stderr");
162-
163-
let stop_printing_child_output = Arc::new(AtomicBool::new(false));
164-
165-
// Spawn threads to relay output
166-
let term = ctx.term.clone();
167-
let should_stop_clone = Arc::clone(&stop_printing_child_output);
168-
let stdout_thread = thread::spawn(move || {
169-
let reader = BufReader::new(stdout);
170-
for line in reader.lines() {
171-
if should_stop_clone.load(Ordering::Relaxed) {
172-
break;
173-
}
174-
if let Ok(line) = line {
175-
let _ = term.write_line(&line);
176-
}
177-
}
178-
});
179-
180-
let term = ctx.term.clone();
181-
let should_stop_clone = Arc::clone(&stop_printing_child_output);
182-
let stderr_thread = thread::spawn(move || {
183-
let reader = BufReader::new(stderr);
184-
for line in reader.lines() {
185-
if should_stop_clone.load(Ordering::Relaxed) {
186-
break;
187-
}
188-
if let Ok(line) = line {
189-
let _ = term.write_line(&line);
190-
}
191-
}
192-
});
193-
194-
wait_for_healthy_network(nd).await?;
195-
196-
// Signal threads to stop
197-
stop_printing_child_output.store(true, Ordering::Relaxed);
198-
199-
// Don't join the threads - they're likely blocked on I/O waiting for the next line.
200-
// They'll terminate naturally when the pipes close, or when the next line arrives.
201-
drop(stdout_thread);
202-
drop(stderr_thread);
203-
204-
Ok(())
205-
}
206-
207-
#[allow(clippy::result_large_err)]
208-
fn run_in_background() -> Result<Child, CommandError> {
209-
let exe = std::env::current_exe().expect("Failed to get current executable.");
210-
let mut cmd = Command::new(exe);
211-
// Skip 1 because arg0 is this executable's path.
212-
cmd.args(std::env::args().skip(1).filter(|a| !a.eq("--background")))
213-
.stdin(Stdio::null())
214-
.stdout(Stdio::piped()) // Capture stdout so we can relay it
215-
.stderr(Stdio::piped()); // Capture stderr so we can relay it
216-
217-
// On Unix, create a new process group so the child can continue running
218-
// independently after the run command exits
219-
#[cfg(unix)]
220-
{
221-
use std::os::unix::process::CommandExt;
222-
cmd.process_group(0);
223-
}
224-
225-
let child = cmd.spawn().expect("Failed to spawn child process.");
226-
Ok(child)
227-
}
228-
229-
async fn retry_with_timeout<F, Fut, T>(mut f: F, max_retries: usize, delay_ms: u64) -> Option<T>
230-
where
231-
F: FnMut() -> Fut,
232-
Fut: std::future::Future<Output = Option<T>> + Send,
233-
{
234-
let mut retries = 0;
235-
loop {
236-
if let Some(result) = f().await {
237-
return Some(result);
238-
}
239-
if retries > max_retries {
240-
return None;
241-
}
242-
retries += 1;
243-
tokio::time::sleep(Duration::from_millis(delay_ms)).await;
244-
}
245-
}
246-
247-
async fn wait_for_healthy_network(nd: &NetworkDirectory) -> Result<(), CommandError> {
248-
let max_retries = 45;
249-
let delay_ms = 1000;
250-
251-
// Wait for network descriptor to be written
252-
let network = retry_with_timeout(
253-
|| async move { nd.load_network_descriptor().await.unwrap_or(None) },
254-
max_retries,
255-
delay_ms,
256-
)
257-
.await
258-
.ok_or(CommandError::Timeout {
259-
err: "timed out waiting for network descriptor".to_string(),
260-
})?;
261-
262-
// Wait for network to report itself healthy
263-
let port = network.gateway.port;
264-
let agent = Agent::builder()
265-
.with_url(format!("http://127.0.0.1:{port}"))
266-
.build()?;
267-
retry_with_timeout(
268-
|| {
269-
let agent = agent.clone();
270-
async move {
271-
let status = agent.status().await;
272-
if let Ok(status) = status
273-
&& matches!(&status.replica_health_status, Some(status) if status == "healthy")
274-
{
275-
return Some(());
276-
}
277-
278-
None
279-
}
280-
},
281-
max_retries,
282-
delay_ms,
283-
)
284-
.await
285-
.ok_or(CommandError::Timeout {
286-
err: "timed out waiting for network to start".to_string(),
287-
})
288-
}

crates/icp-cli/src/commands/network/stop.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,10 @@ pub async fn exec(ctx: &Context, cmd: &Cmd) -> Result<(), CommandError> {
8888
.with_write(async |root| {
8989
let pid_file = root.background_network_runner_pid_file();
9090
let _ = remove_file(&pid_file); // Cleanup is nice, but optional
91+
let descriptor_file = root.network_descriptor_path();
92+
// Desciptor file must be deleted to allow the network to be restarted, but if it doesn't exist, that's fine too
93+
let _ = remove_file(&descriptor_file);
94+
9195
Ok::<_, CommandError>(())
9296
})
9397
.await??;

crates/icp-cli/tests/network_tests.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -314,7 +314,7 @@ async fn network_run_and_stop_background() {
314314
.args(["network", "run", "my-network", "--background"])
315315
.assert()
316316
.success()
317-
.stdout(contains("Created instance with id")); // part of network start output
317+
.stderr(contains("Created instance with id")); // part of network start output
318318

319319
let network = ctx.wait_for_network_descriptor(&project_dir, "my-network");
320320

@@ -332,7 +332,7 @@ async fn network_run_and_stop_background() {
332332
);
333333

334334
let pid_contents = read_to_string(&pid_file_path).expect("Failed to read PID file");
335-
let background_controller_pid: Pid = pid_contents
335+
let background_pocketic_pid: Pid = pid_contents
336336
.trim()
337337
.parse()
338338
.expect("PID file should contain a valid process ID");
@@ -357,7 +357,7 @@ async fn network_run_and_stop_background() {
357357
.success()
358358
.stdout(contains(format!(
359359
"Stopping background network (PID: {})",
360-
background_controller_pid
360+
background_pocketic_pid
361361
)))
362362
.stdout(contains("Network stopped successfully"));
363363

@@ -367,11 +367,11 @@ async fn network_run_and_stop_background() {
367367
"PID file should be removed after stopping"
368368
);
369369

370-
// Verify controller process is no longer running
370+
// Verify pocketic process is no longer running
371371
let mut system = System::new();
372-
system.refresh_processes(ProcessesToUpdate::Some(&[background_controller_pid]), true);
372+
system.refresh_processes(ProcessesToUpdate::Some(&[background_pocketic_pid]), true);
373373
assert!(
374-
system.process(background_controller_pid).is_none(),
374+
system.process(background_pocketic_pid).is_none(),
375375
"Process should no longer be running"
376376
);
377377

crates/icp/src/network/directory.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,16 @@ impl NetworkRootPaths {
214214
pub fn pocketic_port_file(&self) -> PathBuf {
215215
self.pocketic_dir().join("port")
216216
}
217+
218+
/// PocketIC writes its stdout to this file.
219+
pub fn pocketic_stdout_file(&self) -> PathBuf {
220+
self.pocketic_dir().join("stdout.log")
221+
}
222+
223+
/// PocketIC writes its stderr to this file.
224+
pub fn pocketic_stderr_file(&self) -> PathBuf {
225+
self.pocketic_dir().join("stderr.log")
226+
}
217227
}
218228

219229
impl PathsAccess for NetworkRootPaths {

crates/icp/src/network/managed/pocketic.rs

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ use pocket_ic::common::rest::{
66
};
77
use reqwest::Url;
88
use snafu::prelude::*;
9+
use std::process::Stdio;
910
use time::OffsetDateTime;
1011

1112
use crate::prelude::*;
@@ -69,14 +70,31 @@ pub struct PocketIcInstance {
6970
pub root_key: String,
7071
}
7172

72-
pub fn spawn_pocketic(pocketic_path: &Path, port_file: &Path) -> tokio::process::Child {
73+
pub fn spawn_pocketic(
74+
pocketic_path: &Path,
75+
port_file: &Path,
76+
stdout_file: &Path,
77+
stderr_file: &Path,
78+
background: bool,
79+
) -> tokio::process::Child {
7380
let mut cmd = tokio::process::Command::new(pocketic_path);
7481
cmd.arg("--port-file");
7582
cmd.arg(port_file.as_os_str());
7683
cmd.args(["--ttl", "2592000", "--log-levels", "error"]);
7784

78-
cmd.stdout(std::process::Stdio::inherit());
79-
cmd.stderr(std::process::Stdio::inherit());
85+
if background {
86+
eprintln!("For background mode, PocketIC output will be redirected:");
87+
eprintln!(" stdout: {}", stdout_file);
88+
eprintln!(" stderr: {}", stderr_file);
89+
let stdout = std::fs::File::create(stdout_file).expect("Failed to create stdout file.");
90+
let stderr = std::fs::File::create(stderr_file).expect("Failed to create stderr file.");
91+
cmd.stdout(Stdio::from(stdout));
92+
cmd.stderr(Stdio::from(stderr));
93+
} else {
94+
cmd.stdout(Stdio::inherit());
95+
cmd.stderr(Stdio::inherit());
96+
}
97+
8098
#[cfg(unix)]
8199
{
82100
cmd.process_group(0);

0 commit comments

Comments
 (0)