Skip to content

Commit 04aa3bb

Browse files
authored
Merge pull request #2248 from itowlson/terminate-child-triggers-on-trigger-exit
Cancel external triggers if one of multiple fails on start
2 parents 96498fe + 9ad3591 commit 04aa3bb

File tree

2 files changed

+84
-23
lines changed

2 files changed

+84
-23
lines changed

src/commands/external.rs

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,12 +79,15 @@ pub async fn execute_external_subcommand(
7979
let mut command = Command::new(binary);
8080
command.args(args);
8181
command.envs(get_env_vars_map()?);
82+
command.kill_on_drop(true);
8283

8384
let badger = BadgerChecker::start(&plugin_name, plugin_version, SPIN_VERSION);
8485

8586
log::info!("Executing command {:?}", command);
8687
// Allow user to interact with stdio/stdout of child process
87-
let status = command.status().await?;
88+
let mut child = command.spawn()?;
89+
set_kill_on_ctrl_c(&child);
90+
let status = child.wait().await?;
8891
log::info!("Exiting process with {}", status);
8992

9093
report_badger_result(badger).await;
@@ -98,6 +101,18 @@ pub async fn execute_external_subcommand(
98101
Ok(())
99102
}
100103

104+
#[cfg(windows)]
105+
fn set_kill_on_ctrl_c(_child: &tokio::process::Child) {}
106+
107+
#[cfg(not(windows))]
108+
fn set_kill_on_ctrl_c(child: &tokio::process::Child) {
109+
if let Some(pid) = child.id().map(|id| nix::unistd::Pid::from_raw(id as i32)) {
110+
_ = ctrlc::set_handler(move || {
111+
_ = nix::sys::signal::kill(pid, nix::sys::signal::SIGTERM);
112+
});
113+
}
114+
}
115+
101116
async fn ensure_plugin_available(
102117
plugin_name: &str,
103118
plugin_store: &PluginStore,

src/commands/up.rs

Lines changed: 68 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ use std::{
88

99
use anyhow::{anyhow, bail, Context, Result};
1010
use clap::{CommandFactory, Parser};
11-
use itertools::Itertools;
1211
use reqwest::Url;
1312
use spin_app::locked::LockedApp;
1413
use spin_common::ui::quoted_path;
@@ -17,14 +16,23 @@ use spin_oci::OciLoader;
1716
use spin_trigger::cli::{SPIN_LOCAL_APP_DIR, SPIN_LOCKED_URL, SPIN_WORKING_DIR};
1817
use tempfile::TempDir;
1918

20-
use futures::StreamExt;
21-
2219
use crate::opts::*;
2320

2421
use self::app_source::{AppSource, ResolvedAppSource};
2522

2623
const APPLICATION_OPT: &str = "APPLICATION";
2724

25+
// If multiple triggers start very close together, there is a race condition
26+
// where if one trigger fails during startup, other external triggers may
27+
// not have their cancellation hooked up. (kill_on_drop doesn't fully solve
28+
// this because it kills the Spin process but that doesn't cascade to the
29+
// child plugin trigger process.) So add a hopefully insignificant delay
30+
// between them to reduce the chance of this happening.
31+
const MULTI_TRIGGER_START_OFFSET: tokio::time::Duration = tokio::time::Duration::from_millis(20);
32+
// And just in case wait a few moments before performing the first "have
33+
// any exited" check.
34+
const MULTI_TRIGGER_LET_ALL_START: tokio::time::Duration = tokio::time::Duration::from_millis(500);
35+
2836
/// Start the Fermyon runtime.
2937
#[derive(Parser, Debug, Default)]
3038
#[clap(
@@ -181,20 +189,30 @@ impl UpCommand {
181189
local_app_dir,
182190
};
183191

184-
let mut trigger_processes = self.start_trigger_processes(trigger_cmds, run_opts).await?;
192+
let trigger_processes = self.start_trigger_processes(trigger_cmds, run_opts).await?;
193+
let is_multi = trigger_processes.len() > 1;
194+
let pids = get_pids(&trigger_processes);
185195

186-
set_kill_on_ctrl_c(&trigger_processes)?;
196+
set_kill_on_ctrl_c(&pids)?;
187197

188-
let mut trigger_tasks = trigger_processes
189-
.iter_mut()
190-
.map(|ch| ch.wait())
191-
.collect::<futures::stream::FuturesUnordered<_>>();
198+
let trigger_tasks = trigger_processes
199+
.into_iter()
200+
.map(|mut ch| tokio::task::spawn(async move { ch.wait().await }))
201+
.collect::<Vec<_>>();
202+
203+
if is_multi {
204+
tokio::time::sleep(MULTI_TRIGGER_LET_ALL_START).await;
205+
}
192206

193-
let first_to_finish = trigger_tasks.next().await;
207+
let (first_to_finish, _index, _rest) = futures::future::select_all(trigger_tasks).await;
194208

195-
if let Some(process_result) = first_to_finish {
209+
if let Ok(process_result) = first_to_finish {
196210
let status = process_result?;
197211
if !status.success() {
212+
if is_multi {
213+
println!("A trigger exited unexpectedly. Terminating.");
214+
kill_child_processes(&pids);
215+
}
198216
return Err(crate::subprocess::ExitStatusError::new(status).into());
199217
}
200218
}
@@ -223,6 +241,7 @@ impl UpCommand {
223241
trigger_cmds: Vec<Vec<String>>,
224242
run_opts: RunTriggerOpts,
225243
) -> anyhow::Result<Vec<tokio::process::Child>> {
244+
let is_multi = trigger_cmds.len() > 1;
226245
let mut trigger_processes = Vec::with_capacity(trigger_cmds.len());
227246

228247
for cmd in trigger_cmds {
@@ -231,6 +250,13 @@ impl UpCommand {
231250
.await
232251
.context("Failed to start trigger process")?;
233252
trigger_processes.push(child);
253+
254+
if is_multi {
255+
// Allow time for the child `spin` process to launch the trigger
256+
// and hook up its cancellation. Mitigates the race condition
257+
// noted on the constant (see there for more info).
258+
tokio::time::sleep(MULTI_TRIGGER_START_OFFSET).await;
259+
}
234260
}
235261

236262
Ok(trigger_processes)
@@ -387,25 +413,45 @@ impl UpCommand {
387413
}
388414

389415
#[cfg(windows)]
390-
fn set_kill_on_ctrl_c(trigger_processes: &Vec<tokio::process::Child>) -> Result<(), anyhow::Error> {
416+
fn set_kill_on_ctrl_c(_pids: &[usize]) -> Result<(), anyhow::Error> {
391417
Ok(())
392418
}
393419

394420
#[cfg(not(windows))]
395-
fn set_kill_on_ctrl_c(trigger_processes: &[tokio::process::Child]) -> Result<(), anyhow::Error> {
421+
fn set_kill_on_ctrl_c(pids: &[nix::unistd::Pid]) -> Result<(), anyhow::Error> {
422+
let pids = pids.to_owned();
423+
ctrlc::set_handler(move || {
424+
kill_child_processes(&pids);
425+
})?;
426+
Ok(())
427+
}
428+
429+
#[cfg(windows)]
430+
fn get_pids(_trigger_processes: &[tokio::process::Child]) -> Vec<usize> {
431+
vec![]
432+
}
433+
434+
#[cfg(not(windows))]
435+
fn get_pids(trigger_processes: &[tokio::process::Child]) -> Vec<nix::unistd::Pid> {
436+
use itertools::Itertools;
396437
// https://github.com/nix-rust/nix/issues/656
397-
let pids = trigger_processes
438+
trigger_processes
398439
.iter()
399440
.flat_map(|child| child.id().map(|id| nix::unistd::Pid::from_raw(id as i32)))
400-
.collect_vec();
401-
ctrlc::set_handler(move || {
402-
for pid in &pids {
403-
if let Err(err) = nix::sys::signal::kill(*pid, nix::sys::signal::SIGTERM) {
404-
tracing::warn!("Failed to kill trigger handler process: {:?}", err)
405-
}
441+
.collect_vec()
442+
}
443+
444+
#[cfg(windows)]
445+
fn kill_child_processes(_pids: &[usize]) {}
446+
447+
#[cfg(not(windows))]
448+
fn kill_child_processes(pids: &[nix::unistd::Pid]) {
449+
// https://github.com/nix-rust/nix/issues/656
450+
for pid in pids {
451+
if let Err(err) = nix::sys::signal::kill(*pid, nix::sys::signal::SIGTERM) {
452+
tracing::warn!("Failed to kill trigger handler process: {:?}", err)
406453
}
407-
})?;
408-
Ok(())
454+
}
409455
}
410456

411457
#[derive(Clone)]

0 commit comments

Comments
 (0)