Skip to content

Commit 1fce09c

Browse files
fix: use tini with unshare to preserve signals (e.g oom) (#7471)
* use tini with unshare to preserve signals (e.g oom) * fix ci * ci as nsjail * simplify * fix flaky go integration test --------- Co-authored-by: Ruben Fiszel <[email protected]>
1 parent cf90bd4 commit 1fce09c

File tree

4 files changed

+80
-12
lines changed

4 files changed

+80
-12
lines changed

.github/workflows/docker-image.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -593,9 +593,10 @@ jobs:
593593
${{ steps.meta-ee-public.outputs.labels }}
594594
595595
build_ee_slim:
596-
if: ${{ startsWith(github.ref, 'refs/tags/v') }} || ((github.event_name != 'workflow_dispatch') || (github.event.inputs.slim))
597596
needs: [build_ee]
598597
runs-on: ubicloud
598+
if: (github.event_name != 'pull_request') && ((github.event_name != 'workflow_dispatch') || (github.event.inputs.ee || github.event.inputs.slim))
599+
599600
steps:
600601
- uses: actions/checkout@v4
601602
with:

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ ENV PATH /usr/local/bin:/root/.local/bin:/tmp/.local/bin:$PATH
129129

130130

131131
RUN apt-get update \
132-
&& apt-get install -y --no-install-recommends netbase tzdata ca-certificates wget curl jq unzip build-essential unixodbc xmlsec1 software-properties-common \
132+
&& apt-get install -y --no-install-recommends netbase tzdata ca-certificates wget curl jq unzip build-essential unixodbc xmlsec1 software-properties-common tini \
133133
&& apt-get clean \
134134
&& rm -rf /var/lib/apt/lists/*
135135

backend/windmill-worker/src/common.rs

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -642,7 +642,19 @@ pub fn build_command_with_isolation(program: &str, args: &[&str]) -> Command {
642642
cmd.arg(flag);
643643
}
644644

645-
cmd.arg("--");
645+
// If tini is available, use it for proper PID 1 signal handling
646+
// (especially OOM exit codes which return 137 instead of sigprocmask errors).
647+
// Note: --fork should already be in the flags for proper namespace setup.
648+
if let Some(tini_path) = crate::TINI_AVAILABLE.as_ref() {
649+
cmd.arg("--");
650+
cmd.arg(tini_path);
651+
cmd.arg("-s");
652+
cmd.arg("--");
653+
} else {
654+
// Without tini, just run the command directly (--fork is in flags)
655+
cmd.arg("--");
656+
}
657+
646658
cmd.arg(program);
647659
cmd.args(args);
648660
cmd

backend/windmill-worker/src/worker.rs

Lines changed: 64 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -315,24 +315,85 @@ lazy_static::lazy_static! {
315315
.and_then(|x| x.parse::<bool>().ok())
316316
.unwrap_or(false);
317317

318+
pub static ref UNSHARE_TINI_PATH: String = {
319+
std::env::var("UNSHARE_TINI_PATH").unwrap_or_else(|_| "tini".to_string())
320+
};
321+
322+
// --fork is required for unshare to work with --pid --mount-proc.
323+
// When tini is available, it runs as PID 1 inside the forked namespace for proper signal handling.
318324
pub static ref UNSHARE_ISOLATION_FLAGS: String = {
319325
std::env::var("UNSHARE_ISOLATION_FLAGS")
320326
.unwrap_or_else(|_| "--user --map-root-user --pid --fork --mount-proc".to_string())
321327
};
322328

329+
// Check if tini is available for proper PID 1 handling in unshare namespaces.
330+
// tini handles OOM signals correctly, returning exit code 137 instead of sigprocmask errors.
331+
pub static ref TINI_AVAILABLE: Option<String> = {
332+
let tini_path = UNSHARE_TINI_PATH.as_str();
333+
let test_result = std::process::Command::new(tini_path)
334+
.args(["-s", "--", "true"])
335+
.output();
336+
337+
match test_result {
338+
Ok(output) if output.status.success() => {
339+
tracing::info!("tini available at: {}", tini_path);
340+
Some(tini_path.to_string())
341+
}
342+
Ok(output) => {
343+
let stderr = String::from_utf8_lossy(&output.stderr);
344+
tracing::warn!(
345+
"tini test failed: {}. Proceeding without tini (OOM exit codes may be incorrect).",
346+
stderr.trim()
347+
);
348+
None
349+
}
350+
Err(e) => {
351+
if e.kind() == std::io::ErrorKind::NotFound {
352+
tracing::warn!(
353+
"tini not found at '{}'. Install tini for correct OOM exit codes, or set UNSHARE_TINI_PATH.",
354+
tini_path
355+
);
356+
} else {
357+
tracing::warn!(
358+
"Failed to test tini: {}. Proceeding without tini.",
359+
e
360+
);
361+
}
362+
None
363+
}
364+
}
365+
};
366+
323367
pub static ref UNSHARE_PATH: Option<String> = {
324368
let flags = UNSHARE_ISOLATION_FLAGS.as_str();
325369
let mut test_cmd_args: Vec<&str> = flags.split_whitespace().collect();
326-
test_cmd_args.push("--");
327-
test_cmd_args.push("true");
370+
371+
// Build the test command based on whether tini is available
372+
// Note: --fork should already be in the flags for proper namespace setup
373+
if let Some(tini_path) = TINI_AVAILABLE.as_ref() {
374+
// Test with tini: unshare <flags> -- tini -s -- true
375+
test_cmd_args.push("--");
376+
test_cmd_args.push(tini_path.as_str());
377+
test_cmd_args.push("-s");
378+
test_cmd_args.push("--");
379+
test_cmd_args.push("true");
380+
} else {
381+
// Fallback without tini: unshare <flags> -- true
382+
test_cmd_args.push("--");
383+
test_cmd_args.push("true");
384+
}
328385

329386
let test_result = std::process::Command::new("unshare")
330387
.args(&test_cmd_args)
331388
.output();
332389

333390
match test_result {
334391
Ok(output) if output.status.success() => {
335-
tracing::info!("PID namespace isolation enabled. Flags: {}", flags);
392+
if TINI_AVAILABLE.is_some() {
393+
tracing::info!("PID namespace isolation enabled with tini. Flags: {}", flags);
394+
} else {
395+
tracing::info!("PID namespace isolation enabled. Flags: {}", flags);
396+
}
336397
Some("unshare".to_string())
337398
},
338399
Ok(output) => {
@@ -1140,12 +1201,6 @@ pub async fn run_worker(
11401201
if *ENABLE_UNSHARE_PID {
11411202
// Access UNSHARE_PATH to trigger lazy_static initialization and test
11421203
let _ = &*UNSHARE_PATH;
1143-
1144-
tracing::info!(
1145-
worker = %worker_name, hostname = %hostname,
1146-
"PID namespace isolation enabled via unshare with flags: {}",
1147-
UNSHARE_ISOLATION_FLAGS.as_str()
1148-
);
11491204
}
11501205

11511206
let start_time = Instant::now();

0 commit comments

Comments
 (0)