Skip to content

Commit 12bcd17

Browse files
authored
🤖 Fix SSH exec timeout to include connection establishment (#452)
Fixes a bug where SSH exec timeout was not comprehensive - it only killed the SSH process after timeout but didn't configure SSH itself to respect the timeout during connection establishment. ## Problem When SSH connections were hanging during: - DNS lookup - TCP handshake - SSH authentication - Other connection establishment phases ...commands could take much longer than the specified timeout because SSH has its own default timeout (often 75+ seconds) that wasn't being configured. ## Solution Added three SSH configuration options when a timeout is specified: 1. **`ConnectTimeout`** - Set to the configured timeout value to ensure SSH respects the timeout during connection establishment 2. **`ServerAliveInterval=5`** - Send keepalive packets every 5 seconds to detect dead connections 3. **`ServerAliveCountMax=2`** - Consider connection dead after 2 missed keepalives (10 second detection window) These options work together to ensure: - Connection establishment can't hang indefinitely - Established connections that die are detected quickly - The overall timeout is respected from the moment the `ssh` command starts ## Testing - Added integration test that verifies timeout behavior (sleeps 10s with 1s timeout) - Verifies command returns `EXIT_CODE_TIMEOUT` (-998) in ~1 second - All changes pass typecheck and linting _Generated with `cmux`_
1 parent 368aed4 commit 12bcd17

File tree

2 files changed

+59
-12
lines changed

2 files changed

+59
-12
lines changed

src/runtime/SSHRuntime.ts

Lines changed: 36 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -98,10 +98,16 @@ export class SSHRuntime implements Runtime {
9898
parts.push(command);
9999

100100
// Join all parts with && to ensure each step succeeds before continuing
101-
const fullCommand = parts.join(" && ");
101+
let fullCommand = parts.join(" && ");
102102

103-
// Wrap in bash -c with shescape for safe shell execution
104-
const remoteCommand = `bash -c ${shescape.quote(fullCommand)}`;
103+
// Wrap remote command with timeout to ensure the command is killed on the remote side
104+
// even if the local SSH client is killed but the ControlMaster connection persists
105+
// Use timeout command with KILL signal
106+
// Set remote timeout slightly longer (+1s) than local timeout to ensure
107+
// the local timeout fires first in normal cases (for cleaner error handling)
108+
// Note: Using BusyBox-compatible syntax (-s KILL) which also works with GNU timeout
109+
const remoteTimeout = Math.ceil(options.timeout) + 1;
110+
fullCommand = `timeout -s KILL ${remoteTimeout} bash -c ${shescape.quote(fullCommand)}`;
105111

106112
// Build SSH args
107113
const sshArgs: string[] = ["-T"];
@@ -125,15 +131,35 @@ export class SSHRuntime implements Runtime {
125131
// ControlMaster=auto: Create master connection if none exists, otherwise reuse
126132
// ControlPath: Unix socket path for multiplexing
127133
// ControlPersist=60: Keep master connection alive for 60s after last session
134+
//
135+
// Socket reuse is safe even with timeouts because:
136+
// - Each SSH command gets its own channel within the multiplexed connection
137+
// - SIGKILL on the client immediately closes that channel
138+
// - Remote sshd terminates the command when the channel closes
139+
// - Multiplexing only shares the TCP connection, not command lifetime
128140
sshArgs.push("-o", "ControlMaster=auto");
129141
sshArgs.push("-o", `ControlPath=${this.controlPath}`);
130142
sshArgs.push("-o", "ControlPersist=60");
131143

132-
sshArgs.push(this.config.host, remoteCommand);
144+
// Set comprehensive timeout options to ensure SSH respects the timeout
145+
// ConnectTimeout: Maximum time to wait for connection establishment (DNS, TCP handshake, SSH auth)
146+
// Cap at 15 seconds - users wanting long timeouts for builds shouldn't wait that long for connection
147+
// ServerAliveInterval: Send keepalive every 5 seconds to detect dead connections
148+
// ServerAliveCountMax: Consider connection dead after 2 missed keepalives (10 seconds total)
149+
// Together these ensure that:
150+
// 1. Connection establishment can't hang indefinitely (max 15s)
151+
// 2. Established connections that die are detected quickly
152+
// 3. The overall command timeout is respected from the moment ssh command starts
153+
sshArgs.push("-o", `ConnectTimeout=${Math.min(Math.ceil(options.timeout), 15)}`);
154+
// Set aggressive keepalives to detect dead connections
155+
sshArgs.push("-o", "ServerAliveInterval=5");
156+
sshArgs.push("-o", "ServerAliveCountMax=2");
157+
158+
sshArgs.push(this.config.host, fullCommand);
133159

134160
// Debug: log the actual SSH command being executed
135161
log.debug(`SSH command: ssh ${sshArgs.join(" ")}`);
136-
log.debug(`Remote command: ${remoteCommand}`);
162+
log.debug(`Remote command: ${fullCommand}`);
137163

138164
// Spawn ssh command
139165
const sshProcess = spawn("ssh", sshArgs, {
@@ -175,16 +201,14 @@ export class SSHRuntime implements Runtime {
175201

176202
// Handle abort signal
177203
if (options.abortSignal) {
178-
options.abortSignal.addEventListener("abort", () => sshProcess.kill());
204+
options.abortSignal.addEventListener("abort", () => sshProcess.kill("SIGKILL"));
179205
}
180206

181207
// Handle timeout
182-
if (options.timeout !== undefined) {
183-
setTimeout(() => {
184-
timedOut = true;
185-
sshProcess.kill();
186-
}, options.timeout * 1000);
187-
}
208+
setTimeout(() => {
209+
timedOut = true;
210+
sshProcess.kill("SIGKILL");
211+
}, options.timeout * 1000);
188212

189213
return { stdout, stderr, stdin, exitCode, duration };
190214
}

tests/runtime/runtime.test.ts

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,29 @@ describeIntegration("Runtime integration tests", () => {
142142

143143
expect(result.stdout.trim()).toContain(workspace.path);
144144
});
145+
test.concurrent(
146+
"handles timeout correctly",
147+
async () => {
148+
const runtime = createRuntime();
149+
await using workspace = await TestWorkspace.create(runtime, type);
150+
151+
// Command that sleeps longer than timeout
152+
const startTime = performance.now();
153+
const result = await execBuffered(runtime, "sleep 10", {
154+
cwd: workspace.path,
155+
timeout: 1, // 1 second timeout
156+
});
157+
const duration = performance.now() - startTime;
158+
159+
// Exit code should be EXIT_CODE_TIMEOUT (-998)
160+
expect(result.exitCode).toBe(-998);
161+
// Should complete in around 1 second, not 10 seconds
162+
// Allow some margin for overhead (especially on SSH)
163+
expect(duration).toBeLessThan(3000); // 3 seconds max
164+
expect(duration).toBeGreaterThan(500); // At least 0.5 seconds
165+
},
166+
15000
167+
); // 15 second timeout for test (includes workspace creation overhead)
145168
});
146169

147170
describe("readFile() - File reading", () => {

0 commit comments

Comments
 (0)