From 15335949358c3ceb70ee60b3632fd8774dbe865e Mon Sep 17 00:00:00 2001 From: cte Date: Fri, 6 Jun 2025 14:12:12 -0700 Subject: [PATCH 1/2] Prevent hanging processes from preventing container cleanup --- packages/evals/src/cli/runTask.ts | 40 +++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/packages/evals/src/cli/runTask.ts b/packages/evals/src/cli/runTask.ts index d27f4bac09..9027546c8f 100644 --- a/packages/evals/src/cli/runTask.ts +++ b/packages/evals/src/cli/runTask.ts @@ -20,6 +20,13 @@ import { exercisesPath } from "../exercises/index.js" import { getTag, isDockerContainer } from "./utils.js" +class SubprocessTimeoutError extends Error { + constructor(timeout: number) { + super(`Subprocess timeout after ${timeout}ms`) + this.name = "SubprocessTimeoutError" + } +} + type RunTaskOptions = { run: Run task: Task @@ -196,7 +203,9 @@ export const runTask = async ({ run, task, publish }: RunTaskOptions) => { await updateTask(task.id, { finishedAt: new Date() }) } - if (!isClientDisconnected) { + if (isClientDisconnected) { + logError("client disconnected before task finished") + } else { if (rooTaskId) { log("closing task") client.sendCommand({ commandName: TaskCommandName.CloseTask, data: rooTaskId }) @@ -206,6 +215,33 @@ export const runTask = async ({ run, task, publish }: RunTaskOptions) => { client.disconnect() } + log("waiting for subprocess to finish") controller.abort() - await subprocess + + // Wait for subprocess to finish gracefully, with a timeout. + const SUBPROCESS_TIMEOUT = 10_000 + + try { + await Promise.race([ + subprocess, + new Promise((_, reject) => + setTimeout(() => reject(new SubprocessTimeoutError(SUBPROCESS_TIMEOUT)), SUBPROCESS_TIMEOUT), + ), + ]) + + log("subprocess finished gracefully") + } catch (error) { + if (error instanceof SubprocessTimeoutError) { + logError("subprocess did not finish within timeout, force killing") + + try { + await execa("kill", ["-9", subprocess.pid?.toString() || ""], { reject: false }) + log("subprocess force killed") + } catch (killError) { + logError("failed to force kill subprocess:", killError) + } + } else { + throw error + } + } } From 6b1d8398e11b20d03062263f13ab635c0a600dcb Mon Sep 17 00:00:00 2001 From: cte Date: Fri, 6 Jun 2025 17:06:35 -0700 Subject: [PATCH 2/2] PR feedback --- packages/evals/src/cli/runTask.ts | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/packages/evals/src/cli/runTask.ts b/packages/evals/src/cli/runTask.ts index 9027546c8f..c097a4b78f 100644 --- a/packages/evals/src/cli/runTask.ts +++ b/packages/evals/src/cli/runTask.ts @@ -235,10 +235,13 @@ export const runTask = async ({ run, task, publish }: RunTaskOptions) => { logError("subprocess did not finish within timeout, force killing") try { - await execa("kill", ["-9", subprocess.pid?.toString() || ""], { reject: false }) - log("subprocess force killed") + if (subprocess.kill("SIGKILL")) { + log("SIGKILL sent to subprocess") + } else { + logError("failed to send SIGKILL to subprocess") + } } catch (killError) { - logError("failed to force kill subprocess:", killError) + logError("subprocess.kill(SIGKILL) failed:", killError) } } else { throw error