113 changes: 113 additions & 0 deletions apps/web-evals/src/actions/queue.ts
@@ -0,0 +1,113 @@
"use server"

import fs from "fs"
import { spawn } from "child_process"
import { revalidatePath } from "next/cache"

import { deleteRun as _deleteRun } from "@roo-code/evals"

import { redisClient } from "@/lib/server/redis"

const RUN_QUEUE_KEY = "evals:run-queue"
const ACTIVE_RUN_KEY = "evals:active-run"
const DISPATCH_LOCK_KEY = "evals:dispatcher:lock"
const ACTIVE_RUN_TTL_SECONDS = 60 * 60 * 12 // 12 hours
Contributor:

The 12-hour TTL seems quite generous. If a run crashes without clearing the active marker, the queue could be blocked for up to 12 hours. Consider:

  1. A shorter TTL (e.g., 2-4 hours)
  2. Implementing a heartbeat mechanism to refresh the TTL periodically
  3. Adding a manual "unlock" admin action for stuck queues
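To make option 2 concrete, here is a minimal sketch of a heartbeat that refreshes a much shorter TTL while the controller is alive. It assumes the redisClient and key constants already in this file; startActiveRunHeartbeat is a hypothetical helper, not something in this PR:

const HEARTBEAT_INTERVAL_MS = 60_000
const SHORT_ACTIVE_RUN_TTL_SECONDS = 60 * 10 // 10 minutes, refreshed every minute

async function startActiveRunHeartbeat(runId: number) {
	const redis = await redisClient()

	const timer = setInterval(async () => {
		try {
			// Only refresh the TTL while this run still owns the active marker.
			const current = await redis.get(ACTIVE_RUN_KEY)
			if (current === runId.toString()) {
				await redis.expire(ACTIVE_RUN_KEY, SHORT_ACTIVE_RUN_TTL_SECONDS)
			} else {
				clearInterval(timer)
			}
		} catch {
			// Best-effort: a missed refresh just means the short TTL acts as the backstop.
		}
	}, HEARTBEAT_INTERVAL_MS)

	// The caller stops the heartbeat when the run finishes.
	return () => clearInterval(timer)
}

With something like this in place, the TTL only has to survive a few missed heartbeats rather than the longest plausible run.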

const DISPATCH_LOCK_TTL_SECONDS = 30

async function spawnController(runId: number) {
Contributor:

This function is duplicated in packages/evals/src/cli/queue.ts. Could we extract this to a shared utility to maintain DRY principles and ensure consistency?
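For reference, a sketch of what the shared helper might look like, with the container check passed in so each caller can keep its own detection; the module path is hypothetical:

// packages/evals/src/cli/spawnController.ts (hypothetical location)
import fs from "node:fs"
import { spawn } from "node:child_process"

export function spawnController(runId: number, containerized: boolean) {
	const dockerArgs = [
		`--name evals-controller-${runId}`,
		"--rm",
		"--network evals_default",
		"-v /var/run/docker.sock:/var/run/docker.sock",
		"-v /tmp/evals:/var/log/evals",
		"-e HOST_EXECUTION_METHOD=docker",
	]

	const cliCommand = `pnpm --filter @roo-code/evals cli --runId ${runId}`
	const command = containerized ? `docker run ${dockerArgs.join(" ")} evals-runner sh -c "${cliCommand}"` : cliCommand

	const childProcess = spawn("sh", ["-c", command], {
		detached: true,
		stdio: ["ignore", "pipe", "pipe"],
	})

	// Best-effort logging of controller output.
	try {
		const logStream = fs.createWriteStream("/tmp/roo-code-evals.log", { flags: "a" })
		childProcess.stdout?.pipe(logStream)
		childProcess.stderr?.pipe(logStream)
	} catch {
		// Intentionally ignore logging pipe errors.
	}

	childProcess.unref()
}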

const isRunningInDocker = fs.existsSync("/.dockerenv")

const dockerArgs = [
`--name evals-controller-${runId}`,
"--rm",
"--network evals_default",
"-v /var/run/docker.sock:/var/run/docker.sock",
"-v /tmp/evals:/var/log/evals",
"-e HOST_EXECUTION_METHOD=docker",
]

const cliCommand = `pnpm --filter @roo-code/evals cli --runId ${runId}`

const command = isRunningInDocker
? `docker run ${dockerArgs.join(" ")} evals-runner sh -c "${cliCommand}"`
: cliCommand

const childProcess = spawn("sh", ["-c", command], {
detached: true,
stdio: ["ignore", "pipe", "pipe"],
})

// Best-effort logging of controller output
try {
const logStream = fs.createWriteStream("/tmp/roo-code-evals.log", { flags: "a" })
childProcess.stdout?.pipe(logStream)
childProcess.stderr?.pipe(logStream)
} catch (_error) {
// Intentionally ignore logging pipe errors
}

childProcess.unref()
}

/**
* Enqueue a run into the global FIFO (idempotent).
*/
export async function enqueueRun(runId: number) {
const redis = await redisClient()
const exists = await redis.lPos(RUN_QUEUE_KEY, runId.toString())
if (exists === null) {
await redis.rPush(RUN_QUEUE_KEY, runId.toString())
}
revalidatePath("/runs")
}

/**
* Dispatcher: if no active run, pop next from queue and start controller.
* Uses a short-lived lock to avoid races between concurrent dispatchers.
*/
export async function dispatchNextRun() {
const redis = await redisClient()

// Try to acquire dispatcher lock
const locked = await redis.set(DISPATCH_LOCK_KEY, "1", { NX: true, EX: DISPATCH_LOCK_TTL_SECONDS })
Contributor:

Is there a potential race condition here? If the dispatcher lock expires (30s) while we're still processing, another dispatcher could start processing the same queue. Consider:

  1. Extending the lock TTL if processing takes longer
  2. Adding a heartbeat to refresh the lock
  3. Using a longer initial TTL
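One way to harden the release regardless of which TTL is chosen: store a unique token in the lock and release it with a compare-and-delete script, so a dispatcher whose lock already expired cannot delete a newer holder's lock. A sketch assuming the node-redis client used in this file; withDispatchLock is a hypothetical wrapper, not part of this PR:

import { randomUUID } from "node:crypto"

// Runs atomically on the Redis server: delete the lock only if we still own it.
const RELEASE_LOCK_SCRIPT = `
if redis.call("get", KEYS[1]) == ARGV[1] then
	return redis.call("del", KEYS[1])
end
return 0
`

async function withDispatchLock(fn: () => Promise<void>) {
	const redis = await redisClient()
	const token = randomUUID()

	const locked = await redis.set(DISPATCH_LOCK_KEY, token, { NX: true, EX: DISPATCH_LOCK_TTL_SECONDS })
	if (!locked) return

	try {
		await fn()
	} finally {
		// A no-op if the lock expired and was re-acquired by another dispatcher.
		await redis.eval(RELEASE_LOCK_SCRIPT, { keys: [DISPATCH_LOCK_KEY], arguments: [token] }).catch(() => {})
	}
}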

if (!locked) return

try {
// If an active run is present, nothing to do.
const active = await redis.get(ACTIVE_RUN_KEY)
if (active) return

const nextId = await redis.lPop(RUN_QUEUE_KEY)
if (!nextId) return

const ok = await redis.set(ACTIVE_RUN_KEY, nextId, { NX: true, EX: ACTIVE_RUN_TTL_SECONDS })
if (!ok) {
// Put it back to preserve order and exit.
await redis.lPush(RUN_QUEUE_KEY, nextId)
return
}

await spawnController(Number(nextId))
} finally {
await redis.del(DISPATCH_LOCK_KEY).catch(() => {})
Copilot AI (Sep 14, 2025):

Using an empty catch block silently ignores all errors. Consider adding a comment explaining why errors are being ignored, or logging the error for debugging purposes.

Suggested change
await redis.del(DISPATCH_LOCK_KEY).catch(() => {})
await redis.del(DISPATCH_LOCK_KEY).catch((err) => {
console.error("Failed to delete dispatcher lock key:", err)
})

Contributor:

I agree with Copilot - could we add error logging here for debugging purposes? Silent failures make troubleshooting difficult in production.

Suggested change
await redis.del(DISPATCH_LOCK_KEY).catch(() => {})
await redis.del(DISPATCH_LOCK_KEY).catch((err) => {
console.error("Failed to delete dispatcher lock:", err)
})

}
}

/**
* Return 1-based position in the global FIFO queue, or null if not queued.
*/
export async function getQueuePosition(runId: number): Promise<number | null> {
const redis = await redisClient()
const idx = await redis.lPos(RUN_QUEUE_KEY, runId.toString())
return idx === null ? null : idx + 1
}

/**
* Remove a queued run from the FIFO queue and delete the run record.
*/
export async function cancelQueuedRun(runId: number) {
const redis = await redisClient()
await redis.lRem(RUN_QUEUE_KEY, 1, runId.toString())
await _deleteRun(runId)
revalidatePath("/runs")
}
42 changes: 5 additions & 37 deletions apps/web-evals/src/actions/runs.ts
@@ -1,9 +1,9 @@
"use server"

import * as path from "path"
import fs from "fs"
import { fileURLToPath } from "url"
import { spawn } from "child_process"

import { enqueueRun, dispatchNextRun } from "@/actions/queue"

import { revalidatePath } from "next/cache"
import pMap from "p-map"
@@ -52,41 +52,9 @@ export async function createRun({ suite, exercises = [], systemPrompt, timeout,
revalidatePath("/runs")

try {
const isRunningInDocker = fs.existsSync("/.dockerenv")

const dockerArgs = [
`--name evals-controller-${run.id}`,
"--rm",
"--network evals_default",
"-v /var/run/docker.sock:/var/run/docker.sock",
"-v /tmp/evals:/var/log/evals",
"-e HOST_EXECUTION_METHOD=docker",
]

const cliCommand = `pnpm --filter @roo-code/evals cli --runId ${run.id}`

const command = isRunningInDocker
? `docker run ${dockerArgs.join(" ")} evals-runner sh -c "${cliCommand}"`
: cliCommand

console.log("spawn ->", command)

const childProcess = spawn("sh", ["-c", command], {
detached: true,
stdio: ["ignore", "pipe", "pipe"],
})

const logStream = fs.createWriteStream("/tmp/roo-code-evals.log", { flags: "a" })

if (childProcess.stdout) {
childProcess.stdout.pipe(logStream)
}

if (childProcess.stderr) {
childProcess.stderr.pipe(logStream)
}

childProcess.unref()
// Enqueue the run and attempt to dispatch if no active run exists.
await enqueueRun(run.id)
await dispatchNextRun()
} catch (error) {
console.error(error)
Contributor:

This error handling only logs to console. Should we consider:

  1. Throwing the error to surface it to the UI?
  2. Adding telemetry/monitoring?
  3. Returning an error status to the caller?

Silent failures could leave users confused about why their run isn't queued.
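A sketch of option 3, reusing the enqueueRun and dispatchNextRun imports already in this file; the QueueOutcome type and enqueueAndDispatch helper are hypothetical, not part of this PR:

type QueueOutcome = { queued: true } | { queued: false; message: string }

async function enqueueAndDispatch(runId: number): Promise<QueueOutcome> {
	try {
		await enqueueRun(runId)
		await dispatchNextRun()
		return { queued: true }
	} catch (error) {
		console.error(error)
		// Surface a serializable message so the UI can explain why queueing failed.
		return { queued: false, message: error instanceof Error ? error.message : String(error) }
	}
}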

}
40 changes: 39 additions & 1 deletion apps/web-evals/src/components/home/run.tsx
@@ -1,10 +1,13 @@
import { useCallback, useState, useRef } from "react"
import Link from "next/link"
import { Ellipsis, ClipboardList, Copy, Check, LoaderCircle, Trash } from "lucide-react"
import { useQuery } from "@tanstack/react-query"
import { Ellipsis, ClipboardList, Copy, Check, LoaderCircle, Trash, XCircle } from "lucide-react"

import type { Run as EvalsRun, TaskMetrics as EvalsTaskMetrics } from "@roo-code/evals"

import { deleteRun } from "@/actions/runs"
import { getHeartbeat } from "@/actions/heartbeat"
import { getQueuePosition, cancelQueuedRun } from "@/actions/queue"
import { formatCurrency, formatDuration, formatTokens, formatToolUsageSuccessRate } from "@/lib/formatters"
import { useCopyRun } from "@/hooks/use-copy-run"
import {
@@ -35,6 +38,23 @@ export function Run({ run, taskMetrics }: RunProps) {
const continueRef = useRef<HTMLButtonElement>(null)
const { isPending, copyRun, copied } = useCopyRun(run.id)

// Poll heartbeat and queue position for status column
const { data: heartbeat } = useQuery({
queryKey: ["getHeartbeat", run.id],
queryFn: () => getHeartbeat(run.id),
refetchInterval: 10_000,
Contributor:

The 10-second polling interval might be excessive for long-running queues. Consider making this configurable, or using a progressive interval (e.g., start at 10s and increase to 30s after a few polls).
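A sketch of the progressive variant, assuming TanStack Query v5, where refetchInterval may be a function; the one-minute cutoff is arbitrary:

const mountedAtRef = useRef(Date.now())

const { data: heartbeat } = useQuery({
	queryKey: ["getHeartbeat", run.id],
	queryFn: () => getHeartbeat(run.id),
	// Poll every 10s for the first minute, then back off to 30s.
	refetchInterval: () => (Date.now() - mountedAtRef.current < 60_000 ? 10_000 : 30_000),
})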

})

const { data: queuePosition } = useQuery({
queryKey: ["getQueuePosition", run.id],
queryFn: () => getQueuePosition(run.id),
refetchInterval: 10_000,
})

const isCompleted = !!run.taskMetricsId
const isRunning = !!heartbeat
const isQueued = !isCompleted && !isRunning && queuePosition !== null && queuePosition !== undefined
Copilot AI (Sep 14, 2025):

The condition queuePosition !== null && queuePosition !== undefined can be simplified to queuePosition != null which checks for both null and undefined in a single comparison.

Suggested change
const isQueued = !isCompleted && !isRunning && queuePosition !== null && queuePosition !== undefined
const isQueued = !isCompleted && !isRunning && queuePosition != null

Contributor:

I agree with Copilot's suggestion here. Could we simplify this to use queuePosition != null which checks for both null and undefined?

Suggested change
const isQueued = !isCompleted && !isRunning && queuePosition !== null && queuePosition !== undefined
const isQueued = !isCompleted && !isRunning && queuePosition != null


const onConfirmDelete = useCallback(async () => {
if (!deleteRunId) {
return
@@ -51,6 +71,9 @@ export function Run({ run, taskMetrics }: RunProps) {
return (
<>
<TableRow>
<TableCell>
{isCompleted ? "Completed" : isRunning ? "Running" : isQueued ? <>Queued (#{queuePosition})</> : ""}
</TableCell>
<TableCell>{run.model}</TableCell>
<TableCell>{run.passed}</TableCell>
<TableCell>{run.failed}</TableCell>
@@ -116,6 +139,21 @@ export function Run({ run, taskMetrics }: RunProps) {
</div>
</DropdownMenuItem>
)}
{isQueued && (
<DropdownMenuItem
onClick={async () => {
try {
await cancelQueuedRun(run.id)
} catch (error) {
console.error(error)
}
}}>
<div className="flex items-center gap-1">
<XCircle />
<div>Cancel</div>
</div>
</DropdownMenuItem>
)}
<DropdownMenuItem
onClick={() => {
setDeleteRunId(run.id)
3 changes: 2 additions & 1 deletion apps/web-evals/src/components/home/runs.tsx
@@ -18,6 +18,7 @@ export function Runs({ runs }: { runs: RunWithTaskMetrics[] }) {
<Table className="border border-t-0">
<TableHeader>
<TableRow>
<TableHead>Status</TableHead>
<TableHead>Model</TableHead>
<TableHead>Passed</TableHead>
<TableHead>Failed</TableHead>
@@ -34,7 +35,7 @@
runs.map(({ taskMetrics, ...run }) => <Row key={run.id} run={run} taskMetrics={taskMetrics} />)
) : (
<TableRow>
<TableCell colSpan={9} className="text-center">
<TableCell colSpan={10} className="text-center">
No eval runs yet.
<Button variant="link" onClick={() => router.push("/runs/new")}>
Launch
89 changes: 89 additions & 0 deletions packages/evals/src/cli/queue.ts
@@ -0,0 +1,89 @@
import fs from "node:fs"
import { spawn } from "node:child_process"

import { redisClient } from "./redis.js"
import { isDockerContainer } from "./utils.js"

const RUN_QUEUE_KEY = "evals:run-queue"
const ACTIVE_RUN_KEY = "evals:active-run"
const DISPATCH_LOCK_KEY = "evals:dispatcher:lock"
const ACTIVE_RUN_TTL_SECONDS = 60 * 60 * 12 // 12 hours
const DISPATCH_LOCK_TTL_SECONDS = 30

async function spawnController(runId: number) {
const containerized = isDockerContainer()

const dockerArgs = [
`--name evals-controller-${runId}`,
"--rm",
"--network evals_default",
"-v /var/run/docker.sock:/var/run/docker.sock",
"-v /tmp/evals:/var/log/evals",
"-e HOST_EXECUTION_METHOD=docker",
]

const cliCommand = `pnpm --filter @roo-code/evals cli --runId ${runId}`
const command = containerized ? `docker run ${dockerArgs.join(" ")} evals-runner sh -c "${cliCommand}"` : cliCommand

const childProcess = spawn("sh", ["-c", command], {
detached: true,
stdio: ["ignore", "pipe", "pipe"],
})

// Best-effort logging of controller output (host path or container path)
try {
const logStream = fs.createWriteStream("/tmp/roo-code-evals.log", { flags: "a" })
childProcess.stdout?.pipe(logStream)
childProcess.stderr?.pipe(logStream)
} catch {
Contributor:

There are empty catch blocks here and at line 85. For consistency with the web implementation, should we at least add a comment explaining why errors are ignored, or consider logging them?

// ignore logging errors
}

childProcess.unref()
}

/**
* Clear the active-run marker (if any) and try to dispatch the next run in FIFO order.
* Uses a short-lived lock to avoid races with other dispatchers (web app or other controllers).
*/
export async function finishActiveRunAndDispatch() {
const redis = await redisClient()

// Clear the active run marker first (if it exists). We do not care if it was already expired.
try {
await redis.del(ACTIVE_RUN_KEY)
} catch {
// ignore
}

// Try to acquire dispatcher lock (NX+EX). If we don't get it, another dispatcher will handle it.
const locked = await redis.set(DISPATCH_LOCK_KEY, "1", { NX: true, EX: DISPATCH_LOCK_TTL_SECONDS })
if (!locked) return

try {
// If another process re-marked the active run in the meantime, bail out.
const active = await redis.get(ACTIVE_RUN_KEY)
if (active) return

// Pop next run id from the head of the queue.
const nextId = await redis.lPop(RUN_QUEUE_KEY)
if (!nextId) return

// Mark as active (with TTL) to provide crash safety.
const ok = await redis.set(ACTIVE_RUN_KEY, nextId, { NX: true, EX: ACTIVE_RUN_TTL_SECONDS })
if (!ok) {
// Could not set active (race). Push id back to the head to preserve order and exit.
await redis.lPush(RUN_QUEUE_KEY, nextId)
return
}

// Spawn the next controller in background.
await spawnController(Number(nextId))
} finally {
try {
await redis.del(DISPATCH_LOCK_KEY)
} catch {
// ignore
}
}
}
2 changes: 2 additions & 0 deletions packages/evals/src/cli/runEvals.ts
Original file line number Diff line number Diff line change
@@ -6,6 +6,7 @@ import { EVALS_REPO_PATH } from "../exercises/index.js"
import { Logger, getTag, isDockerContainer, resetEvalsRepo, commitEvalsRepoChanges } from "./utils.js"
import { startHeartbeat, stopHeartbeat } from "./redis.js"
import { processTask, processTaskInContainer } from "./runTask.js"
import { finishActiveRunAndDispatch } from "./queue.js"

export const runEvals = async (runId: number) => {
const run = await findRun(runId)
@@ -67,6 +68,7 @@ export const runEvals = async (runId: number) => {
} finally {
logger.info("cleaning up")
stopHeartbeat(run.id, heartbeat)
await finishActiveRunAndDispatch()
logger.close()
}
}