From 11e842fa9c4297e91158160c84d10c1567a8f591 Mon Sep 17 00:00:00 2001 From: cte Date: Thu, 5 Jun 2025 21:48:12 -0700 Subject: [PATCH] Improve the evals "run status" section --- apps/web-evals/package.json | 2 +- .../src/{lib/server => actions}/exercises.ts | 7 +-- apps/web-evals/src/actions/heartbeat.ts | 8 +++ .../src/{lib/server => actions}/runners.ts | 2 +- .../src/{lib/server => actions}/runs.ts | 0 .../src/{lib/server => actions}/tasks.ts | 0 apps/web-evals/src/app/home.tsx | 2 +- .../src/app/runs/[id]/connection-status.tsx | 50 ----------------- .../src/app/runs/[id]/run-status.tsx | 55 +++++++++++++++++++ apps/web-evals/src/app/runs/[id]/run.tsx | 9 +-- apps/web-evals/src/app/runs/new/new-run.tsx | 7 ++- apps/web-evals/src/hooks/use-exercises.ts | 5 -- apps/web-evals/src/hooks/use-run-status.ts | 39 ++++++++++--- apps/web-evals/src/hooks/use-runners.ts | 10 ---- apps/web-evals/src/lib/server/sse-stream.ts | 3 +- packages/evals/src/cli/redis.ts | 16 ++++-- 16 files changed, 121 insertions(+), 94 deletions(-) rename apps/web-evals/src/{lib/server => actions}/exercises.ts (89%) create mode 100644 apps/web-evals/src/actions/heartbeat.ts rename apps/web-evals/src/{lib/server => actions}/runners.ts (75%) rename apps/web-evals/src/{lib/server => actions}/runs.ts (100%) rename apps/web-evals/src/{lib/server => actions}/tasks.ts (100%) delete mode 100644 apps/web-evals/src/app/runs/[id]/connection-status.tsx create mode 100644 apps/web-evals/src/app/runs/[id]/run-status.tsx delete mode 100644 apps/web-evals/src/hooks/use-exercises.ts delete mode 100644 apps/web-evals/src/hooks/use-runners.ts diff --git a/apps/web-evals/package.json b/apps/web-evals/package.json index 80f63ab9ec..99fa68829c 100644 --- a/apps/web-evals/package.json +++ b/apps/web-evals/package.json @@ -5,7 +5,7 @@ "scripts": { "lint": "next lint", "check-types": "tsc -b", - "dev": "scripts/check-services.sh && next dev --turbopack", + "dev": "scripts/check-services.sh && next dev", "format": "prettier --write src", "build": "next build", "start": "next start" diff --git a/apps/web-evals/src/lib/server/exercises.ts b/apps/web-evals/src/actions/exercises.ts similarity index 89% rename from apps/web-evals/src/lib/server/exercises.ts rename to apps/web-evals/src/actions/exercises.ts index ee4255e5fa..8cffa40ba3 100644 --- a/apps/web-evals/src/lib/server/exercises.ts +++ b/apps/web-evals/src/actions/exercises.ts @@ -6,7 +6,9 @@ import { fileURLToPath } from "url" import { type ExerciseLanguage, exerciseLanguages } from "@roo-code/evals" -const __dirname = path.dirname(fileURLToPath(import.meta.url)) +const __dirname = path.dirname(fileURLToPath(import.meta.url)) // /apps/web-evals/src/actions + +const EXERCISES_BASE_PATH = path.resolve(__dirname, "../../../../../evals") export const listDirectories = async (relativePath: string) => { try { @@ -19,9 +21,6 @@ export const listDirectories = async (relativePath: string) => { } } -// __dirname = /evals/apps/web/src/lib/server -const EXERCISES_BASE_PATH = path.resolve(__dirname, "../../../../../../evals") - export const getExercises = async () => { const result = await Promise.all( exerciseLanguages.map(async (language) => { diff --git a/apps/web-evals/src/actions/heartbeat.ts b/apps/web-evals/src/actions/heartbeat.ts new file mode 100644 index 0000000000..a74aa8ee64 --- /dev/null +++ b/apps/web-evals/src/actions/heartbeat.ts @@ -0,0 +1,8 @@ +"use server" + +import { redisClient } from "@/lib/server/redis" + +export const getHeartbeat = async (runId: number) => { + const redis = await redisClient() + return redis.get(`heartbeat:${runId}`) +} diff --git a/apps/web-evals/src/lib/server/runners.ts b/apps/web-evals/src/actions/runners.ts similarity index 75% rename from apps/web-evals/src/lib/server/runners.ts rename to apps/web-evals/src/actions/runners.ts index 324fdbae12..8b7e86b0f3 100644 --- a/apps/web-evals/src/lib/server/runners.ts +++ b/apps/web-evals/src/actions/runners.ts @@ -1,6 +1,6 @@ "use server" -import { redisClient } from "./redis" +import { redisClient } from "@/lib/server/redis" export const getRunners = async (runId: number) => { const redis = await redisClient() diff --git a/apps/web-evals/src/lib/server/runs.ts b/apps/web-evals/src/actions/runs.ts similarity index 100% rename from apps/web-evals/src/lib/server/runs.ts rename to apps/web-evals/src/actions/runs.ts diff --git a/apps/web-evals/src/lib/server/tasks.ts b/apps/web-evals/src/actions/tasks.ts similarity index 100% rename from apps/web-evals/src/lib/server/tasks.ts rename to apps/web-evals/src/actions/tasks.ts diff --git a/apps/web-evals/src/app/home.tsx b/apps/web-evals/src/app/home.tsx index 7468347081..60a0b3bfe8 100644 --- a/apps/web-evals/src/app/home.tsx +++ b/apps/web-evals/src/app/home.tsx @@ -7,7 +7,7 @@ import { Ellipsis, Rocket } from "lucide-react" import type { Run, TaskMetrics } from "@roo-code/evals" -import { deleteRun } from "@/lib/server/runs" +import { deleteRun } from "@/actions/runs" import { formatCurrency, formatDuration, formatTokens, formatToolUsageSuccessRate } from "@/lib/formatters" import { Button, diff --git a/apps/web-evals/src/app/runs/[id]/connection-status.tsx b/apps/web-evals/src/app/runs/[id]/connection-status.tsx deleted file mode 100644 index 1505050b2d..0000000000 --- a/apps/web-evals/src/app/runs/[id]/connection-status.tsx +++ /dev/null @@ -1,50 +0,0 @@ -"use client" - -import type { EventSourceStatus } from "@/hooks/use-event-source" -import { useRunners } from "@/hooks/use-runners" -import { cn } from "@/lib/utils" - -type ConnectionStatusProps = { - status: EventSourceStatus - runId: number -} - -export const ConnectionStatus = (connectionStatus: ConnectionStatusProps) => { - const { data: runners, isLoading } = useRunners(connectionStatus.runId) - const status = isLoading ? "loading" : runners === null ? "dead" : connectionStatus.status - - return ( -
-
-
-
Status:
-
{status}
-
-
-
-
-
-
-
-
Runners:
- {runners && runners.length > 0 && ( -
{runners?.join(", ")}
- )} -
-
- ) -} diff --git a/apps/web-evals/src/app/runs/[id]/run-status.tsx b/apps/web-evals/src/app/runs/[id]/run-status.tsx new file mode 100644 index 0000000000..4b94ef14fa --- /dev/null +++ b/apps/web-evals/src/app/runs/[id]/run-status.tsx @@ -0,0 +1,55 @@ +"use client" + +import type { RunStatus as _RunStatus } from "@/hooks/use-run-status" +import { cn } from "@/lib/utils" + +export const RunStatus = ({ runStatus: { sseStatus, heartbeat, runners = [] } }: { runStatus: _RunStatus }) => ( +
+
+
+
Task Stream:
+
{sseStatus}
+
+
+
+
+
+
+
+
+
Task Controller:
+
{heartbeat ?? "dead"}
+
+
+
+
+
+
+
+
Task Runners:
+ {runners.length > 0 &&
{runners?.join(", ")}
} +
+
+) diff --git a/apps/web-evals/src/app/runs/[id]/run.tsx b/apps/web-evals/src/app/runs/[id]/run.tsx index ba93b2940a..b6c5290b13 100644 --- a/apps/web-evals/src/app/runs/[id]/run.tsx +++ b/apps/web-evals/src/app/runs/[id]/run.tsx @@ -10,12 +10,13 @@ import { useRunStatus } from "@/hooks/use-run-status" import { Table, TableBody, TableCell, TableHead, TableHeader, TableRow } from "@/components/ui" import { TaskStatus } from "./task-status" -import { ConnectionStatus } from "./connection-status" +import { RunStatus } from "./run-status" type TaskMetrics = Pick<_TaskMetrics, "tokensIn" | "tokensOut" | "tokensContext" | "duration" | "cost"> export function Run({ run }: { run: Run }) { - const { tasks, status, tokenUsage, usageUpdatedAt } = useRunStatus(run) + const runStatus = useRunStatus(run) + const { tasks, tokenUsage, usageUpdatedAt } = runStatus const taskMetrics: Record = useMemo(() => { const metrics: Record = {} @@ -45,10 +46,10 @@ export function Run({ run }: { run: Run }) {
-
{run.model}
+
{run.model}
{run.description &&
{run.description}
}
- {!run.taskMetricsId && } + {!run.taskMetricsId && }
{!tasks ? ( diff --git a/apps/web-evals/src/app/runs/new/new-run.tsx b/apps/web-evals/src/app/runs/new/new-run.tsx index 43190ca6d6..63a83ab41a 100644 --- a/apps/web-evals/src/app/runs/new/new-run.tsx +++ b/apps/web-evals/src/app/runs/new/new-run.tsx @@ -3,6 +3,7 @@ import { useCallback, useRef, useState } from "react" import { useRouter } from "next/navigation" import { z } from "zod" +import { useQuery } from "@tanstack/react-query" import { useForm, FormProvider } from "react-hook-form" import { zodResolver } from "@hookform/resolvers/zod" import fuzzysort from "fuzzysort" @@ -11,7 +12,8 @@ import { X, Rocket, Check, ChevronsUpDown, SlidersHorizontal, Book, CircleCheck import { globalSettingsSchema, providerSettingsSchema, EVALS_SETTINGS, getModelId } from "@roo-code/types" -import { createRun } from "@/lib/server/runs" +import { createRun } from "@/actions/runs" +import { getExercises } from "@/actions/exercises" import { createRunSchema as formSchema, type CreateRun as FormValues, @@ -22,7 +24,6 @@ import { } from "@/lib/schemas" import { cn } from "@/lib/utils" import { useOpenRouterModels } from "@/hooks/use-open-router-models" -import { useExercises } from "@/hooks/use-exercises" import { Button, FormControl, @@ -65,7 +66,7 @@ export function NewRun() { const modelSearchValueRef = useRef("") const models = useOpenRouterModels() - const exercises = useExercises() + const exercises = useQuery({ queryKey: ["getExercises"], queryFn: () => getExercises() }) const form = useForm({ resolver: zodResolver(formSchema), diff --git a/apps/web-evals/src/hooks/use-exercises.ts b/apps/web-evals/src/hooks/use-exercises.ts deleted file mode 100644 index 811fda93ab..0000000000 --- a/apps/web-evals/src/hooks/use-exercises.ts +++ /dev/null @@ -1,5 +0,0 @@ -import { useQuery } from "@tanstack/react-query" - -import { getExercises } from "@/lib/server/exercises" - -export const useExercises = () => useQuery({ queryKey: ["exercises"], queryFn: () => getExercises() }) diff --git a/apps/web-evals/src/hooks/use-run-status.ts b/apps/web-evals/src/hooks/use-run-status.ts index 69ca1e3632..5ad9e19ff3 100644 --- a/apps/web-evals/src/hooks/use-run-status.ts +++ b/apps/web-evals/src/hooks/use-run-status.ts @@ -2,20 +2,43 @@ import { useState, useCallback, useRef } from "react" import { useQuery, keepPreviousData } from "@tanstack/react-query" import { type TokenUsage, RooCodeEventName, taskEventSchema } from "@roo-code/types" -import type { Run } from "@roo-code/evals" +import type { Run, Task, TaskMetrics } from "@roo-code/evals" -import { getTasks } from "@/lib/server/tasks" -import { useEventSource } from "@/hooks/use-event-source" +import { getHeartbeat } from "@/actions/heartbeat" +import { getRunners } from "@/actions/runners" +import { getTasks } from "@/actions/tasks" +import { type EventSourceStatus, useEventSource } from "@/hooks/use-event-source" -export const useRunStatus = (run: Run) => { +export type RunStatus = { + sseStatus: EventSourceStatus + heartbeat: string | null | undefined + runners: string[] | undefined + tasks: (Task & { taskMetrics: TaskMetrics | null })[] | undefined + tokenUsage: Map + usageUpdatedAt: number | undefined +} + +export const useRunStatus = (run: Run): RunStatus => { const [tasksUpdatedAt, setTasksUpdatedAt] = useState() const [usageUpdatedAt, setUsageUpdatedAt] = useState() const tokenUsage = useRef>(new Map()) const startTimes = useRef>(new Map()) + const { data: heartbeat } = useQuery({ + queryKey: ["getHeartbeat", run.id], + queryFn: () => getHeartbeat(run.id), + refetchInterval: 10_000, + }) + + const { data: runners } = useQuery({ + queryKey: ["getRunners", run.id], + queryFn: () => getRunners(run.id), + refetchInterval: 10_000, + }) + const { data: tasks } = useQuery({ - queryKey: ["run", run.id, tasksUpdatedAt], + queryKey: ["getTasks", run.id, tasksUpdatedAt], queryFn: async () => getTasks(run.id), placeholderData: keepPreviousData, refetchInterval: 30_000, @@ -65,10 +88,12 @@ export const useRunStatus = (run: Run) => { } }, []) - const status = useEventSource({ url, onMessage }) + const sseStatus = useEventSource({ url, onMessage }) return { - status, + sseStatus, + heartbeat, + runners, tasks, tokenUsage: tokenUsage.current, usageUpdatedAt, diff --git a/apps/web-evals/src/hooks/use-runners.ts b/apps/web-evals/src/hooks/use-runners.ts deleted file mode 100644 index 8fc10aeb8b..0000000000 --- a/apps/web-evals/src/hooks/use-runners.ts +++ /dev/null @@ -1,10 +0,0 @@ -import { useQuery } from "@tanstack/react-query" - -import { getRunners } from "@/lib/server/runners" - -export const useRunners = (runId: number) => - useQuery({ - queryKey: ["runners", runId], - queryFn: () => getRunners(runId), - refetchInterval: 10_000, - }) diff --git a/apps/web-evals/src/lib/server/sse-stream.ts b/apps/web-evals/src/lib/server/sse-stream.ts index 47ab8bb6ca..43d368497a 100644 --- a/apps/web-evals/src/lib/server/sse-stream.ts +++ b/apps/web-evals/src/lib/server/sse-stream.ts @@ -36,9 +36,8 @@ export class SSEStream { try { await this._writer.close() - } catch (error) { + } catch (_error) { // Writer might already be closed, ignore the error. - console.debug("[SSEStream#close] Writer already closed:", error) } } diff --git a/packages/evals/src/cli/redis.ts b/packages/evals/src/cli/redis.ts index 07e4ae7e08..8f2c164e49 100644 --- a/packages/evals/src/cli/redis.ts +++ b/packages/evals/src/cli/redis.ts @@ -1,5 +1,7 @@ import { createClient, type RedisClientType } from "redis" +import { EVALS_TIMEOUT } from "@roo-code/types" + let redis: RedisClientType | undefined export const redisClient = async () => { @@ -18,26 +20,28 @@ export const getHeartbeatKey = (runId: number) => `heartbeat:${runId}` export const registerRunner = async ({ runId, taskId }: { runId: number; taskId: number }) => { const redis = await redisClient() - await redis.sAdd(getRunnersKey(runId), `task-${taskId}:${process.env.HOSTNAME}`) + const runnersKey = getRunnersKey(runId) + await redis.sAdd(runnersKey, `task-${taskId}:${process.env.HOSTNAME ?? process.pid}`) + await redis.expire(runnersKey, EVALS_TIMEOUT / 1_000) } export const deregisterRunner = async ({ runId, taskId }: { runId: number; taskId: number }) => { const redis = await redisClient() - await redis.sRem(getRunnersKey(runId), `task-${taskId}:${process.env.HOSTNAME}`) + await redis.sRem(getRunnersKey(runId), `task-${taskId}:${process.env.HOSTNAME ?? process.pid}`) } -export const startHeartbeat = async (runId: number, interval: number = 10) => { +export const startHeartbeat = async (runId: number, seconds: number = 10) => { const pid = process.pid.toString() const redis = await redisClient() const heartbeatKey = getHeartbeatKey(runId) - await redis.setEx(heartbeatKey, interval, pid) + await redis.setEx(heartbeatKey, seconds, pid) return setInterval( () => - redis.expire(heartbeatKey, interval).catch((error) => { + redis.expire(heartbeatKey, seconds).catch((error) => { console.error("heartbeat error:", error) }), - (interval * 1_000) / 2, + (seconds * 1_000) / 2, ) }