Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion apps/web-evals/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"scripts": {
"lint": "next lint",
"check-types": "tsc -b",
"dev": "scripts/check-services.sh && next dev --turbopack",
"dev": "scripts/check-services.sh && next dev",
"format": "prettier --write src",
"build": "next build",
"start": "next start"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ import { fileURLToPath } from "url"

import { type ExerciseLanguage, exerciseLanguages } from "@roo-code/evals"

const __dirname = path.dirname(fileURLToPath(import.meta.url))
const __dirname = path.dirname(fileURLToPath(import.meta.url)) // <repo>/apps/web-evals/src/actions

const EXERCISES_BASE_PATH = path.resolve(__dirname, "../../../../../evals")

export const listDirectories = async (relativePath: string) => {
try {
Expand All @@ -19,9 +21,6 @@ export const listDirectories = async (relativePath: string) => {
}
}

// __dirname = <repo>/evals/apps/web/src/lib/server
const EXERCISES_BASE_PATH = path.resolve(__dirname, "../../../../../../evals")

export const getExercises = async () => {
const result = await Promise.all(
exerciseLanguages.map(async (language) => {
Expand Down
8 changes: 8 additions & 0 deletions apps/web-evals/src/actions/heartbeat.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"use server"

import { redisClient } from "@/lib/server/redis"

/**
 * Server action that reads the heartbeat entry kept in Redis for a run.
 *
 * @param runId - Numeric id of the run; the key read is `heartbeat:<runId>`.
 * @returns The result of the Redis `GET` on that key.
 */
export const getHeartbeat = async (runId: number) => {
	const client = await redisClient()
	const heartbeatKey = `heartbeat:${runId}`
	return client.get(heartbeatKey)
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"use server"

import { redisClient } from "./redis"
import { redisClient } from "@/lib/server/redis"

export const getRunners = async (runId: number) => {
const redis = await redisClient()
Expand Down
2 changes: 1 addition & 1 deletion apps/web-evals/src/app/home.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import { Ellipsis, Rocket } from "lucide-react"

import type { Run, TaskMetrics } from "@roo-code/evals"

import { deleteRun } from "@/lib/server/runs"
import { deleteRun } from "@/actions/runs"
import { formatCurrency, formatDuration, formatTokens, formatToolUsageSuccessRate } from "@/lib/formatters"
import {
Button,
Expand Down
50 changes: 0 additions & 50 deletions apps/web-evals/src/app/runs/[id]/connection-status.tsx

This file was deleted.

55 changes: 55 additions & 0 deletions apps/web-evals/src/app/runs/[id]/run-status.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
"use client"

import type { RunStatus as _RunStatus } from "@/hooks/use-run-status"
import { cn } from "@/lib/utils"

/**
 * Renders three status rows for a run:
 *
 * - "Task Stream": the `sseStatus` text plus a pinging dot that is green for
 *   "connected", amber for "waiting" and rose for "error".
 * - "Task Controller": the `heartbeat` value (or "dead" when it is null/undefined)
 *   plus a dot that is green when `heartbeat` is truthy and rose otherwise.
 * - "Task Runners": the runner names joined with ", ", shown only when the list
 *   is non-empty. `runners` defaults to an empty array when not provided.
 */
export const RunStatus = ({ runStatus: { sseStatus, heartbeat, runners = [] } }: { runStatus: _RunStatus }) => (
<div>
<div className="flex items-center gap-2">
<div className="flex items-center gap-2">
<div>Task Stream:</div>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

User-facing labels like 'Task Stream', 'Task Controller', and 'Task Runners' should use translation utilities rather than hardcoded strings.

This comment was generated because it violated a code review rule: irule_C0ez7Rji6ANcGkkX.

<div className="font-mono text-sm text-muted-foreground">{sseStatus}</div>
</div>
<div className="relative">
<div
className={cn("absolute size-2.5 rounded-full opacity-50 animate-ping", {
"bg-green-500": sseStatus === "connected",
"bg-amber-500": sseStatus === "waiting",
"bg-rose-500": sseStatus === "error",
})}
/>
<div
className={cn("size-2.5 rounded-full", {
"bg-green-500": sseStatus === "connected",
"bg-amber-500": sseStatus === "waiting",
"bg-rose-500": sseStatus === "error",
})}
/>
</div>
</div>
<div className="flex items-center gap-2">
<div className="flex items-center gap-2">
<div>Task Controller:</div>
<div className="font-mono text-sm text-muted-foreground">{heartbeat ?? "dead"}</div>
</div>
<div className="relative">
<div
className={cn("absolute size-2.5 rounded-full opacity-50 animate-ping", {
"bg-green-500": !!heartbeat,
"bg-rose-500": !heartbeat,
})}
/>
<div
className={cn("size-2.5 rounded-full", {
"bg-green-500": !!heartbeat,
"bg-rose-500": !heartbeat,
})}
/>
</div>
</div>
<div className="flex items-center gap-2">
<div>Task Runners:</div>
{runners.length > 0 && <div className="font-mono text-sm text-muted-foreground">{runners?.join(", ")}</div>}
</div>
</div>
)
9 changes: 5 additions & 4 deletions apps/web-evals/src/app/runs/[id]/run.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,13 @@ import { useRunStatus } from "@/hooks/use-run-status"
import { Table, TableBody, TableCell, TableHead, TableHeader, TableRow } from "@/components/ui"

import { TaskStatus } from "./task-status"
import { ConnectionStatus } from "./connection-status"
import { RunStatus } from "./run-status"

type TaskMetrics = Pick<_TaskMetrics, "tokensIn" | "tokensOut" | "tokensContext" | "duration" | "cost">

export function Run({ run }: { run: Run }) {
const { tasks, status, tokenUsage, usageUpdatedAt } = useRunStatus(run)
const runStatus = useRunStatus(run)
const { tasks, tokenUsage, usageUpdatedAt } = runStatus

const taskMetrics: Record<number, TaskMetrics> = useMemo(() => {
const metrics: Record<number, TaskMetrics> = {}
Expand Down Expand Up @@ -45,10 +46,10 @@ export function Run({ run }: { run: Run }) {
<div>
<div className="mb-2">
<div>
<div>{run.model}</div>
<div className="font-mono">{run.model}</div>
{run.description && <div className="text-sm text-muted-foreground">{run.description}</div>}
</div>
{!run.taskMetricsId && <ConnectionStatus status={status} runId={run.id} />}
{!run.taskMetricsId && <RunStatus runStatus={runStatus} />}
</div>
{!tasks ? (
<LoaderCircle className="size-4 animate-spin" />
Expand Down
7 changes: 4 additions & 3 deletions apps/web-evals/src/app/runs/new/new-run.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import { useCallback, useRef, useState } from "react"
import { useRouter } from "next/navigation"
import { z } from "zod"
import { useQuery } from "@tanstack/react-query"
import { useForm, FormProvider } from "react-hook-form"
import { zodResolver } from "@hookform/resolvers/zod"
import fuzzysort from "fuzzysort"
Expand All @@ -11,7 +12,8 @@ import { X, Rocket, Check, ChevronsUpDown, SlidersHorizontal, Book, CircleCheck

import { globalSettingsSchema, providerSettingsSchema, EVALS_SETTINGS, getModelId } from "@roo-code/types"

import { createRun } from "@/lib/server/runs"
import { createRun } from "@/actions/runs"
import { getExercises } from "@/actions/exercises"
import {
createRunSchema as formSchema,
type CreateRun as FormValues,
Expand All @@ -22,7 +24,6 @@ import {
} from "@/lib/schemas"
import { cn } from "@/lib/utils"
import { useOpenRouterModels } from "@/hooks/use-open-router-models"
import { useExercises } from "@/hooks/use-exercises"
import {
Button,
FormControl,
Expand Down Expand Up @@ -65,7 +66,7 @@ export function NewRun() {
const modelSearchValueRef = useRef("")

const models = useOpenRouterModels()
const exercises = useExercises()
const exercises = useQuery({ queryKey: ["getExercises"], queryFn: () => getExercises() })

const form = useForm<FormValues>({
resolver: zodResolver(formSchema),
Expand Down
5 changes: 0 additions & 5 deletions apps/web-evals/src/hooks/use-exercises.ts

This file was deleted.

39 changes: 32 additions & 7 deletions apps/web-evals/src/hooks/use-run-status.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,43 @@ import { useState, useCallback, useRef } from "react"
import { useQuery, keepPreviousData } from "@tanstack/react-query"

import { type TokenUsage, RooCodeEventName, taskEventSchema } from "@roo-code/types"
import type { Run } from "@roo-code/evals"
import type { Run, Task, TaskMetrics } from "@roo-code/evals"

import { getTasks } from "@/lib/server/tasks"
import { useEventSource } from "@/hooks/use-event-source"
import { getHeartbeat } from "@/actions/heartbeat"
import { getRunners } from "@/actions/runners"
import { getTasks } from "@/actions/tasks"
import { type EventSourceStatus, useEventSource } from "@/hooks/use-event-source"

export const useRunStatus = (run: Run) => {
/**
 * Shape of the object returned by `useRunStatus` for a single run.
 */
export type RunStatus = {
/** Status value returned by `useEventSource` for the run's event stream. */
sseStatus: EventSourceStatus
/**
 * `data` of the `getHeartbeat` query, which is polled every 10 seconds.
 * NOTE(review): presumably `undefined` until the first fetch resolves and
 * `null` when Redis holds no heartbeat key for the run — confirm.
 */
heartbeat: string | null | undefined
/** `data` of the `getRunners` query, which is polled every 10 seconds. */
runners: string[] | undefined
/** `data` of the `getTasks` query; previous data is kept as a placeholder between refetches. */
tasks: (Task & { taskMetrics: TaskMetrics | null })[] | undefined
/**
 * Current contents of the hook's ref-held token usage map.
 * NOTE(review): keys are presumably task ids — confirm against the event handler.
 */
tokenUsage: Map<number, TokenUsage & { duration?: number }>
/** The hook's `usageUpdatedAt` state; `undefined` until it is first set. */
usageUpdatedAt: number | undefined
}

export const useRunStatus = (run: Run): RunStatus => {
const [tasksUpdatedAt, setTasksUpdatedAt] = useState<number>()
const [usageUpdatedAt, setUsageUpdatedAt] = useState<number>()

const tokenUsage = useRef<Map<number, TokenUsage & { duration?: number }>>(new Map())
const startTimes = useRef<Map<number, number>>(new Map())

const { data: heartbeat } = useQuery({
queryKey: ["getHeartbeat", run.id],
queryFn: () => getHeartbeat(run.id),
refetchInterval: 10_000,
})

const { data: runners } = useQuery({
queryKey: ["getRunners", run.id],
queryFn: () => getRunners(run.id),
refetchInterval: 10_000,
})

const { data: tasks } = useQuery({
queryKey: ["run", run.id, tasksUpdatedAt],
queryKey: ["getTasks", run.id, tasksUpdatedAt],
queryFn: async () => getTasks(run.id),
placeholderData: keepPreviousData,
refetchInterval: 30_000,
Expand Down Expand Up @@ -65,10 +88,12 @@ export const useRunStatus = (run: Run) => {
}
}, [])

const status = useEventSource({ url, onMessage })
const sseStatus = useEventSource({ url, onMessage })

return {
status,
sseStatus,
heartbeat,
runners,
tasks,
tokenUsage: tokenUsage.current,
usageUpdatedAt,
Expand Down
10 changes: 0 additions & 10 deletions apps/web-evals/src/hooks/use-runners.ts

This file was deleted.

3 changes: 1 addition & 2 deletions apps/web-evals/src/lib/server/sse-stream.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,8 @@ export class SSEStream {

try {
await this._writer.close()
} catch (error) {
} catch (_error) {
// Writer might already be closed, ignore the error.
console.debug("[SSEStream#close] Writer already closed:", error)
}
}

Expand Down
16 changes: 10 additions & 6 deletions packages/evals/src/cli/redis.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import { createClient, type RedisClientType } from "redis"

import { EVALS_TIMEOUT } from "@roo-code/types"

let redis: RedisClientType | undefined

export const redisClient = async () => {
Expand All @@ -18,26 +20,28 @@ export const getHeartbeatKey = (runId: number) => `heartbeat:${runId}`

/**
 * Registers a task runner for a run by adding it to the run's runners set in Redis
 * and (re)setting the expiry on that set.
 *
 * The set member is `task-<taskId>:<host>`, where `<host>` is `process.env.HOSTNAME`
 * or, when that is unset, the current process id. The runner is added exactly once;
 * the fallback avoids writing a literal `task-<taskId>:undefined` member.
 *
 * @param runId - Id of the run; selects the set via `getRunnersKey(runId)`.
 * @param taskId - Id of the task this runner is executing.
 */
export const registerRunner = async ({ runId, taskId }: { runId: number; taskId: number }) => {
	const redis = await redisClient()
	const runnersKey = getRunnersKey(runId)
	await redis.sAdd(runnersKey, `task-${taskId}:${process.env.HOSTNAME ?? process.pid}`)
	// EXPIRE takes a whole number of seconds, so round up in case the quotient is fractional.
	// NOTE(review): EVALS_TIMEOUT is presumably in milliseconds — confirm.
	await redis.expire(runnersKey, Math.ceil(EVALS_TIMEOUT / 1_000))
}

/**
 * Removes a task runner from the run's runners set in Redis.
 *
 * The member removed is `task-<taskId>:<host>`, where `<host>` is
 * `process.env.HOSTNAME` or, when that is unset, the current process id —
 * the same form `registerRunner` writes. A single `SREM` is issued.
 *
 * @param runId - Id of the run; selects the set via `getRunnersKey(runId)`.
 * @param taskId - Id of the task whose runner entry is removed.
 */
export const deregisterRunner = async ({ runId, taskId }: { runId: number; taskId: number }) => {
	const redis = await redisClient()
	await redis.sRem(getRunnersKey(runId), `task-${taskId}:${process.env.HOSTNAME ?? process.pid}`)
}

/**
 * Starts a heartbeat for a run.
 *
 * Writes the current process id to the run's heartbeat key with a TTL of
 * `seconds`, then schedules a timer that re-applies that TTL every
 * `seconds / 2` so the key stays alive while this process is running.
 *
 * @param runId - Id of the run; selects the key via `getHeartbeatKey(runId)`.
 * @param seconds - TTL of the heartbeat key, in seconds (default 10).
 * @returns The `setInterval` handle, so the caller can stop the heartbeat.
 */
export const startHeartbeat = async (runId: number, seconds: number = 10) => {
	const pid = process.pid.toString()
	const redis = await redisClient()
	const heartbeatKey = getHeartbeatKey(runId)
	await redis.setEx(heartbeatKey, seconds, pid)

	return setInterval(
		() =>
			// Refresh failures are logged, not thrown, so one failed refresh does not
			// surface as an unhandled rejection from the timer callback.
			// NOTE(review): EXPIRE only extends an existing key; if the key has already
			// lapsed it is presumably not recreated here — confirm this is intended.
			redis.expire(heartbeatKey, seconds).catch((error) => {
				console.error("heartbeat error:", error)
			}),
		// Refresh at half the TTL so a refresh is due before the key expires.
		(seconds * 1_000) / 2,
	)
}

Expand Down
Loading