Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion apps/vscode-e2e/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
"@roo-code/config-typescript": "workspace:^",
"@roo-code/types": "workspace:^",
"@types/mocha": "^10.0.10",
"@types/node": "^22.14.1",
"@types/node": "20.x",
"@types/vscode": "^1.95.0",
"@vscode/test-cli": "^0.0.11",
"@vscode/test-electron": "^2.4.0",
Expand Down
4 changes: 2 additions & 2 deletions apps/web-evals/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
"start": "next start"
},
"dependencies": {
"@hookform/resolvers": "^4.1.3",
"@hookform/resolvers": "^5.1.1",
"@radix-ui/react-alert-dialog": "^1.1.7",
"@radix-ui/react-dialog": "^1.1.6",
"@radix-ui/react-dropdown-menu": "^2.1.7",
Expand Down Expand Up @@ -44,7 +44,7 @@
"tailwind-merge": "^3.3.0",
"tailwindcss-animate": "^1.0.7",
"vaul": "^1.1.2",
"zod": "^3.24.2"
"zod": "^3.25.61"
},
"devDependencies": {
"@roo-code/config-eslint": "workspace:^",
Expand Down
158 changes: 0 additions & 158 deletions apps/web-evals/src/app/home.tsx

This file was deleted.

4 changes: 2 additions & 2 deletions apps/web-evals/src/app/page.tsx
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import { getRuns } from "@roo-code/evals"

import { Home } from "./home"
import { Runs } from "@/components/home/runs"

export const dynamic = "force-dynamic"

export default async function Page() {
const runs = await getRuns()
return <Home runs={runs} />
return <Runs runs={runs} />
}
10 changes: 5 additions & 5 deletions apps/web-evals/src/app/runs/new/new-run.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ import { globalSettingsSchema, providerSettingsSchema, EVALS_SETTINGS, getModelI
import { createRun } from "@/actions/runs"
import { getExercises } from "@/actions/exercises"
import {
createRunSchema as formSchema,
type CreateRun as FormValues,
createRunSchema,
type CreateRun,
MODEL_DEFAULT,
CONCURRENCY_MIN,
CONCURRENCY_MAX,
Expand Down Expand Up @@ -68,8 +68,8 @@ export function NewRun() {
const models = useOpenRouterModels()
const exercises = useQuery({ queryKey: ["getExercises"], queryFn: () => getExercises() })

const form = useForm<FormValues>({
resolver: zodResolver(formSchema),
const form = useForm<CreateRun>({
resolver: zodResolver(createRunSchema),
defaultValues: {
model: MODEL_DEFAULT,
description: "",
Expand All @@ -94,7 +94,7 @@ export function NewRun() {
const systemPromptRef = useRef<HTMLTextAreaElement>(null)

const onSubmit = useCallback(
async (values: FormValues) => {
async (values: CreateRun) => {
try {
if (mode === "openrouter") {
values.settings = { ...(values.settings || {}), openRouterModelId: model }
Expand Down
149 changes: 149 additions & 0 deletions apps/web-evals/src/components/home/run.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
import { useCallback, useState, useRef } from "react"
import Link from "next/link"
import { Ellipsis, ClipboardList, Copy, Check, LoaderCircle, Trash } from "lucide-react"

import type { Run as EvalsRun, TaskMetrics as EvalsTaskMetrics } from "@roo-code/evals"

import { deleteRun } from "@/actions/runs"
import { formatCurrency, formatDuration, formatTokens, formatToolUsageSuccessRate } from "@/lib/formatters"
import { useCopyRun } from "@/hooks/use-copy-run"
import {
Button,
TableCell,
TableRow,
DropdownMenu,
DropdownMenuContent,
DropdownMenuItem,
DropdownMenuTrigger,
AlertDialog,
AlertDialogAction,
AlertDialogCancel,
AlertDialogContent,
AlertDialogDescription,
AlertDialogFooter,
AlertDialogHeader,
AlertDialogTitle,
} from "@/components/ui"

/** Props accepted by the `Run` component. */
type RunProps = {
/** The evals run this row displays and acts on. */
run: EvalsRun
/** Task metrics for the run; may be null, in which case the metric cells render empty. */
taskMetrics: EvalsTaskMetrics | null
}

/**
 * Table row for a single evals run, rendered together with its actions menu
 * and the confirmation dialog shown before the run is deleted.
 *
 * The pass-rate cell stays empty until at least one task has passed or failed,
 * and the token / tool-usage / cost / duration cells stay empty when
 * `taskMetrics` is null (or, for tool usage, when there is no `apply_diff` entry).
 *
 * @param run - The evals run to display.
 * @param taskMetrics - Metrics for the run, or null.
 */
export function Run({ run, taskMetrics }: RunProps) {
	// Id of the run awaiting delete confirmation; while it is unset the dialog stays closed.
	const [pendingDeleteId, setPendingDeleteId] = useState<number>()
	const confirmButtonRef = useRef<HTMLButtonElement>(null)
	const { isPending, copyRun, copied } = useCopyRun(run.id)

	const onConfirmDelete = useCallback(async () => {
		if (pendingDeleteId) {
			try {
				await deleteRun(pendingDeleteId)
				setPendingDeleteId(undefined)
			} catch (err) {
				// Failures are only logged; nothing is rethrown to the caller.
				console.error(err)
			}
		}
	}, [pendingDeleteId])

	const closeDeleteDialog = () => setPendingDeleteId(undefined)

	const openDeleteDialog = () => {
		setPendingDeleteId(run.id)
		// Focus is requested on a zero-delay timer, i.e. after this handler has returned
		// (presumably so the dialog's confirm button is mounted by then — TODO confirm).
		setTimeout(() => confirmButtonRef.current?.focus(), 0)
	}

	// Number of tasks that have finished either way; denominator of the pass rate.
	const finishedCount = run.passed + run.failed
	const applyDiffUsage = taskMetrics?.toolUsage?.apply_diff

	// Icon + label for the copy menu item, depending on the copy state.
	const renderCopyLabel = () => {
		if (isPending) {
			return (
				<>
					<LoaderCircle className="animate-spin" />
					Copying...
				</>
			)
		}

		if (copied) {
			return (
				<>
					<Check />
					Copied!
				</>
			)
		}

		return (
			<>
				<Copy />
				Copy to Production
			</>
		)
	}

	return (
		<>
			<TableRow>
				<TableCell>{run.model}</TableCell>
				<TableCell>{run.passed}</TableCell>
				<TableCell>{run.failed}</TableCell>
				<TableCell>
					{finishedCount > 0 ? <span>{((run.passed / finishedCount) * 100).toFixed(1)}%</span> : null}
				</TableCell>
				<TableCell>
					{taskMetrics ? (
						<div className="flex items-center gap-1.5">
							<div>{formatTokens(taskMetrics.tokensIn)}</div>/
							<div>{formatTokens(taskMetrics.tokensOut)}</div>
						</div>
					) : null}
				</TableCell>
				<TableCell>
					{applyDiffUsage && (
						<div className="flex flex-row items-center gap-1.5">
							<div>{applyDiffUsage.attempts}</div>
							<div>/</div>
							<div>{formatToolUsageSuccessRate(applyDiffUsage)}</div>
						</div>
					)}
				</TableCell>
				<TableCell>{taskMetrics ? formatCurrency(taskMetrics.cost) : null}</TableCell>
				<TableCell>{taskMetrics ? formatDuration(taskMetrics.duration) : null}</TableCell>
				<TableCell>
					<DropdownMenu>
						<Button variant="ghost" size="icon" asChild>
							<DropdownMenuTrigger>
								<Ellipsis />
							</DropdownMenuTrigger>
						</Button>
						<DropdownMenuContent align="end">
							<DropdownMenuItem asChild>
								<Link href={`/runs/${run.id}`}>
									<div className="flex items-center gap-1">
										<ClipboardList />
										<div>View Tasks</div>
									</div>
								</Link>
							</DropdownMenuItem>
							{run.taskMetricsId && (
								<DropdownMenuItem onClick={() => copyRun()} disabled={isPending || copied}>
									<div className="flex items-center gap-1">{renderCopyLabel()}</div>
								</DropdownMenuItem>
							)}
							<DropdownMenuItem onClick={openDeleteDialog}>
								<div className="flex items-center gap-1">
									<Trash />
									<div>Delete</div>
								</div>
							</DropdownMenuItem>
						</DropdownMenuContent>
					</DropdownMenu>
				</TableCell>
			</TableRow>
			<AlertDialog open={!!pendingDeleteId} onOpenChange={closeDeleteDialog}>
				<AlertDialogContent>
					<AlertDialogHeader>
						<AlertDialogTitle>Are you sure?</AlertDialogTitle>
						<AlertDialogDescription>This action cannot be undone.</AlertDialogDescription>
					</AlertDialogHeader>
					<AlertDialogFooter>
						<AlertDialogCancel>Cancel</AlertDialogCancel>
						<AlertDialogAction ref={confirmButtonRef} onClick={onConfirmDelete}>
							Continue
						</AlertDialogAction>
					</AlertDialogFooter>
				</AlertDialogContent>
			</AlertDialog>
		</>
	)
}
Loading