Skip to content

Commit 5a57c3a

Browse files
committed
More progress
1 parent 30803e7 commit 5a57c3a

File tree

22 files changed

+1262
-109
lines changed

22 files changed

+1262
-109
lines changed

benchmark/README.md

Lines changed: 2 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -30,60 +30,8 @@ cp packages/server/.env.sample packages/server/.env
3030
pnpm --filter @benchmark/server db:migrate
3131
```
3232

33-
Run the benchmark server:
33+
Run the web UI:
3434

3535
```sh
36-
pnpm dev
37-
```
38-
39-
################################################################################
40-
41-
Configure ENV vars (OpenRouter, PostHog, etc):
42-
43-
```sh
44-
cp .env.local.sample .env.local
45-
# Update ENV vars as needed.
46-
```
47-
48-
Build and run a Docker image with the development environment needed to run the
49-
benchmarks (C++, Go, Java, Node.js, Python & Rust):
50-
51-
```sh
52-
npm run docker:start
53-
```
54-
55-
Run an exercise:
56-
57-
```sh
58-
npm run docker:benchmark -- -e exercises/javascript/binary
59-
```
60-
61-
Select and run an exercise:
62-
63-
```sh
64-
npm run cli
65-
```
66-
67-
Select and run an exercise for a specific language:
68-
69-
```sh
70-
npm run cli -- run rust
71-
```
72-
73-
Run all exercises for a language:
74-
75-
```sh
76-
npm run cli -- run rust all
77-
```
78-
79-
Run all exercises:
80-
81-
```sh
82-
npm run cli -- run all
83-
```
84-
85-
Run all exercises using a specific runId (useful for re-trying when an unexpected error occurs):
86-
87-
```sh
88-
npm run cli -- run all --runId 1
36+
pnpm web
8937
```

benchmark/packages/db/src/index.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,12 @@ export { schema } from "./schema.js"
66
* runs
77
*/
88

9-
export { type Run, insertRunSchema } from "./schema.js"
9+
export { type Run, type InsertRun, insertRunSchema } from "./schema.js"
1010
export { findRun, createRun, getRuns } from "./queries/runs.js"
1111

1212
/**
1313
* tasks
1414
*/
1515

16-
export { type Task, insertTaskSchema } from "./schema.js"
16+
export { type Task, type InsertTask, insertTaskSchema } from "./schema.js"
1717
export { findTask, createTask, getTask } from "./queries/tasks.js"

benchmark/packages/web/package.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,19 @@
1313
},
1414
"dependencies": {
1515
"@benchmark/db": "workspace:^",
16+
"@hookform/resolvers": "^4.1.3",
17+
"@radix-ui/react-label": "^2.1.2",
18+
"@radix-ui/react-select": "^2.1.6",
1619
"@radix-ui/react-slot": "^1.1.2",
20+
"@tanstack/react-query": "^5.69.0",
1721
"class-variance-authority": "^0.7.1",
1822
"clsx": "^2.1.1",
1923
"lucide-react": "^0.479.0",
2024
"next": "15.2.2",
2125
"next-themes": "^0.4.6",
2226
"react": "^19.0.0",
2327
"react-dom": "^19.0.0",
28+
"react-hook-form": "^7.54.2",
2429
"tailwind-merge": "^3.0.2",
2530
"tailwindcss-animate": "^1.0.7",
2631
"zod": "^3.24.2"
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
import { getRuns } from "@benchmark/db"
2+
3+
import { Table, TableBody, TableCell, TableHead, TableHeader, TableRow } from "@/components/ui"
4+
import { formatCurrency, formatDuration } from "@/lib"
5+
6+
export const dynamic = "force-dynamic"
7+
8+
export async function Home() {
9+
const runs = await getRuns()
10+
11+
return (
12+
<div className="mx-auto my-20 w-3xl">
13+
<Table className="border">
14+
<TableHeader>
15+
<TableRow>
16+
<TableHead>ID</TableHead>
17+
<TableHead>Model</TableHead>
18+
<TableHead>Timestamp</TableHead>
19+
<TableHead>Passed</TableHead>
20+
<TableHead>Failed</TableHead>
21+
<TableHead>% Correct</TableHead>
22+
<TableHead>Cost</TableHead>
23+
<TableHead>Duration</TableHead>
24+
</TableRow>
25+
</TableHeader>
26+
<TableBody>
27+
{runs.map((run) => (
28+
<TableRow key={run.id}>
29+
<TableCell>{run.id}</TableCell>
30+
<TableCell>{run.model}</TableCell>
31+
<TableCell>{new Date(run.createdAt).toLocaleString()}</TableCell>
32+
<TableCell>{run.passed}</TableCell>
33+
<TableCell>{run.failed}</TableCell>
34+
<TableCell>{(run.rate * 100).toFixed(1)}%</TableCell>
35+
<TableCell>{formatCurrency(run.cost)}</TableCell>
36+
<TableCell>{formatDuration(run.duration)}</TableCell>
37+
</TableRow>
38+
))}
39+
</TableBody>
40+
</Table>
41+
</div>
42+
)
43+
}

benchmark/packages/web/src/app/layout.tsx

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,12 @@
11
import type { Metadata } from "next"
22
import { Geist, Geist_Mono } from "next/font/google"
33

4-
import { ThemeProvider } from "@/components/theme-provider"
4+
import { ThemeProvider, ReactQueryProvider } from "@/components/providers"
55

66
import "./globals.css"
77

8-
const geistSans = Geist({
9-
variable: "--font-geist-sans",
10-
subsets: ["latin"],
11-
})
12-
13-
const geistMono = Geist_Mono({
14-
variable: "--font-geist-mono",
15-
16-
subsets: ["latin"],
17-
})
8+
const geistSans = Geist({ variable: "--font-geist-sans", subsets: ["latin"] })
9+
const geistMono = Geist_Mono({ variable: "--font-geist-mono", subsets: ["latin"] })
1810

1911
export const metadata: Metadata = {
2012
title: "Roo Code Benchmarks",
@@ -29,7 +21,7 @@ export default function RootLayout({
2921
<html lang="en">
3022
<body className={`${geistSans.variable} ${geistMono.variable} antialiased`}>
3123
<ThemeProvider attribute="class" defaultTheme="system" enableSystem disableTransitionOnChange>
32-
{children}
24+
<ReactQueryProvider>{children}</ReactQueryProvider>
3325
</ThemeProvider>
3426
</body>
3527
</html>
Lines changed: 3 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,5 @@
1-
import { getRuns } from "@benchmark/db"
1+
import { Home } from "./home"
22

3-
import { Table, TableBody, TableCell, TableHead, TableHeader, TableRow } from "@/components/ui"
4-
import { formatCurrency, formatDuration } from "@/lib"
5-
6-
export const dynamic = "force-dynamic"
7-
8-
export default async function Home() {
9-
const runs = await getRuns()
10-
11-
return (
12-
<div className="mx-auto my-20 w-3xl">
13-
<Table className="border">
14-
<TableHeader>
15-
<TableRow>
16-
<TableHead>ID</TableHead>
17-
<TableHead>Model</TableHead>
18-
<TableHead>Timestamp</TableHead>
19-
<TableHead>Passed</TableHead>
20-
<TableHead>Failed</TableHead>
21-
<TableHead>% Correct</TableHead>
22-
<TableHead>Cost</TableHead>
23-
<TableHead>Duration</TableHead>
24-
</TableRow>
25-
</TableHeader>
26-
<TableBody>
27-
{runs.map((run) => (
28-
<TableRow key={run.id}>
29-
<TableCell>{run.id}</TableCell>
30-
<TableCell>{run.model}</TableCell>
31-
<TableCell>{new Date(run.createdAt).toLocaleString()}</TableCell>
32-
<TableCell>{run.passed}</TableCell>
33-
<TableCell>{run.failed}</TableCell>
34-
<TableCell>{(run.rate * 100).toFixed(1)}%</TableCell>
35-
<TableCell>{formatCurrency(run.cost)}</TableCell>
36-
<TableCell>{formatDuration(run.duration)}</TableCell>
37-
</TableRow>
38-
))}
39-
</TableBody>
40-
</Table>
41-
</div>
42-
)
3+
export default async function Page() {
4+
return <Home />
435
}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
import { ShowRun } from "./show-run"
2+
3+
export default async function Page() {
4+
return <ShowRun />
5+
}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
export async function ShowRun() {
2+
return <div>Show Run</div>
3+
}
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
"use server"
2+
3+
import { revalidatePath } from "next/cache"
4+
5+
import * as db from "@benchmark/db"
6+
7+
export async function createRun(data: db.InsertRun) {
8+
const run = await db.createRun(data)
9+
revalidatePath("/runs")
10+
return run
11+
}
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
"use client"
2+
3+
import { useState } from "react"
4+
import { useRouter } from "next/navigation"
5+
import { useForm, FormProvider } from "react-hook-form"
6+
import { zodResolver } from "@hookform/resolvers/zod"
7+
import { z } from "zod"
8+
9+
import { useOpenRouterModels } from "@/hooks/use-open-router-models"
10+
import {
11+
Button,
12+
FormControl,
13+
FormDescription,
14+
FormField,
15+
FormItem,
16+
FormLabel,
17+
FormMessage,
18+
Select,
19+
SelectContent,
20+
SelectItem,
21+
SelectTrigger,
22+
SelectValue,
23+
Textarea,
24+
} from "@/components/ui"
25+
26+
import { createRun } from "./actions"
27+
28+
const formSchema = z.object({
29+
model: z.string({
30+
required_error: "Please select a model",
31+
}),
32+
description: z.string().optional(),
33+
})
34+
35+
type FormValues = z.infer<typeof formSchema>
36+
37+
export function NewRun() {
38+
const router = useRouter()
39+
const { data: models, isLoading, error } = useOpenRouterModels()
40+
const [isSubmitting, setIsSubmitting] = useState(false)
41+
42+
const form = useForm<FormValues>({
43+
resolver: zodResolver(formSchema),
44+
})
45+
46+
async function onSubmit(data: FormValues) {
47+
setIsSubmitting(true)
48+
49+
try {
50+
const run = await createRun(data)
51+
router.push(`/runs/${run.id}`)
52+
} catch (error) {
53+
console.error("Error creating run:", error)
54+
setIsSubmitting(false)
55+
}
56+
}
57+
58+
return (
59+
<div className="space-y-6 max-w-2xl mx-auto p-6">
60+
<div className="space-y-2">
61+
<h1 className="text-2xl font-bold">Create New Run</h1>
62+
<p className="text-muted-foreground">
63+
Create a new run by selecting a model and providing an optional description.
64+
</p>
65+
</div>
66+
<div>
67+
<FormProvider {...form}>
68+
<form onSubmit={form.handleSubmit(onSubmit)} className="space-y-6">
69+
<FormField
70+
control={form.control}
71+
name="model"
72+
render={({ field }) => (
73+
<FormItem>
74+
<FormLabel>Model</FormLabel>
75+
<Select onValueChange={field.onChange} defaultValue={field.value}>
76+
<FormControl>
77+
<SelectTrigger>
78+
<SelectValue placeholder="Select a model" />
79+
</SelectTrigger>
80+
</FormControl>
81+
<SelectContent>
82+
{isLoading ? (
83+
<div className="p-2 text-center text-muted-foreground">
84+
Loading models...
85+
</div>
86+
) : error ? (
87+
<div className="p-2 text-center text-destructive">
88+
Error loading models. Using fallback options.
89+
</div>
90+
) : (
91+
models?.map((model) => (
92+
<SelectItem key={model.id} value={model.id}>
93+
{model.name}
94+
</SelectItem>
95+
))
96+
)}
97+
</SelectContent>
98+
</Select>
99+
<FormDescription>Select the model to use for this run.</FormDescription>
100+
<FormMessage />
101+
</FormItem>
102+
)}
103+
/>
104+
<FormField
105+
control={form.control}
106+
name="description"
107+
render={({ field }) => (
108+
<FormItem>
109+
<FormLabel>Description</FormLabel>
110+
<FormControl>
111+
<Textarea
112+
placeholder="Enter a description for this run (optional)"
113+
className="resize-none"
114+
{...field}
115+
/>
116+
</FormControl>
117+
<FormDescription>
118+
Provide an optional description to help identify this run.
119+
</FormDescription>
120+
<FormMessage />
121+
</FormItem>
122+
)}
123+
/>
124+
<Button type="submit" disabled={isSubmitting}>
125+
{isSubmitting ? "Creating..." : "Create Run"}
126+
</Button>
127+
</form>
128+
</FormProvider>
129+
</div>
130+
</div>
131+
)
132+
}

0 commit comments

Comments
 (0)