Skip to content

Commit 80ac5b9

Browse files
committed
More progress
1 parent 99709dd commit 80ac5b9

File tree

4 files changed

+107
-93
lines changed

4 files changed

+107
-93
lines changed

apps/web-roo-code/src/app/evals/evals.tsx

Lines changed: 12 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -1,67 +1,35 @@
11
"use client"
22

33
import { useMemo } from "react"
4-
import { ScatterChart, Scatter, XAxis, YAxis, Label, Customized, Cross } from "recharts"
5-
6-
import type { TaskMetrics, Run } from "@roo-code/evals"
74

85
import { formatTokens, formatCurrency, formatDuration, formatScore } from "@/lib"
96
import { useOpenRouterModels } from "@/lib/hooks"
10-
import {
11-
ChartContainer,
12-
ChartTooltip,
13-
ChartTooltipContent,
14-
ChartConfig,
15-
ChartLegend,
16-
ChartLegendContent,
17-
Table,
18-
TableBody,
19-
TableCaption,
20-
TableCell,
21-
TableHead,
22-
TableHeader,
23-
TableRow,
24-
} from "@/components/ui"
7+
import { Table, TableBody, TableCaption, TableCell, TableHead, TableHeader, TableRow } from "@/components/ui"
8+
9+
import type { EvalRun } from "./types"
10+
import { Plot } from "./plot"
2511

26-
export function Evals({
27-
runs,
28-
}: {
29-
runs: (Run & {
30-
label: string
31-
score: number
32-
languageScores?: Record<"go" | "java" | "javascript" | "python" | "rust", number>
33-
taskMetrics: TaskMetrics
34-
modelId?: string
35-
})[]
36-
}) {
12+
export function Evals({ runs }: { runs: EvalRun[] }) {
3713
const { data: openRouterModels } = useOpenRouterModels()
3814

39-
const tableData = useMemo(
15+
const tableData: (EvalRun & { label: string; cost: number })[] = useMemo(
4016
() =>
4117
runs.map((run) => {
4218
const openRouterModelInfo = openRouterModels?.[run.modelId ?? ""]?.modelInfo
4319

4420
return {
4521
...run,
4622
label: run.name || run.description || run.model,
47-
score: run.score,
4823
cost: run.taskMetrics.cost,
49-
description: run.description ?? openRouterModelInfo?.description,
50-
contextWindow: run.contextWindow ?? openRouterModelInfo?.contextWindow,
51-
inputPrice: run.inputPrice ?? openRouterModelInfo?.inputPrice,
52-
outputPrice: run.outputPrice ?? openRouterModelInfo?.outputPrice,
24+
description: run.description ?? openRouterModelInfo?.description ?? null,
25+
contextWindow: run.contextWindow ?? openRouterModelInfo?.contextWindow ?? null,
26+
inputPrice: run.inputPrice ?? openRouterModelInfo?.inputPrice ?? null,
27+
outputPrice: run.outputPrice ?? openRouterModelInfo?.outputPrice ?? null,
5328
}
5429
}),
5530
[runs, openRouterModels],
5631
)
5732

58-
const chartData = useMemo(() => tableData.filter(({ cost }) => cost < 100), [tableData])
59-
60-
const chartConfig = useMemo(
61-
() => chartData.reduce((acc, run) => ({ ...acc, [run.label]: run }), {} as ChartConfig),
62-
[chartData],
63-
)
64-
6533
return (
6634
<div className="mx-auto flex max-w-screen-lg flex-col gap-8 p-8">
6735
<div className="flex flex-col gap-4">
@@ -133,7 +101,7 @@ export function Evals({
133101
<TableBody className="font-mono">
134102
{tableData.map((run) => (
135103
<TableRow key={run.id}>
136-
<TableCell title={run.description}>
104+
<TableCell title={run.description ?? undefined}>
137105
<div className="font-sans">{run.label}</div>
138106
<div className="text-xs opacity-50">{formatTokens(run.contextWindow ?? 0)}</div>
139107
</TableCell>
@@ -173,58 +141,9 @@ export function Evals({
173141
))}
174142
</TableBody>
175143
<TableCaption>
176-
<div className="pb-4 font-medium">Cost Versus Score</div>
177-
<ChartContainer config={chartConfig} className="h-[500px] w-full">
178-
<ScatterChart margin={{ top: 0, right: 0, bottom: 0, left: 20 }}>
179-
<XAxis
180-
type="number"
181-
dataKey="cost"
182-
name="Cost"
183-
domain={[
184-
(dataMin: number) => Math.round((dataMin - 5) / 5) * 5,
185-
(dataMax: number) => Math.round((dataMax + 5) / 5) * 5,
186-
]}
187-
tickFormatter={(value) => formatCurrency(value)}>
188-
<Label value="Cost" position="bottom" offset={0} />
189-
</XAxis>
190-
<YAxis
191-
type="number"
192-
dataKey="score"
193-
name="Score"
194-
domain={[
195-
(dataMin: number) => Math.max(0, Math.round((dataMin - 5) / 5) * 5),
196-
(dataMax: number) => Math.min(100, Math.round((dataMax + 5) / 5) * 5),
197-
]}
198-
tickFormatter={(value) => `${value}%`}>
199-
<Label value="Score" angle={-90} position="left" dy={-15} />
200-
</YAxis>
201-
<ChartTooltip content={<ChartTooltipContent labelKey="label" hideIndicator />} />
202-
<Customized component={renderQuadrant} />
203-
{chartData.map((d, i) => (
204-
<Scatter key={d.label} name={d.label} data={[d]} fill={`hsl(var(--chart-${i + 1}))`} />
205-
))}
206-
<ChartLegend content={<ChartLegendContent />} />
207-
</ScatterChart>
208-
</ChartContainer>
209-
<div className="py-4 text-xs opacity-50">
210-
(Note: Very expensive models are excluded from the scatter plot.)
211-
</div>
144+
<Plot tableData={tableData} />
212145
</TableCaption>
213146
</Table>
214147
</div>
215148
)
216149
}
217-
218-
// eslint-disable-next-line @typescript-eslint/no-explicit-any
219-
const renderQuadrant = (props: any) => (
220-
<Cross
221-
width={props.width}
222-
height={props.height}
223-
x={props.width / 2 + 35}
224-
y={props.height / 2 - 15}
225-
top={0}
226-
left={0}
227-
stroke="currentColor"
228-
opacity={0.1}
229-
/>
230-
)
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
"use client"
2+
3+
import { useMemo } from "react"
4+
import { ScatterChart, Scatter, XAxis, YAxis, Label, Customized, Cross } from "recharts"
5+
6+
import { formatCurrency } from "@/lib"
7+
import {
8+
ChartContainer,
9+
ChartTooltip,
10+
ChartTooltipContent,
11+
ChartConfig,
12+
ChartLegend,
13+
ChartLegendContent,
14+
} from "@/components/ui"
15+
16+
import type { EvalRun } from "./types"
17+
18+
/** Props accepted by the `Plot` component. */
type PlotProps = {
19+
// Eval runs, each carrying a `label` string and a numeric `cost` alongside the `EvalRun` fields.
tableData: (EvalRun & { label: string; cost: number })[]
20+
}
21+
22+
/**
 * Renders the "Cost Versus Score" scatter chart for a list of eval runs.
 *
 * Each plotted run becomes its own `Scatter` series (named by its label) with
 * `cost` on the X axis and `score` on the Y axis. A faint quadrant cross is
 * drawn via `renderQuadrant`, and a footnote explains that very expensive
 * models are left out of the plot.
 *
 * @param tableData - Runs to plot; only entries with `cost < 100` are drawn.
 */
export const Plot = ({ tableData }: PlotProps) => {
23+
// Keep only runs whose cost is below 100; the rest are excluded from the chart.
const chartData = useMemo(() => tableData.filter(({ cost }) => cost < 100), [tableData])
24+
25+
// Build the chart config as an object keyed by run label, with the run itself as the value.
// If two runs share a label, the later one overwrites the earlier entry.
const chartConfig = useMemo(
26+
() => chartData.reduce((acc, run) => ({ ...acc, [run.label]: run }), {} as ChartConfig),
27+
[chartData],
28+
)
29+
30+
return (
31+
<>
32+
<div className="pb-4 font-medium">Cost Versus Score</div>
33+
<ChartContainer config={chartConfig} className="h-[500px] w-full">
34+
<ScatterChart margin={{ top: 0, right: 0, bottom: 0, left: 20 }}>
35+
<XAxis
36+
type="number"
37+
dataKey="cost"
38+
name="Cost"
39+
domain={[
40+
(dataMin: number) => Math.round((dataMin - 5) / 5) * 5,
41+
(dataMax: number) => Math.round((dataMax + 5) / 5) * 5,
42+
]}
43+
tickFormatter={(value) => formatCurrency(value)}>
44+
<Label value="Cost" position="bottom" offset={0} />
45+
</XAxis>
46+
<YAxis
47+
type="number"
48+
dataKey="score"
49+
name="Score"
50+
domain={[
51+
(dataMin: number) => Math.max(0, Math.round((dataMin - 5) / 5) * 5),
52+
(dataMax: number) => Math.min(100, Math.round((dataMax + 5) / 5) * 5),
53+
]}
54+
tickFormatter={(value) => `${value}%`}>
55+
<Label value="Score" angle={-90} position="left" dy={-15} />
56+
</YAxis>
57+
<ChartTooltip content={<ChartTooltipContent labelKey="label" hideIndicator />} />
58+
<Customized component={renderQuadrant} />
59+
{chartData.map((d, i) => (
60+
<Scatter key={d.label} name={d.label} data={[d]} fill={`hsl(var(--chart-${i + 1}))`} />
61+
))}
62+
<ChartLegend content={<ChartLegendContent />} />
63+
</ScatterChart>
64+
</ChartContainer>
65+
<div className="py-4 text-xs opacity-50">
66+
(Note: Very expensive models are excluded from the scatter plot.)
67+
</div>
68+
</>
69+
)
70+
}
71+
72+
/**
 * Render callback passed to the chart's `Customized` element; draws a faint
 * `Cross` spanning the supplied width and height to divide the plot into
 * quadrants.
 *
 * Only `props.width` and `props.height` are read. The cross is positioned at
 * (width / 2 + 35, height / 2 - 15).
 * NOTE(review): the +35 / -15 offsets presumably compensate for axis and
 * legend space so the cross sits at the visual centre — confirm.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
73+
const renderQuadrant = (props: any) => (
74+
<Cross
75+
width={props.width}
76+
height={props.height}
77+
x={props.width / 2 + 35}
78+
y={props.height / 2 - 15}
79+
top={0}
80+
left={0}
81+
stroke="currentColor"
82+
opacity={0.1}
83+
/>
84+
)
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
import type { TaskMetrics, Run } from "@roo-code/evals"
2+
3+
/**
 * A `Run` from `@roo-code/evals` extended with the extra fields declared
 * below (label, score, optional per-language scores, task metrics, and an
 * optional model id).
 */
export type EvalRun = Run & {
4+
label: string
5+
score: number
6+
// Optional per-language scores, keyed by one of the five listed language names.
languageScores?: Record<"go" | "java" | "javascript" | "python" | "rust", number>
7+
taskMetrics: TaskMetrics
8+
// Optional model identifier.
modelId?: string
9+
}

packages/evals/package.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
"drizzle-kit:production": "dotenvx run -f .env.production -- tsx node_modules/drizzle-kit/bin.cjs",
1616
"db:generate": "pnpm drizzle-kit generate",
1717
"db:migrate": "pnpm drizzle-kit migrate",
18+
"db:test:migrate": "pnpm drizzle-kit:test migrate",
19+
"db:production:migrate": "pnpm drizzle-kit:production migrate",
1820
"db:push": "pnpm drizzle-kit push",
1921
"db:test:push": "pnpm drizzle-kit:test push",
2022
"db:production:push": "pnpm drizzle-kit:production push",

0 commit comments

Comments
 (0)