Skip to content

Commit 247da38

Browse files
authored
Add model info to eval runs table (RooCodeInc#7749)
1 parent 18cf33f commit 247da38

File tree

10 files changed

+845
-100
lines changed

10 files changed

+845
-100
lines changed
Lines changed: 24 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -1,59 +1,33 @@
11
"use client"
22

33
import { useMemo } from "react"
4-
import { ScatterChart, Scatter, XAxis, YAxis, Label, Customized, Cross } from "recharts"
5-
6-
import type { TaskMetrics, Run } from "@roo-code/evals"
74

85
import { formatTokens, formatCurrency, formatDuration, formatScore } from "@/lib"
96
import { useOpenRouterModels } from "@/lib/hooks"
10-
import {
11-
ChartContainer,
12-
ChartTooltip,
13-
ChartTooltipContent,
14-
ChartConfig,
15-
ChartLegend,
16-
ChartLegendContent,
17-
Table,
18-
TableBody,
19-
TableCaption,
20-
TableCell,
21-
TableHead,
22-
TableHeader,
23-
TableRow,
24-
} from "@/components/ui"
7+
import { Table, TableBody, TableCaption, TableCell, TableHead, TableHeader, TableRow } from "@/components/ui"
8+
9+
import type { EvalRun } from "./types"
10+
import { Plot } from "./plot"
2511

26-
export function Evals({
27-
runs,
28-
}: {
29-
runs: (Run & {
30-
label: string
31-
score: number
32-
languageScores?: Record<"go" | "java" | "javascript" | "python" | "rust", number>
33-
taskMetrics: TaskMetrics
34-
modelId?: string
35-
})[]
36-
}) {
12+
export function Evals({ runs }: { runs: EvalRun[] }) {
3713
const { data: openRouterModels } = useOpenRouterModels()
3814

39-
const tableData = useMemo(
15+
const tableData: (EvalRun & { label: string; cost: number })[] = useMemo(
4016
() =>
41-
runs.map((run) => ({
42-
...run,
43-
label: run.description || run.model,
44-
score: run.score,
45-
cost: run.taskMetrics.cost,
46-
model: openRouterModels?.[run.modelId ?? ""],
47-
modelInfo: openRouterModels?.[run.modelId ?? ""]?.modelInfo,
48-
})),
49-
[runs, openRouterModels],
50-
)
17+
runs.map((run) => {
18+
const openRouterModelInfo = openRouterModels?.[run.modelId ?? ""]?.modelInfo
5119

52-
const chartData = useMemo(() => tableData.filter(({ cost }) => cost < 100), [tableData])
53-
54-
const chartConfig = useMemo(
55-
() => chartData.reduce((acc, run) => ({ ...acc, [run.label]: run }), {} as ChartConfig),
56-
[chartData],
20+
return {
21+
...run,
22+
label: run.name || run.description || run.model,
23+
cost: run.taskMetrics.cost,
24+
description: run.description ?? openRouterModelInfo?.description ?? null,
25+
contextWindow: run.contextWindow ?? openRouterModelInfo?.contextWindow ?? null,
26+
inputPrice: run.inputPrice ?? openRouterModelInfo?.inputPrice ?? null,
27+
outputPrice: run.outputPrice ?? openRouterModelInfo?.outputPrice ?? null,
28+
}
29+
}),
30+
[runs, openRouterModels],
5731
)
5832

5933
return (
@@ -127,15 +101,15 @@ export function Evals({
127101
<TableBody className="font-mono">
128102
{tableData.map((run) => (
129103
<TableRow key={run.id}>
130-
<TableCell title={run.model?.description}>
104+
<TableCell title={run.description ?? undefined}>
131105
<div className="font-sans">{run.label}</div>
132-
<div className="text-xs opacity-50">{formatTokens(run.modelInfo?.contextWindow)}</div>
106+
<div className="text-xs opacity-50">{formatTokens(run.contextWindow)}</div>
133107
</TableCell>
134108
<TableCell className="border-r">
135109
<div className="flex flex-row gap-2">
136-
<div>{formatCurrency(run.modelInfo?.inputPrice)}</div>
110+
<div>{formatCurrency(run.inputPrice)}</div>
137111
<div className="opacity-25">/</div>
138-
<div>{formatCurrency(run.modelInfo?.outputPrice)}</div>
112+
<div>{formatCurrency(run.outputPrice)}</div>
139113
</div>
140114
</TableCell>
141115
<TableCell className="font-mono">{formatDuration(run.taskMetrics.duration)}</TableCell>
@@ -167,58 +141,9 @@ export function Evals({
167141
))}
168142
</TableBody>
169143
<TableCaption>
170-
<div className="pb-4 font-medium">Cost Versus Score</div>
171-
<ChartContainer config={chartConfig} className="h-[500px] w-full">
172-
<ScatterChart margin={{ top: 0, right: 0, bottom: 0, left: 20 }}>
173-
<XAxis
174-
type="number"
175-
dataKey="cost"
176-
name="Cost"
177-
domain={[
178-
(dataMin: number) => Math.round((dataMin - 5) / 5) * 5,
179-
(dataMax: number) => Math.round((dataMax + 5) / 5) * 5,
180-
]}
181-
tickFormatter={(value) => formatCurrency(value)}>
182-
<Label value="Cost" position="bottom" offset={0} />
183-
</XAxis>
184-
<YAxis
185-
type="number"
186-
dataKey="score"
187-
name="Score"
188-
domain={[
189-
(dataMin: number) => Math.max(0, Math.round((dataMin - 5) / 5) * 5),
190-
(dataMax: number) => Math.min(100, Math.round((dataMax + 5) / 5) * 5),
191-
]}
192-
tickFormatter={(value) => `${value}%`}>
193-
<Label value="Score" angle={-90} position="left" dy={-15} />
194-
</YAxis>
195-
<ChartTooltip content={<ChartTooltipContent labelKey="label" hideIndicator />} />
196-
<Customized component={renderQuadrant} />
197-
{chartData.map((d, i) => (
198-
<Scatter key={d.label} name={d.label} data={[d]} fill={`hsl(var(--chart-${i + 1}))`} />
199-
))}
200-
<ChartLegend content={<ChartLegendContent />} />
201-
</ScatterChart>
202-
</ChartContainer>
203-
<div className="py-4 text-xs opacity-50">
204-
(Note: Very expensive models are excluded from the scatter plot.)
205-
</div>
144+
<Plot tableData={tableData} />
206145
</TableCaption>
207146
</Table>
208147
</div>
209148
)
210149
}
211-
212-
// eslint-disable-next-line @typescript-eslint/no-explicit-any
213-
const renderQuadrant = (props: any) => (
214-
<Cross
215-
width={props.width}
216-
height={props.height}
217-
x={props.width / 2 + 35}
218-
y={props.height / 2 - 15}
219-
top={0}
220-
left={0}
221-
stroke="currentColor"
222-
opacity={0.1}
223-
/>
224-
)

0 commit comments

Comments
 (0)