|
1 | 1 | "use client" |
2 | 2 |
|
3 | 3 | import { useMemo } from "react" |
4 | | -import { ScatterChart, Scatter, XAxis, YAxis, Label, Customized, Cross } from "recharts" |
5 | | - |
6 | | -import type { TaskMetrics, Run } from "@roo-code/evals" |
7 | 4 |
|
8 | 5 | import { formatTokens, formatCurrency, formatDuration, formatScore } from "@/lib" |
9 | 6 | import { useOpenRouterModels } from "@/lib/hooks" |
10 | | -import { |
11 | | - ChartContainer, |
12 | | - ChartTooltip, |
13 | | - ChartTooltipContent, |
14 | | - ChartConfig, |
15 | | - ChartLegend, |
16 | | - ChartLegendContent, |
17 | | - Table, |
18 | | - TableBody, |
19 | | - TableCaption, |
20 | | - TableCell, |
21 | | - TableHead, |
22 | | - TableHeader, |
23 | | - TableRow, |
24 | | -} from "@/components/ui" |
| 7 | +import { Table, TableBody, TableCaption, TableCell, TableHead, TableHeader, TableRow } from "@/components/ui" |
| 8 | + |
| 9 | +import type { EvalRun } from "./types" |
| 10 | +import { Plot } from "./plot" |
25 | 11 |
|
26 | | -export function Evals({ |
27 | | - runs, |
28 | | -}: { |
29 | | - runs: (Run & { |
30 | | - label: string |
31 | | - score: number |
32 | | - languageScores?: Record<"go" | "java" | "javascript" | "python" | "rust", number> |
33 | | - taskMetrics: TaskMetrics |
34 | | - modelId?: string |
35 | | - })[] |
36 | | -}) { |
| 12 | +export function Evals({ runs }: { runs: EvalRun[] }) { |
37 | 13 | const { data: openRouterModels } = useOpenRouterModels() |
38 | 14 |
|
39 | | - const tableData = useMemo( |
| 15 | + const tableData: (EvalRun & { label: string; cost: number })[] = useMemo( |
40 | 16 | () => |
41 | 17 | runs.map((run) => { |
42 | 18 | const openRouterModelInfo = openRouterModels?.[run.modelId ?? ""]?.modelInfo |
43 | 19 |
|
44 | 20 | return { |
45 | 21 | ...run, |
46 | 22 | label: run.name || run.description || run.model, |
47 | | - score: run.score, |
48 | 23 | cost: run.taskMetrics.cost, |
49 | | - description: run.description ?? openRouterModelInfo?.description, |
50 | | - contextWindow: run.contextWindow ?? openRouterModelInfo?.contextWindow, |
51 | | - inputPrice: run.inputPrice ?? openRouterModelInfo?.inputPrice, |
52 | | - outputPrice: run.outputPrice ?? openRouterModelInfo?.outputPrice, |
| 24 | + description: run.description ?? openRouterModelInfo?.description ?? null, |
| 25 | + contextWindow: run.contextWindow ?? openRouterModelInfo?.contextWindow ?? null, |
| 26 | + inputPrice: run.inputPrice ?? openRouterModelInfo?.inputPrice ?? null, |
| 27 | + outputPrice: run.outputPrice ?? openRouterModelInfo?.outputPrice ?? null, |
53 | 28 | } |
54 | 29 | }), |
55 | 30 | [runs, openRouterModels], |
56 | 31 | ) |
57 | 32 |
|
58 | | - const chartData = useMemo(() => tableData.filter(({ cost }) => cost < 100), [tableData]) |
59 | | - |
60 | | - const chartConfig = useMemo( |
61 | | - () => chartData.reduce((acc, run) => ({ ...acc, [run.label]: run }), {} as ChartConfig), |
62 | | - [chartData], |
63 | | - ) |
64 | | - |
65 | 33 | return ( |
66 | 34 | <div className="mx-auto flex max-w-screen-lg flex-col gap-8 p-8"> |
67 | 35 | <div className="flex flex-col gap-4"> |
@@ -133,7 +101,7 @@ export function Evals({ |
133 | 101 | <TableBody className="font-mono"> |
134 | 102 | {tableData.map((run) => ( |
135 | 103 | <TableRow key={run.id}> |
136 | | - <TableCell title={run.description}> |
| 104 | + <TableCell title={run.description ?? undefined}> |
137 | 105 | <div className="font-sans">{run.label}</div> |
138 | 106 | <div className="text-xs opacity-50">{formatTokens(run.contextWindow ?? 0)}</div> |
139 | 107 | </TableCell> |
@@ -173,58 +141,9 @@ export function Evals({ |
173 | 141 | ))} |
174 | 142 | </TableBody> |
175 | 143 | <TableCaption> |
176 | | - <div className="pb-4 font-medium">Cost Versus Score</div> |
177 | | - <ChartContainer config={chartConfig} className="h-[500px] w-full"> |
178 | | - <ScatterChart margin={{ top: 0, right: 0, bottom: 0, left: 20 }}> |
179 | | - <XAxis |
180 | | - type="number" |
181 | | - dataKey="cost" |
182 | | - name="Cost" |
183 | | - domain={[ |
184 | | - (dataMin: number) => Math.round((dataMin - 5) / 5) * 5, |
185 | | - (dataMax: number) => Math.round((dataMax + 5) / 5) * 5, |
186 | | - ]} |
187 | | - tickFormatter={(value) => formatCurrency(value)}> |
188 | | - <Label value="Cost" position="bottom" offset={0} /> |
189 | | - </XAxis> |
190 | | - <YAxis |
191 | | - type="number" |
192 | | - dataKey="score" |
193 | | - name="Score" |
194 | | - domain={[ |
195 | | - (dataMin: number) => Math.max(0, Math.round((dataMin - 5) / 5) * 5), |
196 | | - (dataMax: number) => Math.min(100, Math.round((dataMax + 5) / 5) * 5), |
197 | | - ]} |
198 | | - tickFormatter={(value) => `${value}%`}> |
199 | | - <Label value="Score" angle={-90} position="left" dy={-15} /> |
200 | | - </YAxis> |
201 | | - <ChartTooltip content={<ChartTooltipContent labelKey="label" hideIndicator />} /> |
202 | | - <Customized component={renderQuadrant} /> |
203 | | - {chartData.map((d, i) => ( |
204 | | - <Scatter key={d.label} name={d.label} data={[d]} fill={`hsl(var(--chart-${i + 1}))`} /> |
205 | | - ))} |
206 | | - <ChartLegend content={<ChartLegendContent />} /> |
207 | | - </ScatterChart> |
208 | | - </ChartContainer> |
209 | | - <div className="py-4 text-xs opacity-50"> |
210 | | - (Note: Very expensive models are excluded from the scatter plot.) |
211 | | - </div> |
| 144 | + <Plot tableData={tableData} /> |
212 | 145 | </TableCaption> |
213 | 146 | </Table> |
214 | 147 | </div> |
215 | 148 | ) |
216 | 149 | } |
217 | | - |
218 | | -// eslint-disable-next-line @typescript-eslint/no-explicit-any |
219 | | -const renderQuadrant = (props: any) => ( |
220 | | - <Cross |
221 | | - width={props.width} |
222 | | - height={props.height} |
223 | | - x={props.width / 2 + 35} |
224 | | - y={props.height / 2 - 15} |
225 | | - top={0} |
226 | | - left={0} |
227 | | - stroke="currentColor" |
228 | | - opacity={0.1} |
229 | | - /> |
230 | | -) |
0 commit comments