|
1 | 1 | "use client" |
2 | 2 |
|
3 | 3 | import { useMemo } from "react" |
4 | | -import { ScatterChart, Scatter, XAxis, YAxis, Label, Customized, Cross } from "recharts" |
5 | | - |
6 | | -import type { TaskMetrics, Run } from "@roo-code/evals" |
7 | 4 |
|
8 | 5 | import { formatTokens, formatCurrency, formatDuration, formatScore } from "@/lib" |
9 | 6 | import { useOpenRouterModels } from "@/lib/hooks" |
10 | | -import { |
11 | | - ChartContainer, |
12 | | - ChartTooltip, |
13 | | - ChartTooltipContent, |
14 | | - ChartConfig, |
15 | | - ChartLegend, |
16 | | - ChartLegendContent, |
17 | | - Table, |
18 | | - TableBody, |
19 | | - TableCaption, |
20 | | - TableCell, |
21 | | - TableHead, |
22 | | - TableHeader, |
23 | | - TableRow, |
24 | | -} from "@/components/ui" |
| 7 | +import { Table, TableBody, TableCaption, TableCell, TableHead, TableHeader, TableRow } from "@/components/ui" |
| 8 | + |
| 9 | +import type { EvalRun } from "./types" |
| 10 | +import { Plot } from "./plot" |
25 | 11 |
|
26 | | -export function Evals({ |
27 | | - runs, |
28 | | -}: { |
29 | | - runs: (Run & { |
30 | | - label: string |
31 | | - score: number |
32 | | - languageScores?: Record<"go" | "java" | "javascript" | "python" | "rust", number> |
33 | | - taskMetrics: TaskMetrics |
34 | | - modelId?: string |
35 | | - })[] |
36 | | -}) { |
| 12 | +export function Evals({ runs }: { runs: EvalRun[] }) { |
37 | 13 | const { data: openRouterModels } = useOpenRouterModels() |
38 | 14 |
|
39 | | - const tableData = useMemo( |
| 15 | + const tableData: (EvalRun & { label: string; cost: number })[] = useMemo( |
40 | 16 | () => |
41 | | - runs.map((run) => ({ |
42 | | - ...run, |
43 | | - label: run.description || run.model, |
44 | | - score: run.score, |
45 | | - cost: run.taskMetrics.cost, |
46 | | - model: openRouterModels?.[run.modelId ?? ""], |
47 | | - modelInfo: openRouterModels?.[run.modelId ?? ""]?.modelInfo, |
48 | | - })), |
49 | | - [runs, openRouterModels], |
50 | | - ) |
| 17 | + runs.map((run) => { |
| 18 | + const openRouterModelInfo = openRouterModels?.[run.modelId ?? ""]?.modelInfo |
51 | 19 |
|
52 | | - const chartData = useMemo(() => tableData.filter(({ cost }) => cost < 100), [tableData]) |
53 | | - |
54 | | - const chartConfig = useMemo( |
55 | | - () => chartData.reduce((acc, run) => ({ ...acc, [run.label]: run }), {} as ChartConfig), |
56 | | - [chartData], |
| 20 | + return { |
| 21 | + ...run, |
| 22 | + label: run.name || run.description || run.model, |
| 23 | + cost: run.taskMetrics.cost, |
| 24 | + description: run.description ?? openRouterModelInfo?.description ?? null, |
| 25 | + contextWindow: run.contextWindow ?? openRouterModelInfo?.contextWindow ?? null, |
| 26 | + inputPrice: run.inputPrice ?? openRouterModelInfo?.inputPrice ?? null, |
| 27 | + outputPrice: run.outputPrice ?? openRouterModelInfo?.outputPrice ?? null, |
| 28 | + } |
| 29 | + }), |
| 30 | + [runs, openRouterModels], |
57 | 31 | ) |
58 | 32 |
|
59 | 33 | return ( |
@@ -127,15 +101,15 @@ export function Evals({ |
127 | 101 | <TableBody className="font-mono"> |
128 | 102 | {tableData.map((run) => ( |
129 | 103 | <TableRow key={run.id}> |
130 | | - <TableCell title={run.model?.description}> |
| 104 | + <TableCell title={run.description ?? undefined}> |
131 | 105 | <div className="font-sans">{run.label}</div> |
132 | | - <div className="text-xs opacity-50">{formatTokens(run.modelInfo?.contextWindow)}</div> |
| 106 | + <div className="text-xs opacity-50">{formatTokens(run.contextWindow)}</div> |
133 | 107 | </TableCell> |
134 | 108 | <TableCell className="border-r"> |
135 | 109 | <div className="flex flex-row gap-2"> |
136 | | - <div>{formatCurrency(run.modelInfo?.inputPrice)}</div> |
| 110 | + <div>{formatCurrency(run.inputPrice)}</div> |
137 | 111 | <div className="opacity-25">/</div> |
138 | | - <div>{formatCurrency(run.modelInfo?.outputPrice)}</div> |
| 112 | + <div>{formatCurrency(run.outputPrice)}</div> |
139 | 113 | </div> |
140 | 114 | </TableCell> |
141 | 115 | <TableCell className="font-mono">{formatDuration(run.taskMetrics.duration)}</TableCell> |
@@ -167,58 +141,9 @@ export function Evals({ |
167 | 141 | ))} |
168 | 142 | </TableBody> |
169 | 143 | <TableCaption> |
170 | | - <div className="pb-4 font-medium">Cost Versus Score</div> |
171 | | - <ChartContainer config={chartConfig} className="h-[500px] w-full"> |
172 | | - <ScatterChart margin={{ top: 0, right: 0, bottom: 0, left: 20 }}> |
173 | | - <XAxis |
174 | | - type="number" |
175 | | - dataKey="cost" |
176 | | - name="Cost" |
177 | | - domain={[ |
178 | | - (dataMin: number) => Math.round((dataMin - 5) / 5) * 5, |
179 | | - (dataMax: number) => Math.round((dataMax + 5) / 5) * 5, |
180 | | - ]} |
181 | | - tickFormatter={(value) => formatCurrency(value)}> |
182 | | - <Label value="Cost" position="bottom" offset={0} /> |
183 | | - </XAxis> |
184 | | - <YAxis |
185 | | - type="number" |
186 | | - dataKey="score" |
187 | | - name="Score" |
188 | | - domain={[ |
189 | | - (dataMin: number) => Math.max(0, Math.round((dataMin - 5) / 5) * 5), |
190 | | - (dataMax: number) => Math.min(100, Math.round((dataMax + 5) / 5) * 5), |
191 | | - ]} |
192 | | - tickFormatter={(value) => `${value}%`}> |
193 | | - <Label value="Score" angle={-90} position="left" dy={-15} /> |
194 | | - </YAxis> |
195 | | - <ChartTooltip content={<ChartTooltipContent labelKey="label" hideIndicator />} /> |
196 | | - <Customized component={renderQuadrant} /> |
197 | | - {chartData.map((d, i) => ( |
198 | | - <Scatter key={d.label} name={d.label} data={[d]} fill={`hsl(var(--chart-${i + 1}))`} /> |
199 | | - ))} |
200 | | - <ChartLegend content={<ChartLegendContent />} /> |
201 | | - </ScatterChart> |
202 | | - </ChartContainer> |
203 | | - <div className="py-4 text-xs opacity-50"> |
204 | | - (Note: Very expensive models are excluded from the scatter plot.) |
205 | | - </div> |
| 144 | + <Plot tableData={tableData} /> |
206 | 145 | </TableCaption> |
207 | 146 | </Table> |
208 | 147 | </div> |
209 | 148 | ) |
210 | 149 | } |
211 | | - |
212 | | -// eslint-disable-next-line @typescript-eslint/no-explicit-any |
213 | | -const renderQuadrant = (props: any) => ( |
214 | | - <Cross |
215 | | - width={props.width} |
216 | | - height={props.height} |
217 | | - x={props.width / 2 + 35} |
218 | | - y={props.height / 2 - 15} |
219 | | - top={0} |
220 | | - left={0} |
221 | | - stroke="currentColor" |
222 | | - opacity={0.1} |
223 | | - /> |
224 | | -) |
0 commit comments