Skip to content

Commit a747ce6

Browse files
committed
Add diff edit metrics
1 parent b7e36ab commit a747ce6

File tree

7 files changed

+62
-50
lines changed

7 files changed

+62
-50
lines changed

evals/apps/web/src/app/home.tsx

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ import { Ellipsis, Rocket } from "lucide-react"
88
import type { Run, TaskMetrics } from "@evals/db"
99

1010
import { deleteRun } from "@/lib/server/runs"
11-
import { formatCurrency, formatDuration, formatTokens } from "@/lib"
11+
import { formatCurrency, formatDuration, formatTokens, formatToolUsageSuccessRate } from "@/lib/formatters"
1212
import {
1313
Button,
1414
Table,
@@ -59,7 +59,8 @@ export function Home({ runs }: { runs: (Run & { taskMetrics: TaskMetrics | null
5959
<TableHead>Passed</TableHead>
6060
<TableHead>Failed</TableHead>
6161
<TableHead>% Correct</TableHead>
62-
<TableHead className="text-center">Tokens In / Out</TableHead>
62+
<TableHead>Tokens In / Out</TableHead>
63+
<TableHead>Diff Edits</TableHead>
6364
<TableHead>Cost</TableHead>
6465
<TableHead>Duration</TableHead>
6566
<TableHead />
@@ -79,12 +80,21 @@ export function Home({ runs }: { runs: (Run & { taskMetrics: TaskMetrics | null
7980
</TableCell>
8081
<TableCell>
8182
{taskMetrics && (
82-
<div className="flex items-center justify-evenly">
83+
<div className="flex items-center gap-1.5">
8384
<div>{formatTokens(taskMetrics.tokensIn)}</div>/
8485
<div>{formatTokens(taskMetrics.tokensOut)}</div>
8586
</div>
8687
)}
8788
</TableCell>
89+
<TableCell>
90+
{taskMetrics?.toolUsage?.apply_diff && (
91+
<div className="flex flex-row items-center gap-1.5">
92+
<div>{taskMetrics.toolUsage.apply_diff.attempts}</div>
93+
<div>/</div>
94+
<div>{formatToolUsageSuccessRate(taskMetrics.toolUsage.apply_diff)}</div>
95+
</div>
96+
)}
97+
</TableCell>
8898
<TableCell>{taskMetrics && formatCurrency(taskMetrics.cost)}</TableCell>
8999
<TableCell>{taskMetrics && formatDuration(taskMetrics.duration)}</TableCell>
90100
<TableCell>

evals/apps/web/src/app/runs/[id]/run.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import { LoaderCircle } from "lucide-react"
55

66
import * as db from "@evals/db"
77

8-
import { formatCurrency, formatDuration, formatTokens } from "@/lib"
8+
import { formatCurrency, formatDuration, formatTokens } from "@/lib/formatters"
99
import { useRunStatus } from "@/hooks/use-run-status"
1010
import { Table, TableBody, TableCell, TableHead, TableHeader, TableRow } from "@/components/ui"
1111

evals/apps/web/src/lib/format-currency.ts

Lines changed: 0 additions & 6 deletions
This file was deleted.

evals/apps/web/src/lib/format-duration.ts

Lines changed: 0 additions & 22 deletions
This file was deleted.

evals/apps/web/src/lib/format-tokens.ts

Lines changed: 0 additions & 15 deletions
This file was deleted.
evals/apps/web/src/lib/formatters.ts

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
// Module-level Intl.NumberFormat instance (en-US locale, USD currency style),
// created once and reused by formatCurrency below.
const formatter = new Intl.NumberFormat("en-US", {
2+
style: "currency",
3+
currency: "USD",
4+
})
5+
6+
/**
 * Formats a numeric amount as a US-dollar currency string.
 *
 * @param amount - The amount to format.
 * @returns The result of formatting `amount` with the shared en-US / USD formatter.
 */
export const formatCurrency = (amount: number) => formatter.format(amount)
7+
8+
/**
 * Formats a duration given in milliseconds as a compact "1h 2m 3s" string.
 *
 * Zero-valued units are omitted, except that a duration shorter than one
 * second (or an invalid one) is rendered as "0s" so the result is never empty.
 *
 * @param durationMs - Duration in milliseconds. Negative or non-finite values
 *   (NaN, Infinity) are treated as zero instead of producing output such as
 *   "-2s" or "NaNs".
 * @returns The space-separated hour / minute / second parts.
 */
export const formatDuration = (durationMs: number) => {
	// Clamp invalid input to zero; sub-second remainders are truncated.
	const totalSeconds = Number.isFinite(durationMs) && durationMs > 0 ? Math.floor(durationMs / 1000) : 0

	const hours = Math.floor(totalSeconds / 3600)
	const minutes = Math.floor((totalSeconds % 3600) / 60)
	const seconds = totalSeconds % 60

	const parts: string[] = []

	if (hours > 0) {
		parts.push(`${hours}h`)
	}

	if (minutes > 0) {
		parts.push(`${minutes}m`)
	}

	// Always emit seconds when nothing else was emitted, so "0s" is shown for tiny durations.
	if (seconds > 0 || parts.length === 0) {
		parts.push(`${seconds}s`)
	}

	return parts.join(" ")
}
30+
31+
/**
 * Formats a token count with a compact unit suffix: plain digits below 1000,
 * then one decimal place with "k", "M" or "B".
 *
 * The unit is chosen *after* rounding to one decimal place, so a value just
 * under a threshold is promoted to the next unit instead of overflowing its
 * own (e.g. 999999 is "1.0M", not "1000.0k").
 *
 * @param tokens - The token count to format.
 * @returns The formatted count, e.g. "999", "1.5k", "2.0M", "3.2B".
 */
export const formatTokens = (tokens: number) => {
	if (tokens < 1000) {
		return tokens.toString()
	}

	const scale = (divisor: number) => (tokens / divisor).toFixed(1)

	const thousands = scale(1000)

	// If rounding pushed the value to 1000.0, fall through to the next unit.
	if (Number(thousands) < 1000) {
		return `${thousands}k`
	}

	const millions = scale(1000000)

	if (Number(millions) < 1000) {
		return `${millions}M`
	}

	// Largest supported unit; values of a trillion or more stay in "B".
	return `${scale(1000000000)}B`
}
46+
47+
/**
 * Formats the success rate of a tool as a percentage with one decimal place.
 *
 * The rate is `(attempts - failures) / attempts`. When there are no attempts
 * the ratio is undefined, so "0.0%" is returned instead of "NaN%".
 *
 * @param usage - Attempt and failure counts for a single tool.
 * @returns The success rate, e.g. "75.0%".
 */
export const formatToolUsageSuccessRate = (usage: { attempts: number; failures: number }) =>
	usage.attempts > 0
		? `${(((usage.attempts - usage.failures) / usage.attempts) * 100).toFixed(1)}%`
		: "0.0%"

evals/apps/web/src/lib/index.ts

Lines changed: 0 additions & 3 deletions
This file was deleted.

0 commit comments

Comments
 (0)