Skip to content

Commit 028b656

Browse files
committed
feat: add context window and pricing columns to evals database
- Added contextWindow, pricePerMillionInputTokens, and pricePerMillionOutputTokens columns to runs table
- Updated OpenRouter models hook to fetch and cache full model data including context and pricing
- Enhanced UI to display context window and pricing information in runs list and details pages
- Generated database migration for new columns
1 parent 079b37a commit 028b656

File tree

9 files changed

+555
-17
lines changed

9 files changed

+555
-17
lines changed

apps/web-evals/src/actions/runs.ts

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,21 @@ import { CreateRun } from "@/lib/schemas"
2121

2222
const EVALS_REPO_PATH = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "../../../../../evals")
2323

24-
// eslint-disable-next-line @typescript-eslint/no-unused-vars
25-
export async function createRun({ suite, exercises = [], systemPrompt, timeout, ...values }: CreateRun) {
24+
export async function createRun({
25+
suite,
26+
exercises = [],
27+
timeout,
28+
contextWindow,
29+
pricePerMillionInputTokens,
30+
pricePerMillionOutputTokens,
31+
...values
32+
}: CreateRun & { contextWindow?: number; pricePerMillionInputTokens?: number; pricePerMillionOutputTokens?: number }) {
2633
const run = await _createRun({
2734
...values,
2835
timeout,
36+
contextWindow,
37+
pricePerMillionInputTokens,
38+
pricePerMillionOutputTokens,
2939
socketPath: "", // TODO: Get rid of this.
3040
})
3141

apps/web-evals/src/app/runs/[id]/run.tsx

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,15 @@ export function Run({ run }: { run: Run }) {
4747
<div className="mb-2">
4848
<div>
4949
<div className="font-mono">{run.model}</div>
50+
<div className="flex gap-4 text-sm text-muted-foreground">
51+
{run.contextWindow && <span>Context: {(run.contextWindow / 1000).toFixed(0)}k tokens</span>}
52+
{(run.pricePerMillionInputTokens || run.pricePerMillionOutputTokens) && (
53+
<span>
54+
Pricing: ${run.pricePerMillionInputTokens?.toFixed(2) || "?"} / $
55+
{run.pricePerMillionOutputTokens?.toFixed(2) || "?"} per 1M tokens
56+
</span>
57+
)}
58+
</div>
5059
{run.description && <div className="text-sm text-muted-foreground">{run.description}</div>}
5160
</div>
5261
{!run.taskMetricsId && <RunStatus runStatus={runStatus} />}

apps/web-evals/src/app/runs/new/new-run.tsx

Lines changed: 29 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ import {
2626
TIMEOUT_DEFAULT,
2727
} from "@/lib/schemas"
2828
import { cn } from "@/lib/utils"
29-
import { useOpenRouterModels } from "@/hooks/use-open-router-models"
29+
import { useOpenRouterModels, getModelDetails, getPricingPerMillion } from "@/hooks/use-open-router-models"
3030
import {
3131
Button,
3232
FormControl,
@@ -95,6 +95,21 @@ export function NewRun() {
9595
try {
9696
if (mode === "openrouter") {
9797
values.settings = { ...(values.settings || {}), openRouterModelId: model }
98+
99+
// Get model details and add to the run
100+
const modelDetails = getModelDetails(models.data, model)
101+
if (modelDetails) {
102+
const pricing = getPricingPerMillion(modelDetails.pricing)
103+
const extendedValues = {
104+
...values,
105+
contextWindow: modelDetails.context_length,
106+
pricePerMillionInputTokens: pricing.input,
107+
pricePerMillionOutputTokens: pricing.output,
108+
}
109+
const { id } = await createRun(extendedValues)
110+
router.push(`/runs/${id}`)
111+
return
112+
}
98113
}
99114

100115
const { id } = await createRun(values)
@@ -103,7 +118,7 @@ export function NewRun() {
103118
toast.error(e instanceof Error ? e.message : "An unknown error occurred.")
104119
}
105120
},
106-
[mode, model, router],
121+
[mode, model, models.data, router],
107122
)
108123

109124
const onFilterModels = useCallback(
@@ -112,13 +127,12 @@ export function NewRun() {
112127
modelSearchValueRef.current = search
113128
modelSearchResultsRef.current.clear()
114129

115-
for (const {
116-
obj: { id },
117-
score,
118-
} of fuzzysort.go(search, models.data || [], {
130+
const results = fuzzysort.go(search, models.data || [], {
119131
key: "name",
120-
})) {
121-
modelSearchResultsRef.current.set(id, score)
132+
})
133+
134+
for (const result of results) {
135+
modelSearchResultsRef.current.set(result.obj.id, result.score)
122136
}
123137
}
124138

@@ -210,16 +224,18 @@ export function NewRun() {
210224
<CommandList>
211225
<CommandEmpty>No model found.</CommandEmpty>
212226
<CommandGroup>
213-
{models.data?.map(({ id, name }) => (
227+
{models.data?.map((modelItem) => (
214228
<CommandItem
215-
key={id}
216-
value={id}
229+
key={modelItem.id}
230+
value={modelItem.id}
217231
onSelect={onSelectModel}>
218-
{name}
232+
{modelItem.name}
219233
<Check
220234
className={cn(
221235
"ml-auto text-accent group-data-[selected=true]:text-accent-foreground size-4",
222-
id === model ? "opacity-100" : "opacity-0",
236+
modelItem.id === model
237+
? "opacity-100"
238+
: "opacity-0",
223239
)}
224240
/>
225241
</CommandItem>

apps/web-evals/src/components/home/run.tsx

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,16 @@ export function Run({ run, taskMetrics }: RunProps) {
5151
return (
5252
<>
5353
<TableRow>
54-
<TableCell>{run.model}</TableCell>
54+
<TableCell>
55+
<div>
56+
<div>{run.model}</div>
57+
{run.contextWindow && (
58+
<div className="text-xs text-muted-foreground">
59+
{(run.contextWindow / 1000).toFixed(0)}k context
60+
</div>
61+
)}
62+
</div>
63+
</TableCell>
5564
<TableCell>{run.passed}</TableCell>
5665
<TableCell>{run.failed}</TableCell>
5766
<TableCell>
@@ -76,7 +85,19 @@ export function Run({ run, taskMetrics }: RunProps) {
7685
</div>
7786
)}
7887
</TableCell>
79-
<TableCell>{taskMetrics && formatCurrency(taskMetrics.cost)}</TableCell>
88+
<TableCell>
89+
{taskMetrics && (
90+
<div>
91+
<div>{formatCurrency(taskMetrics.cost)}</div>
92+
{(run.pricePerMillionInputTokens || run.pricePerMillionOutputTokens) && (
93+
<div className="text-xs text-muted-foreground">
94+
${run.pricePerMillionInputTokens?.toFixed(2) || "?"}/$
95+
{run.pricePerMillionOutputTokens?.toFixed(2) || "?"}/M
96+
</div>
97+
)}
98+
</div>
99+
)}
100+
</TableCell>
80101
<TableCell>{taskMetrics && formatDuration(taskMetrics.duration)}</TableCell>
81102
<TableCell>
82103
<DropdownMenu>

apps/web-evals/src/hooks/use-open-router-models.ts

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,17 @@
11
import { z } from "zod"
22
import { useQuery } from "@tanstack/react-query"
33

4+
/** Optional price field: accepted either as a string or as a number. */
const openRouterPriceSchema = z.union([z.string(), z.number()]).optional()

/**
 * Schema for a single OpenRouter model entry.
 *
 * `id` and `name` are required. `context_length` and `pricing` (with its
 * `prompt` / `completion` fields) are optional, so entries that omit them
 * still validate.
 */
export const openRouterModelSchema = z.object({
	id: z.string(),
	name: z.string(),
	context_length: z.number().optional(),
	pricing: z
		.object({
			prompt: openRouterPriceSchema,
			completion: openRouterPriceSchema,
		})
		.optional(),
})
816

917
export type OpenRouterModel = z.infer<typeof openRouterModelSchema>
@@ -29,4 +37,30 @@ export const useOpenRouterModels = () =>
2937
useQuery({
3038
queryKey: ["getOpenRouterModels"],
3139
queryFn: getOpenRouterModels,
40+
staleTime: 1000 * 60 * 60, // Cache for 1 hour
41+
gcTime: 1000 * 60 * 60 * 24, // Keep in cache for 24 hours (gcTime replaces cacheTime in v5)
3242
})
43+
44+
/**
 * Looks up a model by its ID in a list of OpenRouter models.
 *
 * @param models - The model list, or `undefined` when no list is available.
 * @param modelId - The ID compared against each entry's `id`.
 * @returns The first entry whose `id` equals `modelId`, or `null` when the
 *   list is missing or contains no such entry.
 */
export const getModelDetails = (models: OpenRouterModel[] | undefined, modelId: string): OpenRouterModel | null =>
	// `find` yields `undefined` on a miss; normalise it to `null` so that
	// "no list" and "no match" are reported with the same sentinel.
	models?.find((m) => m.id === modelId) ?? null
49+
50+
// Helper function to convert pricing to per-million tokens
51+
export const getPricingPerMillion = (pricing: OpenRouterModel["pricing"]) => {
52+
if (!pricing) return { input: undefined, output: undefined }
53+
54+
const parsePrice = (price: string | number | undefined): number | undefined => {
55+
if (price === undefined) return undefined
56+
const numPrice = typeof price === "string" ? parseFloat(price) : price
57+
if (isNaN(numPrice)) return undefined
58+
// OpenRouter prices are typically per token, convert to per million
59+
return numPrice * 1_000_000
60+
}
61+
62+
return {
63+
input: parsePrice(pricing.prompt),
64+
output: parsePrice(pricing.completion),
65+
}
66+
}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
ALTER TABLE "runs" ADD COLUMN "context_window" integer;--> statement-breakpoint
2+
ALTER TABLE "runs" ADD COLUMN "price_per_million_input_tokens" real;--> statement-breakpoint
3+
ALTER TABLE "runs" ADD COLUMN "price_per_million_output_tokens" real;

0 commit comments

Comments
 (0)