Skip to content

Commit b56ae14

Browse files
committed
Persist toolUsage in taskMetrics
1 parent 1eac22b commit b56ae14

File tree

19 files changed (+454 -75 lines changed)

evals/apps/cli/src/index.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,12 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server
275275
})
276276
}
277277

278-
if (eventName === RooCodeEventName.TaskCompleted || eventName === RooCodeEventName.TaskAborted) {
278+
if (eventName === RooCodeEventName.TaskCompleted && taskMetricsId) {
279+
const toolUsage = payload[2]
280+
await updateTaskMetrics(taskMetricsId, { toolUsage })
281+
}
282+
283+
if (eventName === RooCodeEventName.TaskAborted || eventName === RooCodeEventName.TaskCompleted) {
279284
taskFinishedAt = Date.now()
280285
await updateTask(task.id, { finishedAt: new Date() })
281286
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
ALTER TABLE `taskMetrics` ADD `toolUsage` blob;
Lines changed: 296 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,296 @@
1+
{
2+
"version": "6",
3+
"dialect": "sqlite",
4+
"id": "a7a893e2-373a-4706-bcd4-772e2525db62",
5+
"prevId": "f49d9b0b-fda9-467a-9adb-c941d6cbf7ce",
6+
"tables": {
7+
"runs": {
8+
"name": "runs",
9+
"columns": {
10+
"id": {
11+
"name": "id",
12+
"type": "integer",
13+
"primaryKey": true,
14+
"notNull": true,
15+
"autoincrement": true
16+
},
17+
"taskMetricsId": {
18+
"name": "taskMetricsId",
19+
"type": "integer",
20+
"primaryKey": false,
21+
"notNull": false,
22+
"autoincrement": false
23+
},
24+
"model": {
25+
"name": "model",
26+
"type": "text",
27+
"primaryKey": false,
28+
"notNull": true,
29+
"autoincrement": false
30+
},
31+
"description": {
32+
"name": "description",
33+
"type": "text",
34+
"primaryKey": false,
35+
"notNull": false,
36+
"autoincrement": false
37+
},
38+
"settings": {
39+
"name": "settings",
40+
"type": "blob",
41+
"primaryKey": false,
42+
"notNull": false,
43+
"autoincrement": false
44+
},
45+
"pid": {
46+
"name": "pid",
47+
"type": "integer",
48+
"primaryKey": false,
49+
"notNull": false,
50+
"autoincrement": false
51+
},
52+
"socketPath": {
53+
"name": "socketPath",
54+
"type": "text",
55+
"primaryKey": false,
56+
"notNull": true,
57+
"autoincrement": false
58+
},
59+
"concurrency": {
60+
"name": "concurrency",
61+
"type": "integer",
62+
"primaryKey": false,
63+
"notNull": true,
64+
"autoincrement": false,
65+
"default": 2
66+
},
67+
"passed": {
68+
"name": "passed",
69+
"type": "integer",
70+
"primaryKey": false,
71+
"notNull": true,
72+
"autoincrement": false,
73+
"default": 0
74+
},
75+
"failed": {
76+
"name": "failed",
77+
"type": "integer",
78+
"primaryKey": false,
79+
"notNull": true,
80+
"autoincrement": false,
81+
"default": 0
82+
},
83+
"createdAt": {
84+
"name": "createdAt",
85+
"type": "integer",
86+
"primaryKey": false,
87+
"notNull": true,
88+
"autoincrement": false
89+
}
90+
},
91+
"indexes": {},
92+
"foreignKeys": {
93+
"runs_taskMetricsId_taskMetrics_id_fk": {
94+
"name": "runs_taskMetricsId_taskMetrics_id_fk",
95+
"tableFrom": "runs",
96+
"tableTo": "taskMetrics",
97+
"columnsFrom": ["taskMetricsId"],
98+
"columnsTo": ["id"],
99+
"onDelete": "no action",
100+
"onUpdate": "no action"
101+
}
102+
},
103+
"compositePrimaryKeys": {},
104+
"uniqueConstraints": {},
105+
"checkConstraints": {}
106+
},
107+
"taskMetrics": {
108+
"name": "taskMetrics",
109+
"columns": {
110+
"id": {
111+
"name": "id",
112+
"type": "integer",
113+
"primaryKey": true,
114+
"notNull": true,
115+
"autoincrement": true
116+
},
117+
"tokensIn": {
118+
"name": "tokensIn",
119+
"type": "integer",
120+
"primaryKey": false,
121+
"notNull": true,
122+
"autoincrement": false
123+
},
124+
"tokensOut": {
125+
"name": "tokensOut",
126+
"type": "integer",
127+
"primaryKey": false,
128+
"notNull": true,
129+
"autoincrement": false
130+
},
131+
"tokensContext": {
132+
"name": "tokensContext",
133+
"type": "integer",
134+
"primaryKey": false,
135+
"notNull": true,
136+
"autoincrement": false
137+
},
138+
"cacheWrites": {
139+
"name": "cacheWrites",
140+
"type": "integer",
141+
"primaryKey": false,
142+
"notNull": true,
143+
"autoincrement": false
144+
},
145+
"cacheReads": {
146+
"name": "cacheReads",
147+
"type": "integer",
148+
"primaryKey": false,
149+
"notNull": true,
150+
"autoincrement": false
151+
},
152+
"cost": {
153+
"name": "cost",
154+
"type": "real",
155+
"primaryKey": false,
156+
"notNull": true,
157+
"autoincrement": false
158+
},
159+
"duration": {
160+
"name": "duration",
161+
"type": "integer",
162+
"primaryKey": false,
163+
"notNull": true,
164+
"autoincrement": false
165+
},
166+
"toolUsage": {
167+
"name": "toolUsage",
168+
"type": "blob",
169+
"primaryKey": false,
170+
"notNull": false,
171+
"autoincrement": false
172+
},
173+
"createdAt": {
174+
"name": "createdAt",
175+
"type": "integer",
176+
"primaryKey": false,
177+
"notNull": true,
178+
"autoincrement": false
179+
}
180+
},
181+
"indexes": {},
182+
"foreignKeys": {},
183+
"compositePrimaryKeys": {},
184+
"uniqueConstraints": {},
185+
"checkConstraints": {}
186+
},
187+
"tasks": {
188+
"name": "tasks",
189+
"columns": {
190+
"id": {
191+
"name": "id",
192+
"type": "integer",
193+
"primaryKey": true,
194+
"notNull": true,
195+
"autoincrement": true
196+
},
197+
"runId": {
198+
"name": "runId",
199+
"type": "integer",
200+
"primaryKey": false,
201+
"notNull": true,
202+
"autoincrement": false
203+
},
204+
"taskMetricsId": {
205+
"name": "taskMetricsId",
206+
"type": "integer",
207+
"primaryKey": false,
208+
"notNull": false,
209+
"autoincrement": false
210+
},
211+
"language": {
212+
"name": "language",
213+
"type": "text",
214+
"primaryKey": false,
215+
"notNull": true,
216+
"autoincrement": false
217+
},
218+
"exercise": {
219+
"name": "exercise",
220+
"type": "text",
221+
"primaryKey": false,
222+
"notNull": true,
223+
"autoincrement": false
224+
},
225+
"passed": {
226+
"name": "passed",
227+
"type": "integer",
228+
"primaryKey": false,
229+
"notNull": false,
230+
"autoincrement": false
231+
},
232+
"startedAt": {
233+
"name": "startedAt",
234+
"type": "integer",
235+
"primaryKey": false,
236+
"notNull": false,
237+
"autoincrement": false
238+
},
239+
"finishedAt": {
240+
"name": "finishedAt",
241+
"type": "integer",
242+
"primaryKey": false,
243+
"notNull": false,
244+
"autoincrement": false
245+
},
246+
"createdAt": {
247+
"name": "createdAt",
248+
"type": "integer",
249+
"primaryKey": false,
250+
"notNull": true,
251+
"autoincrement": false
252+
}
253+
},
254+
"indexes": {
255+
"tasks_language_exercise_idx": {
256+
"name": "tasks_language_exercise_idx",
257+
"columns": ["runId", "language", "exercise"],
258+
"isUnique": true
259+
}
260+
},
261+
"foreignKeys": {
262+
"tasks_runId_runs_id_fk": {
263+
"name": "tasks_runId_runs_id_fk",
264+
"tableFrom": "tasks",
265+
"tableTo": "runs",
266+
"columnsFrom": ["runId"],
267+
"columnsTo": ["id"],
268+
"onDelete": "no action",
269+
"onUpdate": "no action"
270+
},
271+
"tasks_taskMetricsId_taskMetrics_id_fk": {
272+
"name": "tasks_taskMetricsId_taskMetrics_id_fk",
273+
"tableFrom": "tasks",
274+
"tableTo": "taskMetrics",
275+
"columnsFrom": ["taskMetricsId"],
276+
"columnsTo": ["id"],
277+
"onDelete": "no action",
278+
"onUpdate": "no action"
279+
}
280+
},
281+
"compositePrimaryKeys": {},
282+
"uniqueConstraints": {},
283+
"checkConstraints": {}
284+
}
285+
},
286+
"views": {},
287+
"enums": {},
288+
"_meta": {
289+
"schemas": {},
290+
"tables": {},
291+
"columns": {}
292+
},
293+
"internal": {
294+
"indexes": {}
295+
}
296+
}

evals/packages/db/drizzle/meta/_journal.json

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,13 @@
2222
"when": 1743698195142,
2323
"tag": "0002_white_flatman",
2424
"breakpoints": true
25+
},
26+
{
27+
"idx": 3,
28+
"version": "6",
29+
"when": 1744933023667,
30+
"tag": "0003_familiar_miss_america",
31+
"breakpoints": true
2532
}
2633
]
2734
}

evals/packages/db/src/schema.ts

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import { sqliteTable, text, real, integer, blob, uniqueIndex } from "drizzle-orm
22
import { relations } from "drizzle-orm"
33
import { createInsertSchema } from "drizzle-zod"
44

5-
import { RooCodeSettings, exerciseLanguages, rooCodeSettingsSchema } from "@evals/types"
5+
import { RooCodeSettings, ToolUsage, exerciseLanguages, rooCodeSettingsSchema, toolUsageSchema } from "@evals/types"
66

77
/**
88
* runs
@@ -84,12 +84,15 @@ export const taskMetrics = sqliteTable("taskMetrics", {
8484
cacheReads: integer({ mode: "number" }).notNull(),
8585
cost: real().notNull(),
8686
duration: integer({ mode: "number" }).notNull(),
87+
toolUsage: blob({ mode: "json" }).$type<ToolUsage>(),
8788
createdAt: integer({ mode: "timestamp" }).notNull(),
8889
})
8990

9091
export type TaskMetrics = typeof taskMetrics.$inferSelect
9192

92-
export const insertTaskMetricsSchema = createInsertSchema(taskMetrics).omit({ id: true, createdAt: true })
93+
export const insertTaskMetricsSchema = createInsertSchema(taskMetrics)
94+
.omit({ id: true, createdAt: true })
95+
.extend({ toolUsage: toolUsageSchema.optional() })
9396

9497
export type InsertTaskMetrics = Omit<typeof taskMetrics.$inferInsert, "id" | "createdAt">
9598

0 commit comments

Comments
 (0)