Skip to content

Commit 7057e1f

Browse files
committed
Persist tool errors
1 parent 32f6deb commit 7057e1f

File tree

7 files changed: 55 additions and 35 deletions.

evals/apps/cli/src/index.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import {
2929
updateTask,
3030
createTaskMetrics,
3131
updateTaskMetrics,
32+
createToolError,
3233
} from "@evals/db"
3334
import { IpcServer, IpcClient } from "@evals/ipc"
3435

@@ -255,6 +256,12 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server
255256
rooTaskId = payload[0]
256257
}
257258

259+
if (eventName === RooCodeEventName.TaskToolFailed) {
260+
// eslint-disable-next-line @typescript-eslint/no-unused-vars
261+
const [_taskId, toolName, error] = payload
262+
await createToolError({ taskId: task.id, toolName, error })
263+
}
264+
258265
if (
259266
(eventName === RooCodeEventName.TaskTokenUsageUpdated || eventName === RooCodeEventName.TaskCompleted) &&
260267
taskMetricsId

evals/packages/db/src/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@ export * from "./schema.js"
33
export * from "./queries/runs.js"
44
export * from "./queries/tasks.js"
55
export * from "./queries/taskMetrics.js"
6+
export * from "./queries/toolErrors.js"

evals/packages/db/src/queries/runs.ts

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,8 @@ import { db } from "../db.js"
99
import { createTaskMetrics } from "./taskMetrics.js"
1010
import { getTasks } from "./tasks.js"
1111

12-
const table = schema.runs
13-
1412
export const findRun = async (id: number) => {
15-
const run = await db.query.runs.findFirst({ where: eq(table.id, id) })
13+
const run = await db.query.runs.findFirst({ where: eq(schema.runs.id, id) })
1614

1715
if (!run) {
1816
throw new RecordNotFoundError()
@@ -23,7 +21,7 @@ export const findRun = async (id: number) => {
2321

2422
export const createRun = async (args: InsertRun) => {
2523
const records = await db
26-
.insert(table)
24+
.insert(schema.runs)
2725
.values({
2826
...insertRunSchema.parse(args),
2927
createdAt: new Date(),
@@ -40,7 +38,7 @@ export const createRun = async (args: InsertRun) => {
4038
}
4139

4240
export const updateRun = async (id: number, values: UpdateRun) => {
43-
const records = await db.update(table).set(values).where(eq(table.id, id)).returning()
41+
const records = await db.update(schema.runs).set(values).where(eq(schema.runs.id, id)).returning()
4442
const record = records[0]
4543

4644
if (!record) {
@@ -50,7 +48,8 @@ export const updateRun = async (id: number, values: UpdateRun) => {
5048
return record
5149
}
5250

53-
export const getRuns = async () => db.query.runs.findMany({ orderBy: desc(table.id), with: { taskMetrics: true } })
51+
export const getRuns = async () =>
52+
db.query.runs.findMany({ orderBy: desc(schema.runs.id), with: { taskMetrics: true } })
5453

5554
export const finishRun = async (runId: number) => {
5655
const [values] = await db

evals/packages/db/src/queries/taskMetrics.ts

Lines changed: 5 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,12 @@
1-
import { eq, avg, min, max, and, isNotNull } from "drizzle-orm"
1+
import { eq } from "drizzle-orm"
22

33
import { RecordNotFoundError, RecordNotCreatedError } from "./errors.js"
44
import type { InsertTaskMetrics, UpdateTaskMetrics } from "../schema.js"
5-
import { insertTaskMetricsSchema, taskMetrics, tasks, runs } from "../schema.js"
5+
import { insertTaskMetricsSchema, taskMetrics } from "../schema.js"
66
import { db } from "../db.js"
77

8-
const table = taskMetrics
9-
108
export const findTaskMetrics = async (id: number) => {
11-
const run = await db.query.taskMetrics.findFirst({ where: eq(table.id, id) })
9+
const run = await db.query.taskMetrics.findFirst({ where: eq(taskMetrics.id, id) })
1210

1311
if (!run) {
1412
throw new RecordNotFoundError()
@@ -19,7 +17,7 @@ export const findTaskMetrics = async (id: number) => {
1917

2018
export const createTaskMetrics = async (args: InsertTaskMetrics) => {
2119
const records = await db
22-
.insert(table)
20+
.insert(taskMetrics)
2321
.values({
2422
...insertTaskMetricsSchema.parse(args),
2523
createdAt: new Date(),
@@ -36,7 +34,7 @@ export const createTaskMetrics = async (args: InsertTaskMetrics) => {
3634
}
3735

3836
export const updateTaskMetrics = async (id: number, values: UpdateTaskMetrics) => {
39-
const records = await db.update(table).set(values).where(eq(table.id, id)).returning()
37+
const records = await db.update(taskMetrics).set(values).where(eq(taskMetrics.id, id)).returning()
4038
const record = records[0]
4139

4240
if (!record) {
@@ -45,18 +43,3 @@ export const updateTaskMetrics = async (id: number, values: UpdateTaskMetrics) =
4543

4644
return record
4745
}
48-
49-
export const successfulTaskDurations = async () => {
50-
return db
51-
.select({
52-
runId: tasks.runId,
53-
avgDuration: avg(taskMetrics.duration).mapWith(Number),
54-
minDuration: min(taskMetrics.duration).mapWith(Number),
55-
maxDuration: max(taskMetrics.duration).mapWith(Number),
56-
})
57-
.from(tasks)
58-
.innerJoin(taskMetrics, eq(tasks.taskMetricsId, taskMetrics.id))
59-
.innerJoin(runs, eq(tasks.runId, runs.id))
60-
.where(and(eq(tasks.passed, true), isNotNull(runs.taskMetricsId)))
61-
.groupBy(tasks.runId)
62-
}

evals/packages/db/src/queries/tasks.ts

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,8 @@ import type { InsertTask, UpdateTask } from "../schema.js"
77
import { insertTaskSchema, tasks } from "../schema.js"
88
import { db } from "../db.js"
99

10-
const table = tasks
11-
1210
export const findTask = async (id: number) => {
13-
const run = await db.query.tasks.findFirst({ where: eq(table.id, id) })
11+
const run = await db.query.tasks.findFirst({ where: eq(tasks.id, id) })
1412

1513
if (!run) {
1614
throw new RecordNotFoundError()
@@ -21,7 +19,7 @@ export const findTask = async (id: number) => {
2119

2220
export const createTask = async (args: InsertTask) => {
2321
const records = await db
24-
.insert(table)
22+
.insert(tasks)
2523
.values({
2624
...insertTaskSchema.parse(args),
2725
createdAt: new Date(),
@@ -38,7 +36,7 @@ export const createTask = async (args: InsertTask) => {
3836
}
3937

4038
export const updateTask = async (id: number, values: UpdateTask) => {
41-
const records = await db.update(table).set(values).where(eq(table.id, id)).returning()
39+
const records = await db.update(tasks).set(values).where(eq(tasks.id, id)).returning()
4240
const record = records[0]
4341

4442
if (!record) {
@@ -56,8 +54,8 @@ type GetTask = {
5654

5755
export const getTask = async ({ runId, language, exercise }: GetTask) =>
5856
db.query.tasks.findFirst({
59-
where: and(eq(table.runId, runId), eq(table.language, language), eq(table.exercise, exercise)),
57+
where: and(eq(tasks.runId, runId), eq(tasks.language, language), eq(tasks.exercise, exercise)),
6058
})
6159

6260
export const getTasks = async (runId: number) =>
63-
db.query.tasks.findMany({ where: eq(table.runId, runId), with: { taskMetrics: true } })
61+
db.query.tasks.findMany({ where: eq(tasks.runId, runId), with: { taskMetrics: true } })

evals/packages/db/src/queries/toolErrors.ts

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import { RecordNotCreatedError } from "./errors.js"
2+
import type { InsertToolError } from "../schema.js"
3+
import { insertToolErrorSchema, toolErrors } from "../schema.js"
4+
import { db } from "../db.js"
5+
6+
export const createToolError = async (args: InsertToolError) => {
7+
const records = await db
8+
.insert(toolErrors)
9+
.values({
10+
...insertToolErrorSchema.parse(args),
11+
createdAt: new Date(),
12+
})
13+
.returning()
14+
15+
const record = records[0]
16+
17+
if (!record) {
18+
throw new RecordNotCreatedError()
19+
}
20+
21+
return record
22+
}

evals/packages/db/src/schema.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,16 @@ export const toolErrorsRelations = relations(toolErrors, ({ one }) => ({
123123
task: one(tasks, { fields: [toolErrors.taskId], references: [tasks.id] }),
124124
}))
125125

126+
export type ToolError = typeof toolErrors.$inferSelect
127+
128+
export const insertToolErrorSchema = createInsertSchema(toolErrors)
129+
.omit({ id: true, createdAt: true })
130+
.extend({ toolUsage: toolUsageSchema.optional() })
131+
132+
export type InsertToolError = Omit<typeof toolErrors.$inferInsert, "id" | "createdAt">
133+
134+
export type UpdateToolError = Partial<Omit<ToolError, "id" | "createdAt">>
135+
126136
/**
127137
* schema
128138
*/

0 commit comments

Comments
 (0)