Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion evals/apps/cli/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,12 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server
})
}

- if (eventName === RooCodeEventName.TaskCompleted || eventName === RooCodeEventName.TaskAborted) {
+ if (eventName === RooCodeEventName.TaskCompleted && taskMetricsId) {
+ const toolUsage = payload[2]
+ await updateTaskMetrics(taskMetricsId, { toolUsage })
+ }
+
+ if (eventName === RooCodeEventName.TaskAborted || eventName === RooCodeEventName.TaskCompleted) {
taskFinishedAt = Date.now()
await updateTask(task.id, { finishedAt: new Date() })
}
Expand Down
1 change: 1 addition & 0 deletions evals/packages/db/drizzle/0003_familiar_miss_america.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ALTER TABLE `taskMetrics` ADD `toolUsage` blob;
296 changes: 296 additions & 0 deletions evals/packages/db/drizzle/meta/0003_snapshot.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,296 @@
{
"version": "6",
"dialect": "sqlite",
"id": "a7a893e2-373a-4706-bcd4-772e2525db62",
"prevId": "f49d9b0b-fda9-467a-9adb-c941d6cbf7ce",
"tables": {
"runs": {
"name": "runs",
"columns": {
"id": {
"name": "id",
"type": "integer",
"primaryKey": true,
"notNull": true,
"autoincrement": true
},
"taskMetricsId": {
"name": "taskMetricsId",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"model": {
"name": "model",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"description": {
"name": "description",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"settings": {
"name": "settings",
"type": "blob",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"pid": {
"name": "pid",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"socketPath": {
"name": "socketPath",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"concurrency": {
"name": "concurrency",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false,
"default": 2
},
"passed": {
"name": "passed",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false,
"default": 0
},
"failed": {
"name": "failed",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false,
"default": 0
},
"createdAt": {
"name": "createdAt",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
}
},
"indexes": {},
"foreignKeys": {
"runs_taskMetricsId_taskMetrics_id_fk": {
"name": "runs_taskMetricsId_taskMetrics_id_fk",
"tableFrom": "runs",
"tableTo": "taskMetrics",
"columnsFrom": ["taskMetricsId"],
"columnsTo": ["id"],
"onDelete": "no action",
"onUpdate": "no action"
}
},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"checkConstraints": {}
},
"taskMetrics": {
"name": "taskMetrics",
"columns": {
"id": {
"name": "id",
"type": "integer",
"primaryKey": true,
"notNull": true,
"autoincrement": true
},
"tokensIn": {
"name": "tokensIn",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"tokensOut": {
"name": "tokensOut",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"tokensContext": {
"name": "tokensContext",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"cacheWrites": {
"name": "cacheWrites",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"cacheReads": {
"name": "cacheReads",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"cost": {
"name": "cost",
"type": "real",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"duration": {
"name": "duration",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"toolUsage": {
"name": "toolUsage",
"type": "blob",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"createdAt": {
"name": "createdAt",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
}
},
"indexes": {},
"foreignKeys": {},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"checkConstraints": {}
},
"tasks": {
"name": "tasks",
"columns": {
"id": {
"name": "id",
"type": "integer",
"primaryKey": true,
"notNull": true,
"autoincrement": true
},
"runId": {
"name": "runId",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"taskMetricsId": {
"name": "taskMetricsId",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"language": {
"name": "language",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"exercise": {
"name": "exercise",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"passed": {
"name": "passed",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"startedAt": {
"name": "startedAt",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"finishedAt": {
"name": "finishedAt",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"createdAt": {
"name": "createdAt",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
}
},
"indexes": {
"tasks_language_exercise_idx": {
"name": "tasks_language_exercise_idx",
"columns": ["runId", "language", "exercise"],
"isUnique": true
}
},
"foreignKeys": {
"tasks_runId_runs_id_fk": {
"name": "tasks_runId_runs_id_fk",
"tableFrom": "tasks",
"tableTo": "runs",
"columnsFrom": ["runId"],
"columnsTo": ["id"],
"onDelete": "no action",
"onUpdate": "no action"
},
"tasks_taskMetricsId_taskMetrics_id_fk": {
"name": "tasks_taskMetricsId_taskMetrics_id_fk",
"tableFrom": "tasks",
"tableTo": "taskMetrics",
"columnsFrom": ["taskMetricsId"],
"columnsTo": ["id"],
"onDelete": "no action",
"onUpdate": "no action"
}
},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"checkConstraints": {}
}
},
"views": {},
"enums": {},
"_meta": {
"schemas": {},
"tables": {},
"columns": {}
},
"internal": {
"indexes": {}
}
}
7 changes: 7 additions & 0 deletions evals/packages/db/drizzle/meta/_journal.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,13 @@
"when": 1743698195142,
"tag": "0002_white_flatman",
"breakpoints": true
},
{
"idx": 3,
"version": "6",
"when": 1744933023667,
"tag": "0003_familiar_miss_america",
"breakpoints": true
}
]
}
7 changes: 5 additions & 2 deletions evals/packages/db/src/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { sqliteTable, text, real, integer, blob, uniqueIndex } from "drizzle-orm
import { relations } from "drizzle-orm"
import { createInsertSchema } from "drizzle-zod"

- import { RooCodeSettings, exerciseLanguages, rooCodeSettingsSchema } from "@evals/types"
+ import { RooCodeSettings, ToolUsage, exerciseLanguages, rooCodeSettingsSchema, toolUsageSchema } from "@evals/types"

/**
* runs
Expand Down Expand Up @@ -84,12 +84,15 @@ export const taskMetrics = sqliteTable("taskMetrics", {
cacheReads: integer({ mode: "number" }).notNull(),
cost: real().notNull(),
duration: integer({ mode: "number" }).notNull(),
+ toolUsage: blob({ mode: "json" }).$type<ToolUsage>(),
createdAt: integer({ mode: "timestamp" }).notNull(),
})

export type TaskMetrics = typeof taskMetrics.$inferSelect

- export const insertTaskMetricsSchema = createInsertSchema(taskMetrics).omit({ id: true, createdAt: true })
+ export const insertTaskMetricsSchema = createInsertSchema(taskMetrics)
+ .omit({ id: true, createdAt: true })
+ .extend({ toolUsage: toolUsageSchema.optional() })

export type InsertTaskMetrics = Omit<typeof taskMetrics.$inferInsert, "id" | "createdAt">

Expand Down
Loading