Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions evals/apps/cli/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -372,7 +372,7 @@ const runUnitTest = async ({ task }: { task: Task }) => {
})

console.log(
`${Date.now()} [cli#runUnitTest | ${task.language} / ${task.exercise}] "${command.join(" ")}": ${subprocess.pid} -> ${JSON.stringify(descendants)}`,
`${Date.now()} [cli#runUnitTest | ${task.language} / ${task.exercise}] "${command.join(" ")}": unit tests timed out, killing ${subprocess.pid} + ${JSON.stringify(descendants)}`,
)

if (descendants.length > 0) {
Expand All @@ -384,7 +384,10 @@ const runUnitTest = async ({ task }: { task: Task }) => {

await execa`kill -9 ${descendant}`
} catch (error) {
console.error("Error killing descendant processes:", error)
console.error(
`${Date.now()} [cli#runUnitTest | ${task.language} / ${task.exercise}] Error killing descendant processes:`,
error,
)
}
}
}
Expand All @@ -396,7 +399,10 @@ const runUnitTest = async ({ task }: { task: Task }) => {
try {
await execa`kill -9 ${subprocess.pid!}`
} catch (error) {
console.error("Error killing process:", error)
console.error(
`${Date.now()} [cli#runUnitTest | ${task.language} / ${task.exercise}] Error killing process:`,
error,
)
}
}, UNIT_TEST_TIMEOUT)

Expand Down
1 change: 1 addition & 0 deletions evals/packages/db/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
"@libsql/client": "^0.14.0",
"drizzle-orm": "^0.40.0",
"drizzle-zod": "^0.7.0",
"p-map": "^7.0.3",
"zod": "^3.24.2"
},
"devDependencies": {
Expand Down
51 changes: 28 additions & 23 deletions evals/packages/db/scripts/copy-run.mts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { drizzle } from "drizzle-orm/libsql"
import { eq } from "drizzle-orm"
import pMap from "p-map"

import { db as sourceDb } from "../src/db.js"
import { schema } from "../src/schema.js"
Expand Down Expand Up @@ -52,29 +53,33 @@ const copyRun = async (runId: number) => {

console.log(`Copying ${tasks.length} tasks`)

for (const task of tasks) {
// eslint-disable-next-line @typescript-eslint/no-unused-vars
const { id: _, ...newTaskMetricsValues } = task.taskMetrics!
const [newTaskMetrics] = await destDb.insert(schema.taskMetrics).values(newTaskMetricsValues).returning()

if (!newTaskMetrics) {
throw new Error(`Failed to insert taskMetrics for task ${task.id}`)
}

// eslint-disable-next-line @typescript-eslint/no-unused-vars
const { id: __, ...newTaskValues } = task

const [newTask] = await destDb
.insert(schema.tasks)
.values({ ...newTaskValues, runId: newRun.id, taskMetricsId: newTaskMetrics.id })
.returning()

if (!newTask) {
throw new Error(`Failed to insert task ${task.id}`)
}
}

console.log(`Successfully copied run ${runId} with ${tasks.length} tasks`)
await pMap(
tasks,
async (task) => {
// eslint-disable-next-line @typescript-eslint/no-unused-vars
const { id: _, ...newTaskMetricsValues } = task.taskMetrics!
const [newTaskMetrics] = await destDb.insert(schema.taskMetrics).values(newTaskMetricsValues).returning()

if (!newTaskMetrics) {
throw new Error(`Failed to insert taskMetrics for task ${task.id}`)
}

// eslint-disable-next-line @typescript-eslint/no-unused-vars
const { id: __, ...newTaskValues } = task

const [newTask] = await destDb
.insert(schema.tasks)
.values({ ...newTaskValues, runId: newRun.id, taskMetricsId: newTaskMetrics.id })
.returning()

if (!newTask) {
throw new Error(`Failed to insert task ${task.id}`)
}
},
{ concurrency: 25 },
)

console.log(`\nSuccessfully copied run ${runId} with ${tasks.length} tasks`)
}

const main = async () => {
Expand Down
7 changes: 4 additions & 3 deletions evals/packages/types/src/roo-code-defaults.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ export const rooCodeDefaults: RooCodeSettings = {

// modelTemperature: null,
// reasoningEffort: "high",
rateLimitSeconds: 0,

pinnedApiConfigs: {},
lastShownAnnouncementId: "apr-04-2025-boomerang",
Expand Down Expand Up @@ -45,11 +46,11 @@ export const rooCodeDefaults: RooCodeSettings = {

terminalOutputLineLimit: 500,
terminalShellIntegrationTimeout: 15_000,
// terminalCommandDelay: 0,
// terminalPowershellCounter: false,
terminalCommandDelay: 0,
terminalPowershellCounter: false,
terminalZshClearEolMark: true,
terminalZshOhMy: true,
// terminalZshP10k: false,
terminalZshP10k: false,
terminalZdotdir: true,

diffEnabled: true,
Expand Down
3 changes: 3 additions & 0 deletions evals/pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.