diff --git a/apps/web-evals/scripts/check-services.sh b/apps/web-evals/scripts/check-services.sh index fd1e74997c0..104a4722088 100755 --- a/apps/web-evals/scripts/check-services.sh +++ b/apps/web-evals/scripts/check-services.sh @@ -7,13 +7,13 @@ fi if ! nc -z localhost 5432 2>/dev/null; then echo "❌ PostgreSQL is not running on port 5432" - echo "💡 Start it with: pnpm --filter @roo-code/evals db:start" + echo "💡 Start it with: pnpm --filter @roo-code/evals db:up" exit 1 fi if ! nc -z localhost 6379 2>/dev/null; then echo "❌ Redis is not running on port 6379" - echo "💡 Start it with: pnpm --filter @roo-code/evals redis:start" + echo "💡 Start it with: pnpm --filter @roo-code/evals redis:up" exit 1 fi diff --git a/apps/web-evals/src/app/runs/new/new-run.tsx b/apps/web-evals/src/app/runs/new/new-run.tsx index 90717d6fec9..f8633611b61 100644 --- a/apps/web-evals/src/app/runs/new/new-run.tsx +++ b/apps/web-evals/src/app/runs/new/new-run.tsx @@ -350,7 +350,7 @@ export function NewRun() { name="timeout" render={({ field }) => ( - Timeout (minutes) + Timeout (Minutes)
field.onChange(value[0])} /> -
{field.value} min
+
{field.value}
diff --git a/package.json b/package.json index 61f1f6cdaf9..99becf0a0c7 100644 --- a/package.json +++ b/package.json @@ -23,7 +23,7 @@ "changeset:version": "cp CHANGELOG.md src/CHANGELOG.md && changeset version && cp -vf src/CHANGELOG.md .", "knip": "knip --include files", "update-contributors": "node scripts/update-contributors.js", - "evals": "docker compose -f packages/evals/docker-compose.yml --profile server --profile runner up --build --scale runner=0" + "evals": "dotenvx run -f packages/evals/.env.development packages/evals/.env.local -- docker compose -f packages/evals/docker-compose.yml --profile server --profile runner up --build --scale runner=0" }, "devDependencies": { "@changesets/cli": "^2.27.10", diff --git a/packages/evals/package.json b/packages/evals/package.json index 3d1cfb3e929..83690a99c4d 100644 --- a/packages/evals/package.json +++ b/packages/evals/package.json @@ -18,11 +18,12 @@ "db:push": "pnpm drizzle-kit push", "db:test:push": "pnpm drizzle-kit:test push", "db:production:push": "pnpm drizzle-kit:production push", - "db:start": "docker compose up -d db", - "db:stop": "docker compose down db", - "redis:start": "docker compose up -d redis", - "redis:stop": "docker compose down redis", - "services:start": "docker compose up -d db redis" + "db:up": "dotenvx run -f .env.development .env.local -- docker compose up -d db", + "db:down": "dotenvx run -f .env.development .env.local -- docker compose down db", + "redis:up": "dotenvx run -f .env.development .env.local -- docker compose up -d redis", + "redis:down": "dotenvx run -f .env.development .env.local -- docker compose down redis", + "services:up": "dotenvx run -f .env.development .env.local -- docker compose up -d db redis", + "services:down": "dotenvx run -f .env.development .env.local -- docker compose down db redis" }, "dependencies": { "@roo-code/ipc": "workspace:^", diff --git a/packages/evals/src/db/migrations/0001_lowly_captain_flint.sql b/packages/evals/src/db/migrations/0001_lowly_captain_flint.sql new file mode 100644 index 00000000000..16d3cc1bddc --- /dev/null +++ b/packages/evals/src/db/migrations/0001_lowly_captain_flint.sql @@ -0,0 +1 @@ +ALTER TABLE "runs" ADD COLUMN "timeout" integer DEFAULT 5 NOT NULL; \ No newline at end of file diff --git a/packages/evals/src/db/migrations/meta/0001_snapshot.json b/packages/evals/src/db/migrations/meta/0001_snapshot.json new file mode 100644 index 00000000000..194fd6055c7 --- /dev/null +++ b/packages/evals/src/db/migrations/meta/0001_snapshot.json @@ -0,0 +1,417 @@ +{ + "id": "43b197c4-ff4f-48c1-908b-a330e66a162d", + "prevId": "b50d5e6a-0f3f-4605-a5e7-9351711fc5e4", + "version": "7", + "dialect": "postgresql", + "tables": { + "public.runs": { + "name": "runs", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "integer", + "primaryKey": true, + "notNull": true, + "identity": { + "type": "always", + "name": "runs_id_seq", + "schema": "public", + "increment": "1", + "startWith": "1", + "minValue": "1", + "maxValue": "2147483647", + "cache": "1", + "cycle": false + } + }, + "task_metrics_id": { + "name": "task_metrics_id", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "model": { + "name": "model", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "description": { + "name": "description", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "settings": { + "name": "settings", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "pid": { + "name": "pid", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "socket_path": { + "name": "socket_path", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "concurrency": { + "name": "concurrency", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 2 + }, + "timeout": { + "name": "timeout", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 5 + }, + "passed": { + "name": "passed", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "failed": { + "name": "failed", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true + } + }, + "indexes": {}, + "foreignKeys": { + "runs_task_metrics_id_taskMetrics_id_fk": { + "name": "runs_task_metrics_id_taskMetrics_id_fk", + "tableFrom": "runs", + "tableTo": "taskMetrics", + "columnsFrom": ["task_metrics_id"], + "columnsTo": ["id"], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.taskMetrics": { + "name": "taskMetrics", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "integer", + "primaryKey": true, + "notNull": true, + "identity": { + "type": "always", + "name": "taskMetrics_id_seq", + "schema": "public", + "increment": "1", + "startWith": "1", + "minValue": "1", + "maxValue": "2147483647", + "cache": "1", + "cycle": false + } + }, + "tokens_in": { + "name": "tokens_in", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "tokens_out": { + "name": "tokens_out", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "tokens_context": { + "name": "tokens_context", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "cache_writes": { + "name": "cache_writes", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "cache_reads": { + "name": "cache_reads", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "cost": { + "name": "cost", + "type": "real", + "primaryKey": false, + "notNull": true + }, + "duration": { + "name": "duration", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "tool_usage": { + "name": "tool_usage", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.tasks": { + "name": "tasks", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "integer", + "primaryKey": true, + "notNull": true, + "identity": { + "type": "always", + "name": "tasks_id_seq", + "schema": "public", + "increment": "1", + "startWith": "1", + "minValue": "1", + "maxValue": "2147483647", + "cache": "1", + "cycle": false + } + }, + "run_id": { + "name": "run_id", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "task_metrics_id": { + "name": "task_metrics_id", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "language": { + "name": "language", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "exercise": { + "name": "exercise", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "passed": { + "name": "passed", + "type": "boolean", + "primaryKey": false, + "notNull": false + }, + "started_at": { + "name": "started_at", + "type": "timestamp", + "primaryKey": false, + "notNull": false + }, + "finished_at": { + "name": "finished_at", + "type": "timestamp", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true + } + }, + "indexes": { + "tasks_language_exercise_idx": { + "name": "tasks_language_exercise_idx", + "columns": [ + { + "expression": "run_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "language", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "exercise", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "tasks_run_id_runs_id_fk": { + "name": "tasks_run_id_runs_id_fk", + "tableFrom": "tasks", + "tableTo": "runs", + "columnsFrom": ["run_id"], + "columnsTo": ["id"], + "onDelete": "no action", + "onUpdate": "no action" + }, + "tasks_task_metrics_id_taskMetrics_id_fk": { + "name": "tasks_task_metrics_id_taskMetrics_id_fk", + "tableFrom": "tasks", + "tableTo": "taskMetrics", + "columnsFrom": ["task_metrics_id"], + "columnsTo": ["id"], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.toolErrors": { + "name": "toolErrors", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "integer", + "primaryKey": true, + "notNull": true, + "identity": { + "type": "always", + "name": "toolErrors_id_seq", + "schema": "public", + "increment": "1", + "startWith": "1", + "minValue": "1", + "maxValue": "2147483647", + "cache": "1", + "cycle": false + } + }, + "run_id": { + "name": "run_id", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "task_id": { + "name": "task_id", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "tool_name": { + "name": "tool_name", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "error": { + "name": "error", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true + } + }, + "indexes": {}, + "foreignKeys": { + "toolErrors_run_id_runs_id_fk": { + "name": "toolErrors_run_id_runs_id_fk", + "tableFrom": "toolErrors", + "tableTo": "runs", + "columnsFrom": ["run_id"], + "columnsTo": ["id"], + "onDelete": "no action", + "onUpdate": "no action" + }, + "toolErrors_task_id_tasks_id_fk": { + "name": "toolErrors_task_id_tasks_id_fk", + "tableFrom": "toolErrors", + "tableTo": "tasks", + "columnsFrom": ["task_id"], + "columnsTo": ["id"], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + } + }, + "enums": {}, + "schemas": {}, + "sequences": {}, + "roles": {}, + "policies": {}, + "views": {}, + "_meta": { + "columns": {}, + "schemas": {}, + "tables": {} + } +} diff --git a/packages/evals/src/db/migrations/meta/_journal.json b/packages/evals/src/db/migrations/meta/_journal.json index b26aac5417c..e20425b1057 100644 --- a/packages/evals/src/db/migrations/meta/_journal.json +++ b/packages/evals/src/db/migrations/meta/_journal.json @@ -8,6 +8,13 @@ "when": 1748937674449, "tag": "0000_young_trauma", "breakpoints": true + }, + { + "idx": 1, + "version": "7", + "when": 1753198630651, + "tag": "0001_lowly_captain_flint", + "breakpoints": true } ] }