From 4b46ff66f862109ffc089e387055e1ab4dd0a6f1 Mon Sep 17 00:00:00 2001 From: Paul Gschwendtner Date: Fri, 10 Oct 2025 14:25:13 +0000 Subject: [PATCH] feat: support controlling build repair attempts Support controlling build repair attempts. --- README.md | 2 ++ runner/configuration/constants.ts | 1 + runner/eval-cli.ts | 8 ++++++++ runner/orchestration/build-serve-loop.ts | 6 +++--- runner/shared-interfaces.ts | 1 + 5 files changed, 15 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 8604847..b76634c 100644 --- a/README.md +++ b/README.md @@ -132,6 +132,8 @@ You can customize the `web-codegen-scorer eval` script with the following flags: - `--mcp`: Whether to start an MCP for the evaluation. Defaults to `false`. - Example: `web-codegen-scorer eval --mcp --env=` +- `--max-build-repair-attempts`: Number of repair attempts when build errors are discovered. Defaults to `1` attempt. + - `--help`: Prints out usage information about the script. ### Additional configuration options diff --git a/runner/configuration/constants.ts b/runner/configuration/constants.ts index dd83f1f..3151ec1 100644 --- a/runner/configuration/constants.ts +++ b/runner/configuration/constants.ts @@ -24,6 +24,7 @@ export const LLM_OUTPUT_DIR = join(rootDir, 'llm-output'); * Number of times we'll try to ask LLM to repair a build failure, * providing the build output and the code that causes the problem. */ +// Note: When updating, also adjust the default description in `README.md`.
export const DEFAULT_MAX_REPAIR_ATTEMPTS = 1; /** Name of the folder where we store all generated reports */ diff --git a/runner/eval-cli.ts b/runner/eval-cli.ts index b6fdd19..39077ca 100644 --- a/runner/eval-cli.ts +++ b/runner/eval-cli.ts @@ -3,6 +3,7 @@ import chalk from 'chalk'; import { BUILT_IN_ENVIRONMENTS, DEFAULT_AUTORATER_MODEL_NAME, + DEFAULT_MAX_REPAIR_ATTEMPTS, DEFAULT_MODEL_NAME, } from './configuration/constants.js'; import {generateCodeAndAssess} from './orchestration/generate.js'; @@ -39,6 +40,7 @@ interface Options { a11yRepairAttempts?: number; logging?: 'text-only' | 'dynamic'; skipLighthouse?: boolean; + maxBuildRepairAttempts?: number; } function builder(argv: Argv): Argv { @@ -159,6 +161,11 @@ function builder(argv: Argv): Argv { default: false, description: 'Whether to skip collecting Lighthouse data', }) + .option('max-build-repair-attempts', { + type: 'number', + default: DEFAULT_MAX_REPAIR_ATTEMPTS, + description: 'Number of repair attempts when build errors are discovered', + }) .strict() .version(false) .help() @@ -204,6 +211,7 @@ async function handler(cliArgs: Arguments): Promise { skipAiSummary: cliArgs.skipAiSummary, a11yRepairAttempts: cliArgs.a11yRepairAttempts, skipLighthouse: cliArgs.skipLighthouse, + maxBuildRepairAttempts: cliArgs.maxBuildRepairAttempts, }); logReportToConsole(runInfo); diff --git a/runner/orchestration/build-serve-loop.ts b/runner/orchestration/build-serve-loop.ts index 67074eb..f543add 100644 --- a/runner/orchestration/build-serve-loop.ts +++ b/runner/orchestration/build-serve-loop.ts @@ -8,13 +8,13 @@ import { LlmContextFile, RootPromptDefinition, } from '../shared-interfaces.js'; -import {DEFAULT_MAX_REPAIR_ATTEMPTS} from '../configuration/constants.js'; import {ProgressLogger} from '../progress/progress-logger.js'; import {runBuild} from './build-worker.js'; import {repairAndBuild} from './build-repair.js'; -import {EvalID, Executor} from './executors/executor.js'; +import {EvalID} from 
'./executors/executor.js'; import {serveAndTestApp} from './serve-testing-worker.js'; import {BrowserAgentTaskInput} from '../testing/browser-agent/models.js'; +import {DEFAULT_MAX_REPAIR_ATTEMPTS} from '../configuration/constants.js'; /** * Attempts to build the code that an LLM generated. If the build fails, attempts @@ -59,7 +59,7 @@ export async function attemptBuild( ); let repairAttempts = 0; const maxRepairAttempts = (await env.executor.shouldRepairFailedBuilds(evalID)) - ? DEFAULT_MAX_REPAIR_ATTEMPTS + ? (config.maxBuildRepairAttempts ?? DEFAULT_MAX_REPAIR_ATTEMPTS) : 0; const initialAttempt = { diff --git a/runner/shared-interfaces.ts b/runner/shared-interfaces.ts index d3cd067..e28c4b8 100644 --- a/runner/shared-interfaces.ts +++ b/runner/shared-interfaces.ts @@ -29,6 +29,7 @@ export interface AssessmentConfig { autoraterModel?: string; a11yRepairAttempts?: number; skipLighthouse?: boolean; + maxBuildRepairAttempts?: number; } /**