From 4b46ff66f862109ffc089e387055e1ab4dd0a6f1 Mon Sep 17 00:00:00 2001
From: Paul Gschwendtner <paulgschwendtner@gmail.com>
Date: Fri, 10 Oct 2025 14:25:13 +0000
Subject: [PATCH] feat: support controlling build repair attempts

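Add a `--max-build-repair-attempts` option to the `eval` command. The value
is passed through `AssessmentConfig.maxBuildRepairAttempts` to `attemptBuild`,
which falls back to `DEFAULT_MAX_REPAIR_ATTEMPTS` when the option is not set.
Executors that opt out of build repairs still get zero attempts.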
---
 README.md                                | 2 ++
 runner/configuration/constants.ts        | 1 +
 runner/eval-cli.ts                       | 8 ++++++++
 runner/orchestration/build-serve-loop.ts | 6 +++---
 runner/shared-interfaces.ts              | 1 +
 5 files changed, 15 insertions(+), 3 deletions(-)
diff --git a/README.md b/README.md
index 8604847..b76634c 100644
--- a/README.md
+++ b/README.md
@@ -132,6 +132,8 @@ You can customize the `web-codegen-scorer eval` script with the following flags:
 - `--mcp`: Whether to start an MCP for the evaluation. Defaults to `false`.
     - Example: `web-codegen-scorer eval --mcp --env=<config path>`
 
+- `--max-build-repair-attempts`: Number of repair attempts when build errors are discovered. Defaults to `1` attempt.
+
 - `--help`: Prints out usage information about the script.
 
 ### Additional configuration options
diff --git a/runner/configuration/constants.ts b/runner/configuration/constants.ts
index dd83f1f..3151ec1 100644
--- a/runner/configuration/constants.ts
+++ b/runner/configuration/constants.ts
@@ -24,6 +24,7 @@ export const LLM_OUTPUT_DIR = join(rootDir, 'llm-output');
  * Number of times we'll try to ask LLM to repair a build failure,
  * providing the build output and the code that causes the problem.
  */
+// Note: When updating, also adjust the default description in `README.md`.
 export const DEFAULT_MAX_REPAIR_ATTEMPTS = 1;
 
 /** Name of the folder where we store all generated reports */
diff --git a/runner/eval-cli.ts b/runner/eval-cli.ts
index b6fdd19..39077ca 100644
--- a/runner/eval-cli.ts
+++ b/runner/eval-cli.ts
@@ -3,6 +3,7 @@ import chalk from 'chalk';
 import {
   BUILT_IN_ENVIRONMENTS,
   DEFAULT_AUTORATER_MODEL_NAME,
+  DEFAULT_MAX_REPAIR_ATTEMPTS,
   DEFAULT_MODEL_NAME,
 } from './configuration/constants.js';
 import {generateCodeAndAssess} from './orchestration/generate.js';
@@ -39,6 +40,7 @@ interface Options {
   a11yRepairAttempts?: number;
   logging?: 'text-only' | 'dynamic';
   skipLighthouse?: boolean;
+  maxBuildRepairAttempts?: number;
 }
 
 function builder(argv: Argv): Argv<Options> {
@@ -159,6 +161,11 @@ function builder(argv: Argv): Argv<Options> {
         default: false,
         description: 'Whether to skip collecting Lighthouse data',
       })
+      .option('max-build-repair-attempts', {
+        type: 'number',
+        default: DEFAULT_MAX_REPAIR_ATTEMPTS,
+        description: 'Number of repair attempts when build errors are discovered',
+      })
       .strict()
       .version(false)
       .help()
@@ -204,6 +211,7 @@ async function handler(cliArgs: Arguments<Options>): Promise<void> {
       skipAiSummary: cliArgs.skipAiSummary,
       a11yRepairAttempts: cliArgs.a11yRepairAttempts,
       skipLighthouse: cliArgs.skipLighthouse,
+      maxBuildRepairAttempts: cliArgs.maxBuildRepairAttempts,
     });
 
     logReportToConsole(runInfo);
diff --git a/runner/orchestration/build-serve-loop.ts b/runner/orchestration/build-serve-loop.ts
index 67074eb..f543add 100644
--- a/runner/orchestration/build-serve-loop.ts
+++ b/runner/orchestration/build-serve-loop.ts
@@ -8,13 +8,13 @@ import {
   LlmContextFile,
   RootPromptDefinition,
 } from '../shared-interfaces.js';
-import {DEFAULT_MAX_REPAIR_ATTEMPTS} from '../configuration/constants.js';
 import {ProgressLogger} from '../progress/progress-logger.js';
 import {runBuild} from './build-worker.js';
 import {repairAndBuild} from './build-repair.js';
-import {EvalID, Executor} from './executors/executor.js';
+import {EvalID} from './executors/executor.js';
 import {serveAndTestApp} from './serve-testing-worker.js';
 import {BrowserAgentTaskInput} from '../testing/browser-agent/models.js';
+import {DEFAULT_MAX_REPAIR_ATTEMPTS} from '../configuration/constants.js';
 
 /**
  * Attempts to build the code that an LLM generated. If the build fails, attempts
@@ -59,7 +59,7 @@ export async function attemptBuild(
   );
   let repairAttempts = 0;
   const maxRepairAttempts = (await env.executor.shouldRepairFailedBuilds(evalID))
-    ? DEFAULT_MAX_REPAIR_ATTEMPTS
+    ? (config.maxBuildRepairAttempts ?? DEFAULT_MAX_REPAIR_ATTEMPTS)
     : 0;
 
   const initialAttempt = {
diff --git a/runner/shared-interfaces.ts b/runner/shared-interfaces.ts
index d3cd067..e28c4b8 100644
--- a/runner/shared-interfaces.ts
+++ b/runner/shared-interfaces.ts
@@ -29,6 +29,7 @@ export interface AssessmentConfig {
   autoraterModel?: string;
   a11yRepairAttempts?: number;
   skipLighthouse?: boolean;
+  maxBuildRepairAttempts?: number;
 }
 
 /**