feat: add Claude Code runner

crisbeto · crisbeto · commit 53c3144248ad · 2025-10-01T16:34:08.000+02:00
Adds a runner that generates code using Claude Code.
diff --git a/README.md b/README.md
@@ -83,7 +83,7 @@ You can customize the `web-codegen-scorer eval` script with the following flags:
     - Example: `web-codegen-scorer eval --model=gemini-2.5-flash --autorater-model=gemini-2.5-flash --env=<config path>`
 
 - `--runner=<name>`: Specifies the runner to use to execute the eval. Supported runners are
-  `genkit` (default) or `gemini-cli`.
+  `genkit` (default), `gemini-cli` or `claude-code`.
 
 - `--local`: Runs the script in local mode for the initial code generation request. Instead of
   calling the LLM, it will attempt to read the initial code from a corresponding file in the
diff --git a/package.json b/package.json
@@ -51,6 +51,7 @@
     "wcs": "./runner/bin/cli.js"
   },
   "dependencies": {
+    "@anthropic-ai/claude-code": "^1.0.128",
     "@anthropic-ai/sdk": "^0.63.0",
     "@axe-core/puppeteer": "^4.10.2",
     "@genkit-ai/compat-oai": "^1.19.1",
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
diff --git a/runner/codegen/claude-code-runner.ts b/runner/codegen/claude-code-runner.ts
@@ -0,0 +1,88 @@
+import {LlmGenerateFilesContext, LlmGenerateFilesRequestOptions, LlmRunner} from './llm-runner.js';
+import {join} from 'path';
+import {mkdirSync} from 'fs';
+import {writeFile} from 'fs/promises';
+import {BaseCliAgentRunner} from './base-cli-agent-runner.js';
+
+/**
+ * Maps the model names exposed by this runner (the keys, returned by `getSupportedModels`)
+ * to the model identifiers passed to the CLI through the `--model` flag (the values).
+ */
+const MODEL_MAPPING: Record<string, string> = {
+  'claude-4.0-sonnet': 'claude-sonnet-4-20250514',
+  'claude-3.5-haiku': 'claude-3-5-haiku-latest',
+};
+
+/**
+ * Runner that generates code using Claude Code.
+ *
+ * Before the CLI is invoked, `writeAgentFiles` places a `CLAUDE.md` instruction file and a
+ * `.claude/settings.json` file in the directory of the current request.
+ */
+export class ClaudeCodeRunner extends BaseCliAgentRunner implements LlmRunner {
+  readonly id = 'claude-code';
+  readonly displayName = 'Claude Code';
+  readonly hasBuiltInRepairLoop = true;
+
+  /**
+   * Glob patterns matching the agent files written by `writeAgentFiles`.
+   * NOTE(review): presumably the base class uses these to exclude the files from
+   * the generated output; confirm against `BaseCliAgentRunner`.
+   */
+  protected ignoredFilePatterns = ['**/CLAUDE.md', '**/.claude/**'];
+
+  /** Name of the CLI binary that this runner drives. */
+  protected binaryName = 'claude';
+
+  // Both timeouts are expressed in minutes.
+  protected override inactivityTimeoutMins = 10;
+  protected override totalRequestTimeoutMins = 10;
+
+  /** Returns the model names that can be requested from this runner. */
+  getSupportedModels(): string[] {
+    return Object.keys(MODEL_MAPPING);
+  }
+
+  /**
+   * Builds the command line arguments for a single invocation of the CLI.
+   *
+   * @param options Options of the current request. `options.model` has to be one of
+   *   the names returned by `getSupportedModels`.
+   * @returns Arguments for the binary, with the prompt as the last positional argument.
+   * @throws Error if the requested model has no mapping to a CLI model identifier.
+   */
+  protected getCommandLineFlags(options: LlmGenerateFilesRequestOptions): string[] {
+    const cliModel = MODEL_MAPPING[options.model];
+
+    // Fail with a clear message instead of passing `undefined` as the `--model` value.
+    if (cliModel === undefined) {
+      throw new Error(
+        `Unsupported model "${options.model}" for the ${this.displayName} runner. ` +
+          `Supported models: ${this.getSupportedModels().join(', ')}.`,
+      );
+    }
+
+    return [
+      '--print',
+      '--model',
+      cliModel,
+      // Skip all confirmations.
+      '--dangerously-skip-permissions',
+      '--permission-mode',
+      'bypassPermissions',
+      '--verbose',
+      options.context.executablePrompt,
+    ];
+  }
+
+  /**
+   * Writes the files that configure the agent into the directory of the request:
+   * `CLAUDE.md` with the common instructions and `.claude/settings.json` with the settings.
+   *
+   * @param options Options of the current request.
+   */
+  protected async writeAgentFiles(options: LlmGenerateFilesRequestOptions): Promise<void> {
+    const {context} = options;
+    const instructionFilePath = join(context.directory, 'CLAUDE.md');
+    const settingsDir = join(context.directory, '.claude');
+
+    // `recursive: true` prevents an `EEXIST` error if the directory is already there.
+    mkdirSync(settingsDir, {recursive: true});
+
+    await Promise.all([
+      writeFile(join(settingsDir, 'settings.json'), this.getSettingsJsonFile(context)),
+      writeFile(instructionFilePath, super.getCommonInstructions(options)),
+    ]);
+  }
+
+  /**
+   * Produces the content of the `.claude/settings.json` file.
+   *
+   * @param context Context of the current request. Its package manager fields determine
+   *   which `Bash` commands end up in the deny list.
+   * @returns Settings serialized as JSON with a two-space indent.
+   */
+  private getSettingsJsonFile(context: LlmGenerateFilesContext): string {
+    const ignoredPatterns = super.getCommonIgnorePatterns();
+    const deniedPermissions: string[] = [
+      // Block some commands like `git` and `npm install` since they aren't relevant for the evals.
+      'Bash(git:*)',
+      // The rules must not contain quotes of their own, because
+      // `JSON.stringify` below already emits each rule as a JSON string.
+      ...ignoredPatterns.directories.map(dir => `Read(${join(dir, '**')})`),
+      ...ignoredPatterns.files.map(file => `Read(${file})`),
+      // Package managers other than the one used by the environment are denied entirely.
+      ...context.possiblePackageManagers
+        .filter(manager => manager !== context.packageManager)
+        .map(manager => `Bash(${manager}:*)`),
+
+      // Note that we don't block all commands of the active package manager,
+      // because the build commands also go through it.
+      `Bash(${context.packageManager} install:*)`,
+      `Bash(${context.packageManager} add:*)`,
+      `Bash(${context.packageManager} remove:*)`,
+      `Bash(${context.packageManager} update:*)`,
+      `Bash(${context.packageManager} list:*)`,
+    ];
+
+    return JSON.stringify(
+      {
+        permissions: {
+          deny: deniedPermissions,
+        },
+        // NOTE(review): the Claude Code settings examples use string values for `env`;
+        // confirm that numeric values are accepted.
+        env: {
+          DISABLE_AUTOUPDATER: 1,
+          DISABLE_TELEMETRY: 1,
+          DISABLE_ERROR_REPORTING: 1,
+        },
+      },
+      undefined,
+      2,
+    );
+  }
+}
diff --git a/runner/codegen/runner-creation.ts b/runner/codegen/runner-creation.ts
@@ -1,10 +1,12 @@
 import {UserFacingError} from '../utils/errors.js';
 import type {GeminiCliRunner} from './gemini-cli-runner.js';
+import type {ClaudeCodeRunner} from './claude-code-runner.js';
 import type {GenkitRunner} from './genkit/genkit-runner.js';
 
 interface AvailableRunners {
   genkit: GenkitRunner;
   'gemini-cli': GeminiCliRunner;
+  'claude-code': ClaudeCodeRunner;
 }
 
 /** Names of supported runners. */
@@ -25,6 +27,10 @@ export async function getRunnerByName<T extends RunnerName>(name: T): Promise<Av
       return import('./gemini-cli-runner.js').then(
         m => new m.GeminiCliRunner() as AvailableRunners[T],
       );
+    case 'claude-code':
+      return import('./claude-code-runner.js').then(
+        m => new m.ClaudeCodeRunner() as AvailableRunners[T],
+      );
     default:
       throw new UserFacingError(`Unsupported runner ${name}`);
   }
diff --git a/runner/eval-cli.ts b/runner/eval-cli.ts
@@ -57,7 +57,7 @@ function builder(argv: Argv): Argv<Options> {
       .option('runner', {
         type: 'string',
         default: 'genkit' as const,
-        choices: ['genkit', 'gemini-cli'] as RunnerName[],
+        choices: ['genkit', 'gemini-cli', 'claude-code'] as RunnerName[],
         description: 'Runner to use to execute the eval',
       })
       .option('local', {