feat: add a script for running an evaluated app locally

crisbeto · crisbeto · commit 3d2ee12a5778 · 2025-09-19T15:35:31.000+02:00
Adds the `web-codegen-scorer run` script that allows users to run an evaluated app in their browser. It spins up a server using the local LLM output and the existing environment config.
diff --git a/README.md b/README.md
@@ -57,6 +57,14 @@ web-codegen-scorer eval --env=angular-example
 web-codegen-scorer init
 ```
 
+5. (Optional) **Run an evaluated app locally:**
+
+   Once you've evaluated an app, you can run it locally with the following command:
+
+```bash
+web-codegen-scorer run --env=angular-example --prompt=<name of the prompt you want to run>
+```
+
 ## Command-line flags
 
 You can customize the `web-codegen-scorer eval` script with the following flags:
diff --git a/runner/bin/cli.ts b/runner/bin/cli.ts
@@ -5,6 +5,7 @@ import { hideBin } from 'yargs/helpers';
 import { EvalModule } from '../eval-cli.js';
 import { ReportModule } from '../report-cli.js';
 import { InitModule } from '../init-cli.js';
+import { RunModule } from '../run-cli.js';
 
 yargs()
   .scriptName('web-codegen-scorer')
@@ -13,6 +14,7 @@ yargs()
   .command(EvalModule.command, EvalModule.describe, EvalModule)
   .command(ReportModule.command, ReportModule.describe, ReportModule)
   .command(InitModule.command, InitModule.describe, InitModule)
+  .command(RunModule.command, RunModule.describe, RunModule)
   .wrap(120)
   .strict()
   .help()
diff --git a/runner/builder/serve-app.ts b/runner/builder/serve-app.ts
@@ -13,10 +13,12 @@ export async function serveApp(
   let serveProcess: ChildProcess | null = null;
 
   try {
-    const launchMessage = 'Launching app inside a browser';
-    progressLog('eval', launchMessage);
     serveProcess = exec(serveCommand, { cwd: tempDir });
-    progressLog('eval', launchMessage, `(PID: ${serveProcess.pid})`);
+    progressLog(
+      'eval',
+      'Launching app inside a browser',
+      `(PID: ${serveProcess.pid})`
+    );
 
     const actualPort = await new Promise<number>((resolvePort, rejectPort) => {
       const serveStartTimeout = 45000; // 45s for serve to start
diff --git a/runner/configuration/constants.ts b/runner/configuration/constants.ts
@@ -34,3 +34,15 @@ export const REPORTS_ROOT_DIR = join(rootDir, 'reports');
  * MUST be kept in sync with `RunInfo.version`.
  */
 export const REPORT_VERSION = 2;
+
+/**
+ * Example environments that ship with the eval tool. Each key is a name that
+ * can be looked up in place of a config path; each value is the location of
+ * that environment's config file, resolved relative to this module.
+ */
+export const BUILT_IN_ENVIRONMENTS = new Map<string, string>([
+  [
+    'angular-example',
+    join(
+      import.meta.dirname,
+      '..',
+      '..',
+      'examples',
+      'environments',
+      'angular',
+      'config.js'
+    ),
+  ],
+  [
+    'solid-example',
+    join(
+      import.meta.dirname,
+      '..',
+      '..',
+      'examples',
+      'environments',
+      'solid',
+      'config.js'
+    ),
+  ],
+]);
diff --git a/runner/eval-cli.ts b/runner/eval-cli.ts
@@ -1,8 +1,8 @@
 import { Arguments, Argv, CommandModule } from 'yargs';
 import chalk from 'chalk';
-import { join } from 'path';
 import { assertValidModelName, LlmRunner } from './codegen/llm-runner.js';
 import {
+  BUILT_IN_ENVIRONMENTS,
   DEFAULT_AUTORATER_MODEL_NAME,
   DEFAULT_MODEL_NAME,
 } from './configuration/constants.js';
@@ -22,17 +22,6 @@ export const EvalModule = {
   describe: 'Evaluate code using an LLM',
 } satisfies CommandModule<{}, Options>;
 
-const builtInEnvironments = new Map<string, string>([
-  [
-    'angular-example',
-    join(import.meta.dirname, '../examples/environments/angular/config.js'),
-  ],
-  [
-    'solid-example',
-    join(import.meta.dirname, '../examples/environments/solid/config.js'),
-  ],
-]);
-
 interface Options {
   environment?: string;
   model: string;
@@ -198,7 +187,7 @@ async function handler(cliArgs: Arguments<Options>): Promise<void> {
       ratingLlm,
       model: cliArgs.model,
       environmentConfigPath:
-        builtInEnvironments.get(cliArgs.environment) || cliArgs.environment,
+        BUILT_IN_ENVIRONMENTS.get(cliArgs.environment) || cliArgs.environment,
       localMode: cliArgs.local,
       limit: cliArgs.limit,
       concurrency: cliArgs.concurrency as number,
diff --git a/runner/run-cli.ts b/runner/run-cli.ts
@@ -0,0 +1,200 @@
+import { Arguments, Argv, CommandModule } from 'yargs';
+import chalk from 'chalk';
+import process from 'process';
+import { getEnvironmentByPath } from './configuration/environment-resolution.js';
+import {
+  BUILT_IN_ENVIRONMENTS,
+  LLM_OUTPUT_DIR,
+} from './configuration/constants.js';
+import { UserFacingError } from './utils/errors.js';
+import { existsSync, rmSync } from 'fs';
+import { readFile, readdir } from 'fs/promises';
+import { join } from 'path';
+import { glob } from 'tinyglobby';
+import { LlmResponseFile } from './shared-interfaces.js';
+import {
+  setupProjectStructure,
+  writeResponseFiles,
+} from './orchestration/file-system.js';
+import { serveApp } from './builder/serve-app.js';
+import { ProgressLogger, ProgressType } from './progress/progress-logger.js';
+import { formatTitleCard } from './reporting/format.js';
+
+/** Yargs command module that exposes the `run` sub-command. */
+export const RunModule = {
+  command: 'run',
+  describe: 'Run an evaluated app locally',
+  builder,
+  handler,
+} satisfies CommandModule<{}, Options>;
+
+/** Command-line options accepted by the `run` command. */
+interface Options {
+  /**
+   * Value of the `--environment` (alias `--env`) flag: either the name of a
+   * built-in environment or a path to an environment config file. Defaults to
+   * an empty string when the flag is omitted.
+   */
+  environment: string;
+  /**
+   * Value of the `--prompt` flag, i.e. the name of the prompt to run. Defaults
+   * to an empty string when the flag is omitted.
+   */
+  prompt: string;
+}
+
+/**
+ * Declares the flags understood by the `run` command.
+ *
+ * @param argv Yargs instance to which the options are added.
+ * @returns The same yargs chain, typed with the `run` command's options.
+ */
+function builder(argv: Argv): Argv<Options> {
+  const withEnvironment = argv.option('environment', {
+    type: 'string',
+    alias: ['env'],
+    default: '',
+    description: 'Path to the environment configuration file',
+  });
+
+  const withPrompt = withEnvironment.option('prompt', {
+    type: 'string',
+    default: '',
+    description: 'ID of the prompt within the environment that should be run',
+  });
+
+  // The version flag is switched off for this sub-command; help stays enabled.
+  return withPrompt.version(false).help();
+}
+
+/**
+ * Entry point of the `run` command. Delegates to `runApp` and prints any
+ * failure to stderr instead of letting the rejection propagate.
+ *
+ * @param options Parsed command-line arguments.
+ */
+async function handler(options: Arguments<Options>): Promise<void> {
+  try {
+    await runApp(options);
+  } catch (error) {
+    if (error instanceof UserFacingError) {
+      // User-facing errors already carry a complete, readable message.
+      console.error(chalk.red(error.message));
+    } else {
+      // This command runs an app rather than assessing one, so the heading
+      // must not talk about an "assessment process".
+      console.error(chalk.red('An error occurred while running the app:'));
+      console.error(chalk.red(error));
+    }
+  }
+}
+
+/**
+ * Recreates the project for a previously evaluated prompt inside a temporary
+ * directory, writes the stored LLM output into it and serves the result until
+ * the process is asked to stop.
+ *
+ * @param options Options passed to the `run` command.
+ * @throws UserFacingError When the options cannot be resolved to an environment
+ *   and prompt that have local LLM output (see `resolveConfig`).
+ */
+async function runApp(options: Options) {
+  const { environment, rootPromptDef, files } = await resolveConfig(options);
+  const progress = new ErrorOnlyProgressLogger();
+
+  console.log(
+    `Setting up the "${environment.displayName}" environment with the "${rootPromptDef.name}" prompt...`
+  );
+
+  const { directory, cleanup } = await setupProjectStructure(
+    environment,
+    rootPromptDef,
+    progress
+  );
+
+  // Settles once the process is exiting or has been interrupted. The serve
+  // callback below awaits it so that the app stays up until then.
+  const processExitPromise = new Promise<void>((resolve) => {
+    // The listener itself must do the work. Previously it only created an inner
+    // function without calling it, so the directory was never removed and the
+    // promise never resolved.
+    const done = () => {
+      try {
+        // Note: we don't use `cleanup` here, because the call needs to be synchronous.
+        rmSync(directory, { recursive: true });
+      } catch {
+        // Best effort: the listener can fire more than once (e.g. on `SIGINT`
+        // and again on `exit`), at which point the directory is already gone.
+      }
+      resolve();
+    };
+
+    process.on('exit', done);
+    // NOTE(review): `process` does not appear to emit a `close` event, so this
+    // registration is likely a no-op - confirm before removing it.
+    process.on('close', done);
+    process.on('SIGINT', done);
+  });
+
+  try {
+    await writeResponseFiles(directory, files, environment, rootPromptDef.name);
+
+    await serveApp(
+      environment.serveCommand,
+      rootPromptDef.name,
+      directory,
+      () => {},
+      async (url) => {
+        console.log();
+        console.log(formatTitleCard(`🎉 App is up and running at ${url}`));
+        // Presumably `serveApp` keeps the server alive until this callback
+        // settles - verify against its implementation.
+        await processExitPromise;
+      }
+    );
+  } finally {
+    await cleanup();
+  }
+}
+
+/**
+ * Validates the command-line options and collects everything needed to run an
+ * evaluated app.
+ *
+ * @param options Options passed to the `run` command.
+ * @returns The resolved environment, the definition of the requested prompt
+ *   and the files read from the prompt's local LLM output directory.
+ * @throws UserFacingError When a required flag is missing, when there is no
+ *   local LLM output for the environment or the prompt, or when the
+ *   environment does not define a prompt with the requested name.
+ */
+async function resolveConfig(options: Options) {
+  if (!options.environment) {
+    throw new UserFacingError(
+      [
+        '`--env` flag has not been specified. You have the following options:',
+        ' - Pass a path to an environment config file using the `--env` flag.',
+        ' - Pass `--env=angular-example` or `--env=solid-example` to use one of our built-in example environments.',
+        ' - Pass `--help` to see all available options.',
+      ].join('\n')
+    );
+  }
+
+  if (!options.prompt) {
+    throw new UserFacingError(
+      '`--prompt` flag has not been specified. ' +
+        'You have to pass a prompt name through the `--prompt` flag.'
+    );
+  }
+
+  // A built-in environment name takes precedence; anything else is treated as
+  // a path to a config file.
+  const environment = await getEnvironmentByPath(
+    BUILT_IN_ENVIRONMENTS.get(options.environment) || options.environment
+  );
+  const environmentDir = join(LLM_OUTPUT_DIR, environment.id);
+
+  if (!existsSync(environmentDir)) {
+    throw new UserFacingError(
+      `Could not find any LLM output for environment "${environment.displayName}" under "${environmentDir}"`
+    );
+  }
+
+  // Each sub-directory of the environment's output directory is named after
+  // the prompt whose output it holds.
+  const prompts = await getPossiblePrompts(environmentDir);
+
+  if (!prompts.includes(options.prompt)) {
+    // The quoted value is the prompt name, so the message must call it a
+    // prompt rather than an environment.
+    throw new UserFacingError(
+      `There is no local LLM output for prompt "${options.prompt}".\n` +
+        `The following prompts have local data:\n` +
+        prompts.map((p) => ` - ${p}`).join('\n')
+    );
+  }
+
+  const rootPromptDef = environment.executablePrompts.find(
+    (p) => p.name === options.prompt
+  );
+
+  if (!rootPromptDef) {
+    throw new UserFacingError(
+      `Environment "${environment.displayName}" does not have a prompt with a name of "${options.prompt}".\n` +
+        `The following prompts are available:\n` +
+        environment.executablePrompts.map((p) => ` - ${p.name}`).join('\n')
+    );
+  }
+
+  // Read every file stored for the prompt, keeping its path relative to the
+  // prompt directory.
+  const promptDir = join(environmentDir, options.prompt);
+  const filePaths = await glob('**/*', { cwd: promptDir });
+  const files: LlmResponseFile[] = await Promise.all(
+    filePaths.map(async (path) => {
+      return {
+        filePath: path,
+        code: await readFile(join(promptDir, path), 'utf8'),
+      };
+    })
+  );
+
+  return { environment, rootPromptDef, files };
+}
+
+/**
+ * Lists the names of the directories located directly inside `environmentDir`.
+ *
+ * @param environmentDir Directory whose immediate children are inspected.
+ * @returns Names of the child entries that are directories.
+ */
+async function getPossiblePrompts(environmentDir: string): Promise<string[]> {
+  const directoryNames: string[] = [];
+
+  for (const entry of await readdir(environmentDir, { withFileTypes: true })) {
+    if (entry.isDirectory()) {
+      directoryNames.push(entry.name);
+    }
+  }
+
+  return directoryNames;
+}
+
+/**
+ * Progress logger that ignores every entry except errors, which are written
+ * to stderr in red.
+ */
+class ErrorOnlyProgressLogger implements ProgressLogger {
+  initialize(): void {}
+  finalize(): void {}
+
+  /**
+   * Prints `message` and, when provided, `details` if the entry is an error.
+   *
+   * @param _ Unused first argument of the logger interface.
+   * @param type Kind of the entry; only `'error'` produces output.
+   * @param message Main text of the entry.
+   * @param details Optional additional text printed after the message.
+   */
+  log(_: unknown, type: ProgressType, message: string, details?: string) {
+    if (type !== 'error') {
+      return;
+    }
+
+    console.error(chalk.red(message));
+
+    if (details) {
+      // Print the details themselves; this line used to repeat `message`.
+      console.error(chalk.red(details));
+    }
+  }
+}