diff --git a/README.md b/README.md
index 908300b..cc34717 100644
--- a/README.md
+++ b/README.md
@@ -57,6 +57,14 @@
 web-codegen-scorer eval --env=angular-example
 web-codegen-scorer init
 ```
+5. (Optional) **Run an evaluated app locally:**
+
+   Once you've evaluated an app, you can run it locally with the following command:
+
+```bash
+web-codegen-scorer run --env=angular-example --prompt=<prompt name>
+```
+
 ## Command-line flags
 
 You can customize the `web-codegen-scorer eval` script with the following flags:
diff --git a/runner/bin/cli.ts b/runner/bin/cli.ts
index 023af62..fae080f 100644
--- a/runner/bin/cli.ts
+++ b/runner/bin/cli.ts
@@ -5,6 +5,7 @@ import { hideBin } from 'yargs/helpers';
 import { EvalModule } from '../eval-cli.js';
 import { ReportModule } from '../report-cli.js';
 import { InitModule } from '../init-cli.js';
+import { RunModule } from '../run-cli.js';
 
 yargs()
   .scriptName('web-codegen-scorer')
@@ -13,6 +14,7 @@ yargs()
   .command(EvalModule.command, EvalModule.describe, EvalModule)
   .command(ReportModule.command, ReportModule.describe, ReportModule)
   .command(InitModule.command, InitModule.describe, InitModule)
+  .command(RunModule.command, RunModule.describe, RunModule)
   .wrap(120)
   .strict()
   .help()
diff --git a/runner/builder/serve-app.ts b/runner/builder/serve-app.ts
index 62d15e2..f1cc648 100644
--- a/runner/builder/serve-app.ts
+++ b/runner/builder/serve-app.ts
@@ -13,10 +13,12 @@ export async function serveApp(
   let serveProcess: ChildProcess | null = null;
 
   try {
-    const launchMessage = 'Launching app inside a browser';
-    progressLog('eval', launchMessage);
     serveProcess = exec(serveCommand, { cwd: tempDir });
-    progressLog('eval', launchMessage, `(PID: ${serveProcess.pid})`);
+    progressLog(
+      'eval',
+      'Launching app inside a browser',
+      `(PID: ${serveProcess.pid})`
+    );
 
     const actualPort = await new Promise<number>((resolvePort, rejectPort) => {
       const serveStartTimeout = 45000; // 45s for serve to start
diff --git a/runner/configuration/constants.ts b/runner/configuration/constants.ts
index 1fb1a1e..a37d1cd 100644
--- a/runner/configuration/constants.ts
+++ b/runner/configuration/constants.ts
@@ -34,3 +34,15 @@ export const REPORTS_ROOT_DIR = join(rootDir, 'reports');
  * MUST be kept in sync with `RunInfo.version`.
  */
 export const REPORT_VERSION = 2;
+
+/** Environments that are shipped together with the eval tool. */
+export const BUILT_IN_ENVIRONMENTS = new Map([
+  [
+    'angular-example',
+    join(import.meta.dirname, '../../examples/environments/angular/config.js'),
+  ],
+  [
+    'solid-example',
+    join(import.meta.dirname, '../../examples/environments/solid/config.js'),
+  ],
+]);
diff --git a/runner/eval-cli.ts b/runner/eval-cli.ts
index 90ea718..8997044 100644
--- a/runner/eval-cli.ts
+++ b/runner/eval-cli.ts
@@ -1,8 +1,8 @@
 import { Arguments, Argv, CommandModule } from 'yargs';
 import chalk from 'chalk';
-import { join } from 'path';
 import { assertValidModelName, LlmRunner } from './codegen/llm-runner.js';
 import {
+  BUILT_IN_ENVIRONMENTS,
   DEFAULT_AUTORATER_MODEL_NAME,
   DEFAULT_MODEL_NAME,
 } from './configuration/constants.js';
@@ -22,17 +22,6 @@ export const EvalModule = {
   describe: 'Evaluate code using an LLM',
 } satisfies CommandModule<{}, Options>;
 
-const builtInEnvironments = new Map([
-  [
-    'angular-example',
-    join(import.meta.dirname, '../examples/environments/angular/config.js'),
-  ],
-  [
-    'solid-example',
-    join(import.meta.dirname, '../examples/environments/solid/config.js'),
-  ],
-]);
-
 interface Options {
   environment?: string;
   model: string;
@@ -198,7 +187,7 @@ async function handler(cliArgs: Arguments<Options>): Promise<void> {
     ratingLlm,
     model: cliArgs.model,
     environmentConfigPath:
-      builtInEnvironments.get(cliArgs.environment) || cliArgs.environment,
+      BUILT_IN_ENVIRONMENTS.get(cliArgs.environment) || cliArgs.environment,
     localMode: cliArgs.local,
     limit: cliArgs.limit,
     concurrency: cliArgs.concurrency as number,
diff --git a/runner/run-cli.ts b/runner/run-cli.ts
new file mode 100644
index 0000000..d47f4fe
--- /dev/null
+++ b/runner/run-cli.ts
@@ -0,0 +1,195 @@
+import { Arguments, Argv, CommandModule } from 'yargs';
+import chalk from 'chalk';
+import process from 'process';
+import { getEnvironmentByPath } from './configuration/environment-resolution.js';
+import {
+  BUILT_IN_ENVIRONMENTS,
+  LLM_OUTPUT_DIR,
+} from './configuration/constants.js';
+import { UserFacingError } from './utils/errors.js';
+import { existsSync, rmSync } from 'fs';
+import { readFile, readdir } from 'fs/promises';
+import { join } from 'path';
+import { glob } from 'tinyglobby';
+import { LlmResponseFile } from './shared-interfaces.js';
+import {
+  setupProjectStructure,
+  writeResponseFiles,
+} from './orchestration/file-system.js';
+import { serveApp } from './builder/serve-app.js';
+import { ProgressLogger, ProgressType } from './progress/progress-logger.js';
+import { formatTitleCard } from './reporting/format.js';
+
+export const RunModule = {
+  builder,
+  handler,
+  command: 'run',
+  describe: 'Run an evaluated app locally',
+} satisfies CommandModule<{}, Options>;
+
+interface Options {
+  environment: string;
+  prompt: string;
+}
+
+function builder(argv: Argv): Argv<Options> {
+  return argv
+    .option('environment', {
+      type: 'string',
+      alias: ['env'],
+      default: '',
+      description: 'Path to the environment configuration file',
+    })
+    .option('prompt', {
+      type: 'string',
+      default: '',
+      description: 'ID of the prompt within the environment that should be run',
+    })
+    .version(false)
+    .help();
+}
+
+async function handler(options: Arguments<Options>): Promise<void> {
+  try {
+    await runApp(options);
+  } catch (error) {
+    if (error instanceof UserFacingError) {
+      console.error(chalk.red(error.message));
+    } else {
+      throw error;
+    }
+  }
+}
+
+async function runApp(options: Options) {
+  const { environment, rootPromptDef, files } = await resolveConfig(options);
+  const progress = new ErrorOnlyProgressLogger();
+
+  console.log(
+    `Setting up the "${environment.displayName}" environment with the "${rootPromptDef.name}" prompt...`
+  );
+
+  const { directory, cleanup } = await setupProjectStructure(
+    environment,
+    rootPromptDef,
+    progress
+  );
+
+  const processExitPromise = new Promise<void>((resolve) => {
+    const done = () => {
+      try {
+        // Note: we don't use `cleanup` here, because the call needs to be synchronous.
+        rmSync(directory, { recursive: true });
+      } catch {}
+      resolve();
+    };
+
+    process.on('exit', done);
+    process.on('close', done);
+    process.on('SIGINT', done);
+  });
+
+  try {
+    await writeResponseFiles(directory, files, environment, rootPromptDef.name);
+
+    await serveApp(
+      environment.serveCommand,
+      rootPromptDef.name,
+      directory,
+      () => {},
+      async (url) => {
+        console.log();
+        console.log(formatTitleCard(`🎉 App is up and running at ${url}`));
+        await processExitPromise;
+      }
+    );
+  } finally {
+    await cleanup();
+  }
+}
+
+async function resolveConfig(options: Options) {
+  if (!options.environment) {
+    throw new UserFacingError(
+      [
+        '`--env` flag has not been specified. You have the following options:',
+        ' - Pass a path to an environment config file using the `--env` flag.',
+        ' - Pass `--env=angular-example` or `--env=solid-example` to use one of our built-in example environments.',
+        ' - Pass `--help` to see all available options.',
+      ].join('\n')
+    );
+  } else if (!options.prompt) {
+    throw new UserFacingError(
+      '`--prompt` flag has not been specified. ' +
+        'You have to pass a prompt name through the `--prompt` flag.'
+    );
+  }
+
+  const environment = await getEnvironmentByPath(
+    BUILT_IN_ENVIRONMENTS.get(options.environment) || options.environment
+  );
+  const environmentDir = join(LLM_OUTPUT_DIR, environment.id);
+
+  if (!existsSync(environmentDir)) {
+    throw new UserFacingError(
+      `Could not find any LLM output for environment "${environment.displayName}" under "${environmentDir}"`
+    );
+  }
+
+  const prompts = await getPossiblePrompts(environmentDir);
+
+  if (!prompts.includes(options.prompt)) {
+    throw new UserFacingError(
+      `There is no local LLM output for prompt "${options.prompt}".\n` +
+        `The following prompts have local data:\n` +
+        prompts.map((p) => ` - ${p}`).join('\n')
+    );
+  }
+
+  const rootPromptDef = environment.executablePrompts.find(
+    (p) => p.name === options.prompt
+  );
+
+  if (!rootPromptDef) {
+    throw new UserFacingError(
+      `Environment "${environment.displayName}" does not have a prompt with a name of "${options.prompt}".\n` +
+        `The following prompts are available:\n` +
+        environment.executablePrompts.map((p) => ` - ${p.name}`).join('\n')
+    );
+  }
+
+  const promptDir = join(environmentDir, options.prompt);
+  const filePaths = await glob('**/*', { cwd: promptDir });
+  const files: LlmResponseFile[] = await Promise.all(
+    filePaths.map(async (path) => {
+      return {
+        filePath: path,
+        code: await readFile(join(promptDir, path), 'utf8'),
+      };
+    })
+  );
+
+  return { environment, rootPromptDef, files };
+}
+
+async function getPossiblePrompts(environmentDir: string): Promise<string[]> {
+  const entities = await readdir(environmentDir, { withFileTypes: true });
+  return entities
+    .filter((entity) => entity.isDirectory())
+    .map((entity) => entity.name);
+}
+
+class ErrorOnlyProgressLogger implements ProgressLogger {
+  initialize(): void {}
+  finalize(): void {}
+
+  log(_: unknown, type: ProgressType, message: string, details?: string) {
+    if (type === 'error') {
+      console.error(chalk.red(message));
+
+      if (details) {
+        console.error(chalk.red(details));
+      }
+    }
+  }
+}
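
For reviewers trying the change out, the end-to-end flow the new command enables looks like this. The prompt ID `todo-app` is purely illustrative; `run` accepts any prompt name for which a previous `eval` run has produced local output, and it lists the available names if you pass an unknown one.

```bash
# Generate and score apps first; this writes the LLM output to disk,
# grouped by environment ID and prompt name.
web-codegen-scorer eval --env=angular-example

# Re-serve one of the generated apps locally. No LLM calls are made here:
# the command reads the stored files, writes them into a freshly scaffolded
# project for the environment, and serves it.
web-codegen-scorer run --env=angular-example --prompt=todo-app
```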