Merged
8 changes: 8 additions & 0 deletions README.md
@@ -57,6 +57,14 @@ web-codegen-scorer eval --env=angular-example
web-codegen-scorer init
```

5. (Optional) **Run an evaluated app locally:**

Once you've evaluated an app, you can run it locally with the following command:

```bash
web-codegen-scorer run --env=angular-example --prompt=<name of the prompt you want to run>
```
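
For example, assuming your environment defines a prompt named `todo-app` (a hypothetical name; substitute one of your own prompts):

```bash
web-codegen-scorer run --env=angular-example --prompt=todo-app
```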

## Command-line flags

You can customize the `web-codegen-scorer eval` script with the following flags:
2 changes: 2 additions & 0 deletions runner/bin/cli.ts
@@ -5,6 +5,7 @@ import { hideBin } from 'yargs/helpers';
import { EvalModule } from '../eval-cli.js';
import { ReportModule } from '../report-cli.js';
import { InitModule } from '../init-cli.js';
import { RunModule } from '../run-cli.js';

yargs()
.scriptName('web-codegen-scorer')
@@ -13,6 +14,7 @@ yargs()
.command(EvalModule.command, EvalModule.describe, EvalModule)
.command(ReportModule.command, ReportModule.describe, ReportModule)
.command(InitModule.command, InitModule.describe, InitModule)
.command(RunModule.command, RunModule.describe, RunModule)
.wrap(120)
.strict()
.help()
8 changes: 5 additions & 3 deletions runner/builder/serve-app.ts
@@ -13,10 +13,12 @@ export async function serveApp(
   let serveProcess: ChildProcess | null = null;
 
   try {
-    const launchMessage = 'Launching app inside a browser';
-    progressLog('eval', launchMessage);
     serveProcess = exec(serveCommand, { cwd: tempDir });
-    progressLog('eval', launchMessage, `(PID: ${serveProcess.pid})`);
+    progressLog(
+      'eval',
+      'Launching app inside a browser',
+      `(PID: ${serveProcess.pid})`
+    );
 
     const actualPort = await new Promise<number>((resolvePort, rejectPort) => {
       const serveStartTimeout = 45000; // 45s for serve to start
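(The log call now runs after `exec`, so the message can include the child process PID, and the two separate log calls are folded into one.)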
12 changes: 12 additions & 0 deletions runner/configuration/constants.ts
@@ -34,3 +34,15 @@ export const REPORTS_ROOT_DIR = join(rootDir, 'reports');
* MUST be kept in sync with `RunInfo.version`.
*/
export const REPORT_VERSION = 2;

/** Environments that are shipped together with the eval tool. */
export const BUILT_IN_ENVIRONMENTS = new Map<string, string>([
[
'angular-example',
join(import.meta.dirname, '../../examples/environments/angular/config.js'),
],
[
'solid-example',
join(import.meta.dirname, '../../examples/environments/solid/config.js'),
],
]);
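
Both the `eval` and `run` commands resolve the `--env` flag through this map before falling back to treating the value as a config-file path (see `eval-cli.ts` and `run-cli.ts` below). A minimal sketch of that lookup, using a hypothetical helper name and an assumed import path:

```ts
import { BUILT_IN_ENVIRONMENTS } from './configuration/constants.js';

// A built-in name such as 'angular-example' resolves to its bundled config;
// any other value is passed through as a user-supplied config file path.
function resolveEnvironmentConfigPath(envFlag: string): string {
  return BUILT_IN_ENVIRONMENTS.get(envFlag) || envFlag;
}
```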
15 changes: 2 additions & 13 deletions runner/eval-cli.ts
@@ -1,8 +1,8 @@
 import { Arguments, Argv, CommandModule } from 'yargs';
 import chalk from 'chalk';
-import { join } from 'path';
 import { assertValidModelName, LlmRunner } from './codegen/llm-runner.js';
 import {
+  BUILT_IN_ENVIRONMENTS,
   DEFAULT_AUTORATER_MODEL_NAME,
   DEFAULT_MODEL_NAME,
 } from './configuration/constants.js';
@@ -22,17 +22,6 @@ export const EvalModule = {
   describe: 'Evaluate code using an LLM',
 } satisfies CommandModule<{}, Options>;
 
-const builtInEnvironments = new Map<string, string>([
-  [
-    'angular-example',
-    join(import.meta.dirname, '../examples/environments/angular/config.js'),
-  ],
-  [
-    'solid-example',
-    join(import.meta.dirname, '../examples/environments/solid/config.js'),
-  ],
-]);
-
 interface Options {
   environment?: string;
   model: string;
@@ -198,7 +187,7 @@ async function handler(cliArgs: Arguments<Options>): Promise<void> {
     ratingLlm,
     model: cliArgs.model,
     environmentConfigPath:
-      builtInEnvironments.get(cliArgs.environment) || cliArgs.environment,
+      BUILT_IN_ENVIRONMENTS.get(cliArgs.environment) || cliArgs.environment,
     localMode: cliArgs.local,
     limit: cliArgs.limit,
     concurrency: cliArgs.concurrency as number,
197 changes: 197 additions & 0 deletions runner/run-cli.ts
@@ -0,0 +1,197 @@
import { Arguments, Argv, CommandModule } from 'yargs';
import chalk from 'chalk';
import process from 'process';
import { getEnvironmentByPath } from './configuration/environment-resolution.js';
import {
BUILT_IN_ENVIRONMENTS,
LLM_OUTPUT_DIR,
} from './configuration/constants.js';
import { UserFacingError } from './utils/errors.js';
import { existsSync, rmSync } from 'fs';
import { readFile, readdir } from 'fs/promises';
import { join } from 'path';
import { glob } from 'tinyglobby';
import { LlmResponseFile } from './shared-interfaces.js';
import {
setupProjectStructure,
writeResponseFiles,
} from './orchestration/file-system.js';
import { serveApp } from './builder/serve-app.js';
import { ProgressLogger, ProgressType } from './progress/progress-logger.js';
import { formatTitleCard } from './reporting/format.js';

export const RunModule = {
builder,
handler,
command: 'run',
describe: 'Run an evaluated app locally',
} satisfies CommandModule<{}, Options>;

interface Options {
environment: string;
prompt: string;
}

function builder(argv: Argv): Argv<Options> {
return argv
.option('environment', {
type: 'string',
alias: ['env'],
default: '',
description: 'Path to the environment configuration file',
})
.option('prompt', {
type: 'string',
default: '',
description: 'ID of the prompt within the environment that should be run',
})
.version(false)
.help();
}

async function handler(options: Arguments<Options>): Promise<void> {
try {
await runApp(options);
} catch (error) {
if (error instanceof UserFacingError) {
console.error(chalk.red(error.message));
} else {
throw error;
}
}
}

async function runApp(options: Options) {
const { environment, rootPromptDef, files } = await resolveConfig(options);
const progress = new ErrorOnlyProgressLogger();

console.log(
`Setting up the "${environment.displayName}" environment with the "${rootPromptDef.name}" prompt...`
);

const { directory, cleanup } = await setupProjectStructure(
environment,
rootPromptDef,
progress
);

  const processExitPromise = new Promise<void>((resolve) => {
    const done = () => {
      try {
        // Note: we don't use `cleanup` here, because the call needs to be synchronous.
        rmSync(directory, { recursive: true });
      } catch {}
      resolve();
    };

    process.on('exit', done);
    process.on('SIGINT', done);
  });

try {
await writeResponseFiles(directory, files, environment, rootPromptDef.name);

await serveApp(
environment.serveCommand,
rootPromptDef.name,
directory,
() => {},
async (url) => {
console.log();
console.log(formatTitleCard(`🎉 App is up and running at ${url}`));
await processExitPromise;
}
);
} finally {
await cleanup();
}
}

async function resolveConfig(options: Options) {
if (!options.environment) {
throw new UserFacingError(
      // --- Inline review ---
      // Reviewer (Member): We aren't using Yargs demandOption here because we
      // want this helpful error, right?
      // Author: Yep, the Yargs error isn't super readable.
[
'`--env` flag has not been specified. You have the following options:',
' - Pass a path to an environment config file using the `--env` flag.',
' - Pass `--env=angular-example` or `--env=solid-example` to use one of our built-in example environments.',
' - Pass `--help` to see all available options.',
].join('\n')
);
} else if (!options.prompt) {
throw new UserFacingError(
'`--prompt` flag has not been specified. ' +
'You have to pass a prompt name through the `--prompt` flag.'
);
}

const environment = await getEnvironmentByPath(
BUILT_IN_ENVIRONMENTS.get(options.environment) || options.environment
);
const environmentDir = join(LLM_OUTPUT_DIR, environment.id);

if (!existsSync(environmentDir)) {
throw new UserFacingError(
`Could not find any LLM output for environment "${environment.displayName}" under "${environmentDir}"`
);
}

const prompts = await getPossiblePrompts(environmentDir);

if (!prompts.includes(options.prompt)) {
throw new UserFacingError(
      `There is no local LLM output for prompt "${options.prompt}".\n` +
`The following prompts have local data:\n` +
prompts.map((p) => ` - ${p}`).join('\n')
);
}

const rootPromptDef = environment.executablePrompts.find(
(p) => p.name === options.prompt
);

if (!rootPromptDef) {
throw new UserFacingError(
`Environment "${environment.displayName}" does not have a prompt with a name of "${options.prompt}".\n` +
`The following prompts are available:\n` +
environment.executablePrompts.map((p) => ` - ${p.name}`).join('\n')
);
}

const promptDir = join(environmentDir, options.prompt);
const filePaths = await glob('**/*', { cwd: promptDir });
const files: LlmResponseFile[] = await Promise.all(
filePaths.map(async (path) => {
return {
filePath: path,
code: await readFile(join(promptDir, path), 'utf8'),
};
})
);

return { environment, rootPromptDef, files };
}

async function getPossiblePrompts(environmentDir: string): Promise<string[]> {
const entities = await readdir(environmentDir, { withFileTypes: true });
return entities
.filter((entity) => entity.isDirectory())
.map((entity) => entity.name);
}

class ErrorOnlyProgressLogger implements ProgressLogger {
initialize(): void {}
finalize(): void {}

log(_: unknown, type: ProgressType, message: string, details?: string) {
if (type === 'error') {
console.error(chalk.red(message));

if (details) {
        console.error(chalk.red(details));
}
}
}
}