Skip to content

Commit 3d2ee12

Browse files
committed
feat: add a script for running an evaluated app locally
Adds the `web-codegen-scorer run` script that allows users to run an evaluated app in their browser. It spins up a server using the local LLM output and the existing environment config.
1 parent 4ce8bbf commit 3d2ee12

File tree

6 files changed

+229
-16
lines changed

6 files changed

+229
-16
lines changed

README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,14 @@ web-codegen-scorer eval --env=angular-example
5757
web-codegen-scorer init
5858
```
5959

60+
5. (Optional) **Run an evaluated app locally:**
61+
62+
Once you've evaluated an app, you can run it locally with the following command:
63+
64+
```bash
65+
web-codegen-scorer run --env=angular-example --prompt=<name of the prompt you want to run>
66+
```
67+
6068
## Command-line flags
6169

6270
You can customize the `web-codegen-scorer eval` script with the following flags:

runner/bin/cli.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import { hideBin } from 'yargs/helpers';
55
import { EvalModule } from '../eval-cli.js';
66
import { ReportModule } from '../report-cli.js';
77
import { InitModule } from '../init-cli.js';
8+
import { RunModule } from '../run-cli.js';
89

910
yargs()
1011
.scriptName('web-codegen-scorer')
@@ -13,6 +14,7 @@ yargs()
1314
.command(EvalModule.command, EvalModule.describe, EvalModule)
1415
.command(ReportModule.command, ReportModule.describe, ReportModule)
1516
.command(InitModule.command, InitModule.describe, InitModule)
17+
.command(RunModule.command, RunModule.describe, RunModule)
1618
.wrap(120)
1719
.strict()
1820
.help()

runner/builder/serve-app.ts

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,12 @@ export async function serveApp(
1313
let serveProcess: ChildProcess | null = null;
1414

1515
try {
16-
const launchMessage = 'Launching app inside a browser';
17-
progressLog('eval', launchMessage);
1816
serveProcess = exec(serveCommand, { cwd: tempDir });
19-
progressLog('eval', launchMessage, `(PID: ${serveProcess.pid})`);
17+
progressLog(
18+
'eval',
19+
'Launching app inside a browser',
20+
`(PID: ${serveProcess.pid})`
21+
);
2022

2123
const actualPort = await new Promise<number>((resolvePort, rejectPort) => {
2224
const serveStartTimeout = 45000; // 45s for serve to start

runner/configuration/constants.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,3 +34,15 @@ export const REPORTS_ROOT_DIR = join(rootDir, 'reports');
3434
* MUST be kept in sync with `RunInfo.version`.
3535
*/
3636
export const REPORT_VERSION = 2;
37+
38+
/**
 * Environments that ship together with the eval tool.
 *
 * Keys are the short names accepted in place of a config path (for example
 * `--env=angular-example`); values are the paths of the matching `config.js`
 * files, resolved relative to this module's directory.
 */
export const BUILT_IN_ENVIRONMENTS = new Map<string, string>(
  Object.entries({
    'angular-example': join(
      import.meta.dirname,
      '../../examples/environments/angular/config.js'
    ),
    'solid-example': join(
      import.meta.dirname,
      '../../examples/environments/solid/config.js'
    ),
  })
);

runner/eval-cli.ts

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
import { Arguments, Argv, CommandModule } from 'yargs';
22
import chalk from 'chalk';
3-
import { join } from 'path';
43
import { assertValidModelName, LlmRunner } from './codegen/llm-runner.js';
54
import {
5+
BUILT_IN_ENVIRONMENTS,
66
DEFAULT_AUTORATER_MODEL_NAME,
77
DEFAULT_MODEL_NAME,
88
} from './configuration/constants.js';
@@ -22,17 +22,6 @@ export const EvalModule = {
2222
describe: 'Evaluate code using an LLM',
2323
} satisfies CommandModule<{}, Options>;
2424

25-
const builtInEnvironments = new Map<string, string>([
26-
[
27-
'angular-example',
28-
join(import.meta.dirname, '../examples/environments/angular/config.js'),
29-
],
30-
[
31-
'solid-example',
32-
join(import.meta.dirname, '../examples/environments/solid/config.js'),
33-
],
34-
]);
35-
3625
interface Options {
3726
environment?: string;
3827
model: string;
@@ -198,7 +187,7 @@ async function handler(cliArgs: Arguments<Options>): Promise<void> {
198187
ratingLlm,
199188
model: cliArgs.model,
200189
environmentConfigPath:
201-
builtInEnvironments.get(cliArgs.environment) || cliArgs.environment,
190+
BUILT_IN_ENVIRONMENTS.get(cliArgs.environment) || cliArgs.environment,
202191
localMode: cliArgs.local,
203192
limit: cliArgs.limit,
204193
concurrency: cliArgs.concurrency as number,

runner/run-cli.ts

Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,200 @@
1+
import { Arguments, Argv, CommandModule } from 'yargs';
2+
import chalk from 'chalk';
3+
import process from 'process';
4+
import { getEnvironmentByPath } from './configuration/environment-resolution.js';
5+
import {
6+
BUILT_IN_ENVIRONMENTS,
7+
LLM_OUTPUT_DIR,
8+
} from './configuration/constants.js';
9+
import { UserFacingError } from './utils/errors.js';
10+
import { existsSync, rmSync } from 'fs';
11+
import { readFile, readdir } from 'fs/promises';
12+
import { join } from 'path';
13+
import { glob } from 'tinyglobby';
14+
import { LlmResponseFile } from './shared-interfaces.js';
15+
import {
16+
setupProjectStructure,
17+
writeResponseFiles,
18+
} from './orchestration/file-system.js';
19+
import { serveApp } from './builder/serve-app.js';
20+
import { ProgressLogger, ProgressType } from './progress/progress-logger.js';
21+
import { formatTitleCard } from './reporting/format.js';
22+
23+
/**
 * Yargs command module backing `web-codegen-scorer run`, which runs an
 * already-evaluated app locally.
 */
export const RunModule = {
  command: 'run',
  describe: 'Run an evaluated app locally',
  builder,
  handler,
} satisfies CommandModule<{}, Options>;
29+
30+
/** Command-line options understood by the `run` command. */
interface Options {
  /** Name of the prompt whose local LLM output should be run (`--prompt`). */
  prompt: string;
  /**
   * Name of a built-in environment or path to an environment config file
   * (`--environment`, aliased as `--env`).
   */
  environment: string;
}
34+
35+
/**
 * Declares the flags of the `run` command on the given yargs instance.
 *
 * Both flags default to an empty string; the missing-flag errors are raised
 * later by `resolveConfig` rather than by yargs itself.
 *
 * @param argv Yargs instance to extend.
 * @returns The same builder chain, typed with the `run` command options.
 */
function builder(argv: Argv): Argv<Options> {
  const withEnvironment = argv.option('environment', {
    type: 'string',
    alias: ['env'],
    default: '',
    description: 'Path to the environment configuration file',
  });

  const withPrompt = withEnvironment.option('prompt', {
    type: 'string',
    default: '',
    description: 'ID of the prompt within the environment that should be run',
  });

  return withPrompt.version(false).help();
}
51+
52+
/**
 * Entry point of the `run` command: runs the app and reports any failure.
 *
 * Errors never propagate out of this function. They are printed to stderr in
 * red and the process exit code is set to 1 so that calling shells and scripts
 * can detect the failure.
 *
 * @param options Parsed command-line arguments.
 */
async function handler(options: Arguments<Options>): Promise<void> {
  try {
    await runApp(options);
  } catch (error: unknown) {
    // Without this the command would report success (exit code 0) even
    // though the app could not be run.
    process.exitCode = 1;

    if (error instanceof UserFacingError) {
      // User-facing errors carry a ready-to-print message; no stack needed.
      console.error(chalk.red(error.message));
    } else {
      console.error(chalk.red('An error occurred while running the app:'));
      console.error(
        chalk.red(
          error instanceof Error
            ? (error.stack ?? error.message)
            : String(error)
        )
      );
    }
  }
}
66+
67+
/**
 * Runs a previously evaluated app locally.
 *
 * Resolves the environment, prompt and stored LLM output files from the
 * options, sets up a project directory for them, writes the files into it and
 * starts the environment's serve command. Once the app is up, its URL is
 * printed and the function keeps waiting until the process exits or receives
 * SIGINT.
 *
 * @param options Environment and prompt selected on the command line.
 * @throws UserFacingError (via `resolveConfig`) when the options do not point
 *   at existing local LLM output.
 */
async function runApp(options: Options) {
  const { environment, rootPromptDef, files } = await resolveConfig(options);
  const progress = new ErrorOnlyProgressLogger();

  console.log(
    `Setting up the "${environment.displayName}" environment with the "${rootPromptDef.name}" prompt...`
  );

  const { directory, cleanup } = await setupProjectStructure(
    environment,
    rootPromptDef,
    progress
  );

  // Settles once the process is shutting down. The handler must do its work
  // directly: wrapping it in another function that is never invoked would
  // leave the directory on disk and this promise pending forever.
  const processExitPromise = new Promise<void>((resolve) => {
    const done = () => {
      try {
        // Note: we don't use `cleanup` here, because the call needs to be synchronous.
        rmSync(directory, { recursive: true });
      } catch {
        // Best effort: the directory may already be gone, e.g. when `done`
        // runs for SIGINT and then again for `exit`.
      }
      resolve();
    };

    process.on('exit', done);
    process.on('close', done);
    process.on('SIGINT', done);
  });

  try {
    await writeResponseFiles(directory, files, environment, rootPromptDef.name);

    await serveApp(
      environment.serveCommand,
      rootPromptDef.name,
      directory,
      () => {},
      async (url) => {
        console.log();
        console.log(formatTitleCard(`🎉 App is up and running at ${url}`));
        // Keep the callback pending while the user is using the app.
        await processExitPromise;
      }
    );
  } finally {
    await cleanup();
  }
}
115+
116+
/**
 * Validates the command-line options and loads everything needed to run the
 * app: the environment, the prompt definition and the stored LLM output files.
 *
 * @param options Environment and prompt selected on the command line.
 * @returns The resolved environment, the matching prompt definition and the
 *   contents of every file found under the prompt's local output directory.
 * @throws UserFacingError when a flag is missing, when there is no local LLM
 *   output for the environment or prompt, or when the environment does not
 *   define the requested prompt.
 */
async function resolveConfig(options: Options) {
  if (!options.environment) {
    throw new UserFacingError(
      [
        '`--env` flag has not been specified. You have the following options:',
        ' - Pass a path to an environment config file using the `--env` flag.',
        ' - Pass `--env=angular-example` or `--env=solid-example` to use one of our built-in example environments.',
        ' - Pass `--help` to see all available options.',
      ].join('\n')
    );
  } else if (!options.prompt) {
    throw new UserFacingError(
      '`--prompt` flag has not been specified. ' +
        'You have to pass a prompt name through the `--prompt` flag.'
    );
  }

  // A built-in environment name takes precedence; anything else is treated
  // as a path to a config file.
  const environment = await getEnvironmentByPath(
    BUILT_IN_ENVIRONMENTS.get(options.environment) ?? options.environment
  );
  const environmentDir = join(LLM_OUTPUT_DIR, environment.id);

  if (!existsSync(environmentDir)) {
    throw new UserFacingError(
      `Could not find any LLM output for environment "${environment.displayName}" under "${environmentDir}"`
    );
  }

  // Each sub-directory of the environment's output directory is one prompt.
  const prompts = await getPossiblePrompts(environmentDir);

  if (!prompts.includes(options.prompt)) {
    throw new UserFacingError(
      `There is no local LLM output for prompt "${options.prompt}".\n` +
        `The following prompts have local data:\n` +
        prompts.map((p) => ` - ${p}`).join('\n')
    );
  }

  const rootPromptDef = environment.executablePrompts.find(
    (p) => p.name === options.prompt
  );

  if (!rootPromptDef) {
    throw new UserFacingError(
      `Environment "${environment.displayName}" does not have a prompt with a name of "${options.prompt}".\n` +
        `The following prompts are available:\n` +
        environment.executablePrompts.map((p) => ` - ${p.name}`).join('\n')
    );
  }

  const promptDir = join(environmentDir, options.prompt);
  const filePaths = await glob('**/*', { cwd: promptDir });
  const files: LlmResponseFile[] = await Promise.all(
    filePaths.map(async (path) => {
      return {
        // Paths stay relative to the prompt directory.
        filePath: path,
        code: await readFile(join(promptDir, path), 'utf8'),
      };
    })
  );

  return { environment, rootPromptDef, files };
}
179+
180+
/**
 * Lists the names of the immediate sub-directories of an environment's output
 * directory. Regular files and other non-directory entries are ignored.
 *
 * @param environmentDir Directory whose sub-directories should be listed.
 * @returns Sub-directory names, in the order reported by the file system.
 */
async function getPossiblePrompts(environmentDir: string): Promise<string[]> {
  const promptNames: string[] = [];

  for (const entry of await readdir(environmentDir, { withFileTypes: true })) {
    if (entry.isDirectory()) {
      promptNames.push(entry.name);
    }
  }

  return promptNames;
}
186+
187+
/**
 * Progress logger that ignores every event except errors, which are printed
 * to stderr in red. Initialization and finalization are no-ops.
 */
class ErrorOnlyProgressLogger implements ProgressLogger {
  initialize(): void {}
  finalize(): void {}

  /**
   * Prints the message, followed by the details when present, for events of
   * type `'error'`. All other event types are dropped.
   *
   * @param _ Unused first argument of the logger interface.
   * @param type Kind of progress event being reported.
   * @param message Main text of the event.
   * @param details Optional extra information about the event.
   */
  log(_: unknown, type: ProgressType, message: string, details?: string) {
    if (type === 'error') {
      console.error(chalk.red(message));

      if (details) {
        // Print the details themselves rather than repeating the message.
        console.error(chalk.red(details));
      }
    }
  }
}

0 commit comments

Comments
 (0)