Skip to content

Commit d4ae0a6

Browse files
committed
feat: add a script for running an evaluated app locally
Adds the `web-codegen-scorer run` script that allows users to run an evaluated app in their browser. It spins up a server using the local LLM output and the existing environment config.
1 parent 8748be0 commit d4ae0a6

File tree

6 files changed

+226
-16
lines changed

6 files changed

+226
-16
lines changed

README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,14 @@ web-codegen-scorer eval --env=angular-example
5757
web-codegen-scorer init
5858
```
5959

60+
5. (Optional) **Run an evaluated app locally:**
61+
62+
Once you've evaluated an app, you can run it locally with the following command:
63+
64+
```bash
65+
web-codegen-scorer run --env=angular-example --prompt=<name of the prompt you want to run>
66+
```
67+
6068
## Command-line flags
6169

6270
You can customize the `web-codegen-scorer eval` script with the following flags:

runner/bin/cli.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import { hideBin } from 'yargs/helpers';
55
import { EvalModule } from '../eval-cli.js';
66
import { ReportModule } from '../report-cli.js';
77
import { InitModule } from '../init-cli.js';
8+
import { RunModule } from '../run-cli.js';
89

910
yargs()
1011
.scriptName('web-codegen-scorer')
@@ -13,6 +14,7 @@ yargs()
1314
.command(EvalModule.command, EvalModule.describe, EvalModule)
1415
.command(ReportModule.command, ReportModule.describe, ReportModule)
1516
.command(InitModule.command, InitModule.describe, InitModule)
17+
.command(RunModule.command, RunModule.describe, RunModule)
1618
.wrap(120)
1719
.strict()
1820
.help()

runner/builder/serve-app.ts

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,12 @@ export async function serveApp(
1313
let serveProcess: ChildProcess | null = null;
1414

1515
try {
16-
const launchMessage = 'Launching app inside a browser';
17-
progressLog('eval', launchMessage);
1816
serveProcess = exec(serveCommand, { cwd: tempDir });
19-
progressLog('eval', launchMessage, `(PID: ${serveProcess.pid})`);
17+
progressLog(
18+
'eval',
19+
'Launching app inside a browser',
20+
`(PID: ${serveProcess.pid})`
21+
);
2022

2123
const actualPort = await new Promise<number>((resolvePort, rejectPort) => {
2224
const serveStartTimeout = 45000; // 45s for serve to start

runner/configuration/constants.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,3 +34,15 @@ export const REPORTS_ROOT_DIR = join(rootDir, 'reports');
3434
* MUST be kept in sync with `RunInfo.version`.
3535
*/
3636
export const REPORT_VERSION = 2;
37+
38+
/** Environments that are shipped together with the eval tool. */
39+
export const BUILT_IN_ENVIRONMENTS = new Map<string, string>([
40+
[
41+
'angular-example',
42+
join(import.meta.dirname, '../../examples/environments/angular/config.js'),
43+
],
44+
[
45+
'solid-example',
46+
join(import.meta.dirname, '../../examples/environments/solid/config.js'),
47+
],
48+
]);

runner/eval-cli.ts

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
import { Arguments, Argv, CommandModule } from 'yargs';
22
import chalk from 'chalk';
3-
import { join } from 'path';
43
import { assertValidModelName, LlmRunner } from './codegen/llm-runner.js';
54
import {
5+
BUILT_IN_ENVIRONMENTS,
66
DEFAULT_AUTORATER_MODEL_NAME,
77
DEFAULT_MODEL_NAME,
88
} from './configuration/constants.js';
@@ -22,17 +22,6 @@ export const EvalModule = {
2222
describe: 'Evaluate code using an LLM',
2323
} satisfies CommandModule<{}, Options>;
2424

25-
const builtInEnvironments = new Map<string, string>([
26-
[
27-
'angular-example',
28-
join(import.meta.dirname, '../examples/environments/angular/config.js'),
29-
],
30-
[
31-
'solid-example',
32-
join(import.meta.dirname, '../examples/environments/solid/config.js'),
33-
],
34-
]);
35-
3625
interface Options {
3726
environment?: string;
3827
model: string;
@@ -198,7 +187,7 @@ async function handler(cliArgs: Arguments<Options>): Promise<void> {
198187
ratingLlm,
199188
model: cliArgs.model,
200189
environmentConfigPath:
201-
builtInEnvironments.get(cliArgs.environment) || cliArgs.environment,
190+
BUILT_IN_ENVIRONMENTS.get(cliArgs.environment) || cliArgs.environment,
202191
localMode: cliArgs.local,
203192
limit: cliArgs.limit,
204193
concurrency: cliArgs.concurrency as number,

runner/run-cli.ts

Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
import { Arguments, Argv, CommandModule } from 'yargs';
2+
import chalk from 'chalk';
3+
import process from 'process';
4+
import { getEnvironmentByPath } from './configuration/environment-resolution.js';
5+
import {
6+
BUILT_IN_ENVIRONMENTS,
7+
LLM_OUTPUT_DIR,
8+
} from './configuration/constants.js';
9+
import { UserFacingError } from './utils/errors.js';
10+
import { existsSync, rmSync } from 'fs';
11+
import { readFile, readdir } from 'fs/promises';
12+
import { join } from 'path';
13+
import { glob } from 'tinyglobby';
14+
import { LlmResponseFile } from './shared-interfaces.js';
15+
import {
16+
setupProjectStructure,
17+
writeResponseFiles,
18+
} from './orchestration/file-system.js';
19+
import { serveApp } from './builder/serve-app.js';
20+
import { ProgressLogger, ProgressType } from './progress/progress-logger.js';
21+
import { formatTitleCard } from './reporting/format.js';
22+
23+
/**
 * Yargs command module for `web-codegen-scorer run`.
 *
 * Wires the `run` sub-command to its option builder and handler so that it can
 * be registered on the CLI alongside the other command modules.
 */
export const RunModule = {
  command: 'run',
  describe: 'Run an evaluated app locally',
  builder,
  handler,
} satisfies CommandModule<{}, Options>;
29+
30+
/** Command-line options accepted by the `run` command. */
interface Options {
  /**
   * Name of a built-in environment or a path to an environment config file.
   * Exposed on the CLI as `--environment` / `--env`.
   */
  environment: string;

  /** Name of the prompt whose local LLM output should be served (`--prompt`). */
  prompt: string;
}
34+
35+
/**
 * Declares the flags supported by the `run` command.
 *
 * Both flags default to an empty string; missing values are reported later
 * with a dedicated error message instead of being rejected by yargs itself.
 *
 * @param argv Yargs instance to which the options are added.
 * @returns The same yargs instance, typed with the `run` command options.
 */
function builder(argv: Argv): Argv<Options> {
  const withEnvironment = argv.option('environment', {
    type: 'string',
    alias: ['env'],
    default: '',
    description: 'Path to the environment configuration file',
  });

  const withPrompt = withEnvironment.option('prompt', {
    type: 'string',
    default: '',
    description: 'ID of the prompt within the environment that should be run',
  });

  return withPrompt.version(false).help();
}
51+
52+
/**
 * Entry point of the `run` command.
 *
 * Errors that are meant to be shown to the user are printed in red without a
 * stack trace; anything else is re-thrown unchanged.
 *
 * @param options Arguments parsed by yargs for the `run` command.
 */
async function handler(options: Arguments<Options>): Promise<void> {
  try {
    await runApp(options);
  } catch (error) {
    // Only user-facing errors are handled here, everything else propagates.
    if (!(error instanceof UserFacingError)) {
      throw error;
    }

    console.error(chalk.red(error.message));
  }
}
63+
64+
/**
 * Sets up a project directory for the requested prompt, writes the locally
 * stored LLM output into it and serves the app until the process is stopped.
 *
 * @param options Options identifying the environment and the prompt to run.
 * @throws UserFacingError When the options cannot be resolved to local output
 *   (propagated from `resolveConfig`).
 */
async function runApp(options: Options) {
  const { environment, rootPromptDef, files } = await resolveConfig(options);
  const progress = new ErrorOnlyProgressLogger();

  console.log(
    `Setting up the "${environment.displayName}" environment with the "${rootPromptDef.name}" prompt...`
  );

  const { directory, cleanup } = await setupProjectStructure(
    environment,
    rootPromptDef,
    progress
  );

  // Resolves once the process is exiting or the user interrupts it (Ctrl+C).
  // The serve callback below awaits this promise to keep the app running.
  const processExitPromise = new Promise<void>((resolve) => {
    let finished = false;

    const done = () => {
      // `SIGINT` is usually followed by `exit`, make sure we only clean up once.
      if (finished) {
        return;
      }
      finished = true;

      try {
        // Note: we don't use `cleanup` here, because the call needs to be synchronous.
        rmSync(directory, { recursive: true });
      } catch {
        // Best-effort: the directory may already be gone.
      }
      resolve();
    };

    // Note: `close` is not an event emitted by `process`, so it isn't listened to.
    process.on('exit', done);
    process.on('SIGINT', done);
  });

  try {
    await writeResponseFiles(directory, files, environment, rootPromptDef.name);

    await serveApp(
      environment.serveCommand,
      rootPromptDef.name,
      directory,
      () => {},
      async (url) => {
        console.log();
        console.log(formatTitleCard(`🎉 App is up and running at ${url}`));
        // Keep the callback pending until the user stops the process.
        await processExitPromise;
      }
    );
  } finally {
    await cleanup();
  }
}
112+
113+
/**
 * Validates the CLI options and resolves everything needed to run an app:
 * the environment, the prompt definition and the files previously produced
 * by the LLM for that prompt.
 *
 * @param options Options identifying the environment and the prompt to run.
 * @returns The resolved environment, the matching prompt definition and the
 *   LLM output files (paths relative to the prompt's output directory).
 * @throws UserFacingError When a flag is missing, when there is no local LLM
 *   output for the environment or prompt, or when the environment does not
 *   define the requested prompt.
 */
async function resolveConfig(options: Options) {
  if (!options.environment) {
    throw new UserFacingError(
      [
        '`--env` flag has not been specified. You have the following options:',
        ' - Pass a path to an environment config file using the `--env` flag.',
        ' - Pass `--env=angular-example` or `--env=solid-example` to use one of our built-in example environments.',
        ' - Pass `--help` to see all available options.',
      ].join('\n')
    );
  }

  if (!options.prompt) {
    throw new UserFacingError(
      '`--prompt` flag has not been specified. ' +
        'You have to pass a prompt name through the `--prompt` flag.'
    );
  }

  // Built-in environment names take precedence, otherwise the value is treated as a path.
  const environment = await getEnvironmentByPath(
    BUILT_IN_ENVIRONMENTS.get(options.environment) ?? options.environment
  );
  const environmentDir = join(LLM_OUTPUT_DIR, environment.id);

  if (!existsSync(environmentDir)) {
    throw new UserFacingError(
      `Could not find any LLM output for environment "${environment.displayName}" under "${environmentDir}"`
    );
  }

  const prompts = await getPossiblePrompts(environmentDir);

  if (!prompts.includes(options.prompt)) {
    throw new UserFacingError(
      `There is no local LLM output for prompt "${options.prompt}".\n` +
        `The following prompts have local data:\n` +
        prompts.map((p) => ` - ${p}`).join('\n')
    );
  }

  const rootPromptDef = environment.executablePrompts.find(
    (p) => p.name === options.prompt
  );

  if (!rootPromptDef) {
    throw new UserFacingError(
      `Environment "${environment.displayName}" does not have a prompt with a name of "${options.prompt}".\n` +
        `The following prompts are available:\n` +
        environment.executablePrompts.map((p) => ` - ${p.name}`).join('\n')
    );
  }

  // Read back every file that was generated for the prompt.
  const promptDir = join(environmentDir, options.prompt);
  const filePaths = await glob('**/*', { cwd: promptDir });
  const files: LlmResponseFile[] = await Promise.all(
    filePaths.map(async (path) => {
      return {
        filePath: path,
        code: await readFile(join(promptDir, path), 'utf8'),
      };
    })
  );

  return { environment, rootPromptDef, files };
}
176+
177+
/**
 * Lists the names of the sub-directories directly inside an environment's
 * LLM output directory. Each sub-directory name is treated as a prompt name.
 *
 * @param environmentDir Directory containing the LLM output of an environment.
 * @returns Names of the immediate sub-directories, in the order reported by `readdir`.
 */
async function getPossiblePrompts(environmentDir: string): Promise<string[]> {
  const promptNames: string[] = [];

  for (const entry of await readdir(environmentDir, { withFileTypes: true })) {
    // Plain files are not prompt outputs, only directories are.
    if (entry.isDirectory()) {
      promptNames.push(entry.name);
    }
  }

  return promptNames;
}
183+
184+
/**
 * Progress logger that ignores everything except errors.
 *
 * Used while running an app locally, where only failures are printed.
 */
class ErrorOnlyProgressLogger implements ProgressLogger {
  /** No-op: nothing needs to be set up. */
  initialize(): void {}

  /** No-op: nothing needs to be torn down. */
  finalize(): void {}

  /**
   * Prints the message (and details, if any) to stderr in red when `type`
   * is `'error'`; all other log types are dropped.
   *
   * @param _ Unused first argument of the logger interface.
   * @param type Type of the progress entry.
   * @param message Main message of the entry.
   * @param details Optional additional information about the entry.
   */
  log(_: unknown, type: ProgressType, message: string, details?: string) {
    if (type !== 'error') {
      return;
    }

    console.error(chalk.red(message));

    if (details) {
      console.error(chalk.red(details));
    }
  }
}

0 commit comments

Comments
 (0)