Skip to content

Commit 41ada54

Browse files
committed
feat: remote environment support
This commit reorganizes the `web-codegen-scorer` to support remote environments. A remote environment is similar to the existing concept of environments, except that the lifecycle of the environment can be managed by a standalone server hosted within, e.g., a corporate network. The server would then provide additional features to the web-codegen-scorer, such as:
- different models for file generation
- different execution sandboxes for building and serving an app (e.g. consider a framework like Wiz that is internal to Google)

In practice, a remote environment exposes all of the important internal hooks to advanced users, so that they can be fully in charge of:
- file generation via LLMs
- building generated apps
- repairing generated apps
- serving generated apps

Most users will never have to deal with this, but the architecture is highly beneficial for further separation of concerns in the codebase, and it potentially paves the way to supporting different languages (if we intend to do so), because the logic for testing a "served app" is easy to disable with these changes.
1 parent 068d084 commit 41ada54

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

53 files changed

+1855
-1342
lines changed

examples/environments/angular/config.js

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
// @ts-check
2+
13
import { getBuiltInRatings } from 'web-codegen-scorer';
24

35
/** @type {import("web-codegen-scorer").EnvironmentConfig} */
@@ -9,4 +11,5 @@ export default {
911
generationSystemPrompt: './system-instructions.md',
1012
executablePrompts: ['../../prompts/**/*.md'],
1113
packageManager: 'npm',
14+
buildCommand: '',
1215
};
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
import { getBuiltInRatings } from 'web-codegen-scorer';
2+
3+
/** @type {import("web-codegen-scorer").EnvironmentConfig} */
4+
export default {
5+
displayName: 'Remote Env (example)',
6+
clientSideFramework: 'angular',
7+
sourceDirectory: './project',
8+
ratings: getBuiltInRatings(),
9+
generationSystemPrompt: './system-instructions.md',
10+
executablePrompts: ['../../prompts/**/*.md'],
11+
packageManager: 'npm',
12+
};

report-app/src/app/pages/report-viewer/report-viewer.html

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -257,22 +257,22 @@ <h2>Generated applications</h2>
257257
{{ result.promptDef.name }}
258258

259259
<div class="status-badge-group">
260-
@let initialBuild = result.attemptDetails[0].buildResult;
261-
@let repairBuild =
260+
@let initialAttempt = result.attemptDetails[0];
261+
@let repairAttempt =
262262
result.attemptDetails.length > 1
263-
? result.attemptDetails[1].buildResult
263+
? result.attemptDetails[1]
264264
: null;
265-
@let finalBuild = repairBuild ?? initialBuild;
265+
@let finalAttempt = result.attemptDetails.at(-1)!;
266266

267-
@if (finalBuild.runtimeErrors) {
267+
@if (finalAttempt.serveTestingResult?.runtimeErrors) {
268268
<span class="status-badge error">Runtime error</span>
269269
}
270270

271-
@if (repairBuild?.status === 'error') {
271+
@if (repairAttempt?.buildResult?.status === 'error') {
272272
<span class="status-badge error">Build after repair</span>
273273
}
274274

275-
@if (initialBuild.status === 'error') {
275+
@if (initialAttempt?.buildResult?.status === 'error') {
276276
<span class="status-badge error">Initial build failed</span>
277277
}
278278
</div>
@@ -354,7 +354,8 @@ <h5>
354354
<h4>Additional info</h4>
355355
@for (attempt of result.attemptDetails; track attempt) {
356356
@let isBuilt = attempt.buildResult.status === 'success';
357-
@let axeViolations = attempt.buildResult.axeViolations;
357+
@let axeViolations =
358+
attempt.serveTestingResult?.axeViolations;
358359
@let hasAxeViolations =
359360
axeViolations && axeViolations.length > 0;
360361

@@ -501,12 +502,12 @@ <h5>Response</h5>
501502
}
502503
</div>
503504

504-
@if (finalBuild.runtimeErrors) {
505+
@let finalRuntimeErrors =
506+
finalAttempt.serveTestingResult?.runtimeErrors;
507+
@if (finalRuntimeErrors) {
505508
<div class="app-details-section">
506509
<h4>Runtime errors</h4>
507-
<pre class="callout warn code">{{
508-
finalBuild.runtimeErrors
509-
}}</pre>
510+
<pre class="callout warn code">{{ finalRuntimeErrors }}</pre>
510511
</div>
511512
}
512513

report-app/src/app/pages/report-viewer/report-viewer.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ import {
1212
viewChild,
1313
} from '@angular/core';
1414
import { NgxJsonViewerModule } from 'ngx-json-viewer';
15-
import { BuildErrorType } from '../../../../../runner/builder/builder-types';
15+
import { BuildErrorType } from '../../../../../runner/workers/builder/builder-types';
1616
import {
1717
AssessmentResult,
1818
IndividualAssessment,
@@ -237,7 +237,7 @@ export class ReportViewer {
237237
});
238238

239239
protected getScreenshotUrl(result: AssessmentResult): string | null {
240-
return result.build.screenshotPngUrl ?? null;
240+
return result.finalAttempt.serveTestingResult?.screenshotPngUrl ?? null;
241241
}
242242

243243
protected isLoading = this.reportsFetcher.isLoadingSingleReport;

report-app/src/app/shared/debugging-zip.ts

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { BuildResultStatus } from '../../../../runner/builder/builder-types';
1+
import { BuildResultStatus } from '../../../../runner/workers/builder/builder-types';
22
import {
33
AssessmentResult,
44
RunInfo,
@@ -31,11 +31,11 @@ export async function createPromptDebuggingZip(
3131
zip.file('generated-files.md', generatedFiles);
3232

3333
let errors = ``;
34-
if (app.build.runtimeErrors) {
35-
errors += `## Runtime errors\n${app.build.runtimeErrors}\n`;
34+
if (app.finalAttempt.serveTestingResult?.runtimeErrors) {
35+
errors += `## Runtime errors\n${app.finalAttempt.serveTestingResult?.runtimeErrors}\n`;
3636
}
37-
if (app.build.status === BuildResultStatus.ERROR) {
38-
errors += `## Build error\n ${app.build.message}`;
37+
if (app.finalAttempt.buildResult.status === BuildResultStatus.ERROR) {
38+
errors += `## Build error\n ${app.finalAttempt.buildResult.message}`;
3939
}
4040

4141
zip.file('errors.md', errors);

runner/builder/builder-types.ts

Lines changed: 0 additions & 96 deletions
This file was deleted.

runner/codegen/gemini-cli/gemini-cli-runner.ts

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import {
1818
import { DirectorySnapshot } from './directory-snapshot.js';
1919
import { LlmResponseFile } from '../../shared-interfaces.js';
2020
import { UserFacingError } from '../../utils/errors.js';
21+
import assert from 'assert';
2122

2223
const SUPPORTED_MODELS = [
2324
'gemini-2.5-pro',
@@ -45,6 +46,19 @@ export class GeminiCliRunner implements LlmRunner {
4546
options: LlmGenerateFilesRequestOptions
4647
): Promise<LlmGenerateFilesResponse> {
4748
const { context, model } = options;
49+
50+
// TODO: Consider removing these assertions when we have better types here.
51+
// These fields are always set when running in a local environment, and this
52+
// is a requirement for selecting the `gemini-cli` runner.
53+
assert(
54+
context.buildCommand,
55+
'Expected a `buildCommand` to be set in the LLM generate request context'
56+
);
57+
assert(
58+
context.packageManager,
59+
'Expected a `packageManager` to be set in the LLM generate request context'
60+
);
61+
4862
const ignoreFilePath = join(context.directory, '.geminiignore');
4963
const instructionFilePath = join(context.directory, 'GEMINI.md');
5064
const settingsDir = join(context.directory, '.gemini');

runner/codegen/llm-runner.ts

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -98,14 +98,24 @@ export interface LlmGenerateFilesContext {
9898
/**
9999
* Combined system instructions and prompt for the environments
100100
* where the two can't be provided separately.
101+
*
102+
* TODO(crisbeto): Can we explain the reason for this better?
101103
*/
102104
combinedPrompt: string;
103105
/** Directory in which the generation will occur. */
104106
directory: string;
105-
/** Command that the LLM can use to verify that the build works. */
106-
buildCommand: string;
107-
/** Package manager that the LLM can use. */
108-
packageManager: string;
107+
/**
108+
* Command that the LLM can use to verify that the build works.
109+
*
110+
* Can be `undefined` for remote environments.
111+
*/
112+
buildCommand: string | undefined;
113+
/**
114+
* Package manager that the LLM can use.
115+
*
116+
* Can be `undefined` for remote environments.
117+
*/
118+
packageManager: string | undefined;
109119
/** All available package managers supported by the runner. */
110120
possiblePackageManagers: string[];
111121
}
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
import z from 'zod';
2+
import { ratingSchema } from '../ratings/rating-types.js';
3+
import { MultiStepPrompt } from './multi-step-prompt.js';
4+
import { mcpServerOptionsSchema } from '../codegen/llm-runner.js';
5+
import { getPossiblePackageManagers } from './environment-config.js';
6+
7+
export const baseEnvironmentConfigSchema = z.strictObject({
8+
/** Display name for the environment. */
9+
displayName: z.string(),
10+
/**
11+
* Optional unique ID for the environment.
12+
* If one isn't provided, it will be computed from the `displayName`.
13+
*/
14+
id: z.string().optional(),
15+
/** ID of the client-side framework used within the environment. */
16+
clientSideFramework: z.string(),
17+
/** Ratings to run when evaluating the environment. */
18+
ratings: z.array(ratingSchema),
19+
/** Path to the prompt used by the LLM for generating files. */
20+
generationSystemPrompt: z.string(),
21+
/**
22+
* Path to the prompt used by the LLM for repairing builds or failures.
23+
*
24+
* If unset or `null`, the eval tool will use its default repair instructions.
25+
*/
26+
repairSystemPrompt: z.union([z.string(), z.null()]).optional(),
27+
/**
28+
* Path to the prompt used by the LLM for editing.
29+
*
30+
* Prompts running after the initial generation are considered as editing (e.g. multi step prompts).
31+
* If `null`, the eval tool will use the generation prompt for edits.
32+
*/
33+
editingSystemPrompt: z.union([z.string(), z.null()]).optional(),
34+
/** Prompts that should be sent to the LLM and written into the output. */
35+
executablePrompts: z.array(
36+
z.union([
37+
z.string(),
38+
z.strictObject({
39+
path: z.string(),
40+
name: z.string().optional(),
41+
ratings: z.array(ratingSchema).optional(),
42+
}),
43+
z.custom<MultiStepPrompt>((data) => data instanceof MultiStepPrompt),
44+
])
45+
),
46+
/**
47+
* ID of the fullstack framework used within the environment.
48+
* If omitted, it will default to the `clientSideFramework`.
49+
*/
50+
fullStackFramework: z.string().optional(),
51+
/** Path to the prompt to use when rating code. */
52+
codeRatingPrompt: z.string().optional(),
53+
/** When enabled, the system prompts for this environment won't be included in the report. */
54+
classifyPrompts: z.boolean().optional(),
55+
});

0 commit comments

Comments
 (0)