Commit 37e35c8

refactor: better organization and support for advanced executors
* Renames `Gateway` to `Executor`, with strict validation via Zod.
* Attaches executor-specific configuration to the executor itself, instead of letting the (previous) gateways extract information from the `Environment` through multiple layers of indirection, which also caused lots of difficulties around generics and inheritance.
* No longer passes gateways or executors around, since they can be accessed directly from the `Environment`.
* Adds a compatibility layer for old configs where the local executor is "assumed" and configured on the top-level environment config object.
* Updates the remote environment example with the new API.
* Improves the gateway/executor API with better naming and support for async model validation and teardown.
1 parent b6e0ca6 commit 37e35c8

32 files changed: +1002 −929 lines
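For orientation, here is a rough sketch of the contract the renamed `Executor` is expected to satisfy, inferred purely from the updated `fake-executor.ts` example further down in this commit. Signatures are abridged and the interface name below is illustrative; the authoritative, Zod-validated `Executor` definition lives in the runner and is not reproduced in this diff.

// Illustrative shape only; the method list and parameter types are taken from
// the FakeRemoteExecutor example below. Import paths match that example.
import {
  BuildResult,
  EvalID,
  LlmContextFile,
  LlmGenerateFilesRequest,
  LlmResponse,
  LlmResponseFile,
  RootPromptDefinition,
} from '../../../runner';
import {ProgressLogger} from '../../../runner/progress/progress-logger';

interface ExecutorContractSketch {
  initializeEval(): Promise<unknown>; // return type not visible in this diff
  generateInitialFiles(
    id: EvalID,
    requestCtx: LlmGenerateFilesRequest,
    model: string,
    contextFiles: LlmContextFile[],
    abortSignal: AbortSignal,
  ): Promise<LlmResponse>;
  generateRepairFiles(
    id: EvalID,
    requestCtx: LlmGenerateFilesRequest,
    model: string,
    errorMessage: string,
    appFiles: LlmResponseFile[],
    contextFiles: LlmContextFile[],
    abortSignal: AbortSignal,
  ): Promise<LlmResponse>;
  performBuild(
    id: EvalID,
    appDirectoryPath: string,
    rootPromptDef: RootPromptDefinition,
  ): Promise<BuildResult>;
  serveWebApplication<T>(
    id: EvalID,
    appDirectoryPath: string,
    rootPromptDef: RootPromptDefinition,
    progress: ProgressLogger,
    logicWhileServing: (serveUrl: string) => Promise<T>,
  ): Promise<T>;
  shouldRepairFailedBuilds(): Promise<boolean>;
  isSupportedModel(): Promise<{supported: boolean}>; // async model validation
  getExecutorInfo(): Promise<{id: string; displayName: string; mcpServersLaunched: number}>;
  finalizeEval(): Promise<void>;
  destroy(): Promise<void>; // async teardown
}

Configs then reference an executor instance directly (see the updated remote_env example below), while old-style configs without an explicit executor fall back to the compatibility layer that assumes the local executor.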

examples/environments/angular/config.js

Lines changed: 1 addition & 2 deletions
@@ -1,6 +1,6 @@
 // @ts-check
 
-import { getBuiltInRatings } from 'web-codegen-scorer';
+import {getBuiltInRatings} from 'web-codegen-scorer';
 
 /** @type {import("web-codegen-scorer").EnvironmentConfig} */
 export default {
@@ -11,5 +11,4 @@ export default {
   generationSystemPrompt: './system-instructions.md',
   executablePrompts: ['../../prompts/**/*.md'],
   packageManager: 'npm',
-  buildCommand: '',
 };
examples/environments/remote_env/config.js

Lines changed: 5 additions & 5 deletions

@@ -1,18 +1,18 @@
 // @ts-check
 
 /**
- * @import {RemoteEnvironmentConfig} from 'web-codegen-scorer';
+ * @import {EnvironmentConfig} from 'web-codegen-scorer';
  */
 
-import { getBuiltInRatings } from 'web-codegen-scorer';
-import { FakeRemoteGateway } from './fake-gateway';
+import {getBuiltInRatings} from 'web-codegen-scorer';
+import {FakeRemoteExecutor} from './fake-executor';
 
-/** @type {RemoteEnvironmentConfig} */
+/** @type {EnvironmentConfig} */
 export default {
   displayName: 'Remote Env (example)',
   clientSideFramework: 'angular',
   ratings: getBuiltInRatings(),
   generationSystemPrompt: './system-instructions.md',
   executablePrompts: ['../../prompts/**/*.md'],
-  gateway: new FakeRemoteGateway(),
+  executor: new FakeRemoteExecutor(),
 };

examples/environments/remote_env/fake-gateway.ts renamed to examples/environments/remote_env/fake-executor.ts

Lines changed: 29 additions & 19 deletions
@@ -2,17 +2,16 @@ import {
   BuildResult,
   BuildResultStatus,
   EvalID,
-  Gateway,
+  Executor,
   LlmContextFile,
+  LlmGenerateFilesRequest,
   LlmResponse,
   LlmResponseFile,
-  RemoteEnvironment,
   RootPromptDefinition,
 } from '../../../runner';
-import { LlmGenerateFilesContext } from '../../../runner/codegen/llm-runner';
-import { ProgressLogger } from '../../../runner/progress/progress-logger';
+import {ProgressLogger} from '../../../runner/progress/progress-logger';
 
-export class FakeRemoteGateway implements Gateway<RemoteEnvironment> {
+export class FakeRemoteExecutor implements Executor {
   ids = 0;
 
   async initializeEval() {
@@ -26,46 +25,45 @@ export class FakeRemoteGateway implements Gateway<RemoteEnvironment> {
   async performFakeLlmRequest(): Promise<LlmResponse> {
     return {
       success: true,
-      outputFiles: [{ code: 'Works!', filePath: 'main.ts' }],
+      outputFiles: [{code: 'Works!', filePath: 'main.ts'}],
       reasoning: '',
       errors: [],
-      usage: { inputTokens: 0, totalTokens: 0, outputTokens: 0 },
+      usage: {inputTokens: 0, totalTokens: 0, outputTokens: 0},
     };
   }
 
   generateInitialFiles(
     id: EvalID,
-    requestCtx: LlmGenerateFilesContext,
+    requestCtx: LlmGenerateFilesRequest,
     model: string,
     contextFiles: LlmContextFile[],
-    abortSignal: AbortSignal
+    abortSignal: AbortSignal,
   ): Promise<LlmResponse> {
     // Generate the initial files of the eval app.
     // This generation can happen on a remote service with access to private models.
     return this.performFakeLlmRequest();
   }
 
-  repairBuild(
+  generateRepairFiles(
     id: EvalID,
-    requestCtx: LlmGenerateFilesContext,
+    requestCtx: LlmGenerateFilesRequest,
     model: string,
     errorMessage: string,
     appFiles: LlmResponseFile[],
     contextFiles: LlmContextFile[],
-    abortSignal: AbortSignal
+    abortSignal: AbortSignal,
  ): Promise<LlmResponse> {
     // Repair the given eval app.
     // This generation can happen on a remote service with access to private models.
     return this.performFakeLlmRequest();
   }
 
-  async serveBuild<T>(
+  async serveWebApplication<T>(
     id: EvalID,
-    env: RemoteEnvironment,
     appDirectoryPath: string,
     rootPromptDef: RootPromptDefinition,
     progress: ProgressLogger,
-    logicWhileServing: (serveUrl: string) => Promise<T>
+    logicWhileServing: (serveUrl: string) => Promise<T>,
   ): Promise<T> {
     // Start serving of the app.
     // Invoke the logic while the server is running.
@@ -74,12 +72,10 @@ export class FakeRemoteGateway implements Gateway<RemoteEnvironment> {
     return result;
   }
 
-  async tryBuild(
+  async performBuild(
     id: EvalID,
-    env: RemoteEnvironment,
     appDirectoryPath: string,
     rootPromptDef: RootPromptDefinition,
-    progress: ProgressLogger
   ): Promise<BuildResult> {
     // Here, building can happen in the remote service.
     // Eval ID is useful here for storing the build on a server, for re-using later when serving.
@@ -89,7 +85,7 @@ export class FakeRemoteGateway implements Gateway<RemoteEnvironment> {
     };
   }
 
-  shouldRetryFailedBuilds() {
+  async shouldRepairFailedBuilds() {
     // Some environments have a builtin retry loop as part of initial generation.
     // In those cases, you may want to skip retrying.
     return true;
@@ -98,4 +94,18 @@ export class FakeRemoteGateway implements Gateway<RemoteEnvironment> {
   async finalizeEval() {
     // Do your cleanup.
   }
+
+  async isSupportedModel() {
+    return {supported: true};
+  }
+
+  async getExecutorInfo() {
+    return {
+      id: 'fake-executor',
+      displayName: 'Fake Executor',
+      mcpServersLaunched: 0,
+    };
+  }
+
+  async destroy() {}
 }
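For context on the new lifecycle hooks, here is a minimal, illustrative driver showing when they might be invoked around a single eval. The control flow is an assumption for illustration only (the runner's actual orchestration is not part of this diff); only the method names come from the executor above.

import {FakeRemoteExecutor} from './fake-executor';

// Hypothetical driver, for illustration only.
async function sketchSingleEval(): Promise<void> {
  const executor = new FakeRemoteExecutor();

  // New in this commit: model support can be validated asynchronously.
  const {supported} = await executor.isSupportedModel();
  if (!supported) {
    throw new Error('The selected model is not supported by this executor.');
  }

  // Also new: executors can describe themselves.
  console.log(await executor.getExecutorInfo()); // {id: 'fake-executor', ...}

  // Generation, build, repair and serving (generateInitialFiles, performBuild,
  // generateRepairFiles, serveWebApplication) are driven by the runner here.

  // Also new: executors can be torn down asynchronously once evals finish.
  await executor.destroy();
}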

runner/codegen/base-cli-agent-runner.ts

Lines changed: 13 additions & 11 deletions
@@ -3,10 +3,10 @@ import {join, relative} from 'path';
 import {existsSync} from 'fs';
 import assert from 'assert';
 import {
-  LlmConstrainedOutputGenerateResponse,
-  LlmGenerateFilesRequestOptions,
-  LlmGenerateFilesResponse,
-  LlmGenerateTextResponse,
+  LocalLlmConstrainedOutputGenerateResponse,
+  LocalLlmGenerateFilesRequestOptions,
+  LocalLlmGenerateFilesResponse,
+  LocalLlmGenerateTextResponse,
 } from './llm-runner.js';
 import {DirectorySnapshot} from './directory-snapshot.js';
 import {LlmResponseFile} from '../shared-interfaces.js';
@@ -17,8 +17,8 @@ export abstract class BaseCliAgentRunner {
   abstract readonly displayName: string;
   protected abstract readonly binaryName: string;
   protected abstract readonly ignoredFilePatterns: string[];
-  protected abstract getCommandLineFlags(options: LlmGenerateFilesRequestOptions): string[];
-  protected abstract writeAgentFiles(options: LlmGenerateFilesRequestOptions): Promise<void>;
+  protected abstract getCommandLineFlags(options: LocalLlmGenerateFilesRequestOptions): string[];
+  protected abstract writeAgentFiles(options: LocalLlmGenerateFilesRequestOptions): Promise<void>;
   protected inactivityTimeoutMins = 2;
   protected totalRequestTimeoutMins = 10;
 
@@ -27,7 +27,9 @@ export abstract class BaseCliAgentRunner {
   private binaryPath: string | null = null;
   private commonIgnoredPatterns = ['**/node_modules/**', '**/dist/**', '**/.angular/**'];
 
-  async generateFiles(options: LlmGenerateFilesRequestOptions): Promise<LlmGenerateFilesResponse> {
+  async generateFiles(
+    options: LocalLlmGenerateFilesRequestOptions,
+  ): Promise<LocalLlmGenerateFilesResponse> {
     const {context} = options;
 
     // TODO: Consider removing these assertions when we have better types.
@@ -64,12 +66,12 @@ export abstract class BaseCliAgentRunner {
     return {files, reasoning, toolLogs: []};
   }
 
-  generateText(): Promise<LlmGenerateTextResponse> {
+  generateText(): Promise<LocalLlmGenerateTextResponse> {
     // Technically we can make this work, but we don't need it at the time of writing.
     throw new UserFacingError(`Generating text with ${this.displayName} is not supported.`);
   }
 
-  generateConstrained(): Promise<LlmConstrainedOutputGenerateResponse<any>> {
+  generateConstrained(): Promise<LocalLlmConstrainedOutputGenerateResponse<any>> {
     // We can't support this, because there's no straightforward
     // way to tell the agent to follow a schema.
     throw new UserFacingError(`Constrained output with ${this.displayName} is not supported.`);
@@ -117,7 +119,7 @@ export abstract class BaseCliAgentRunner {
   }
 
   /** Gets the common system instructions for all agents. */
-  protected getCommonInstructions(options: LlmGenerateFilesRequestOptions) {
+  protected getCommonInstructions(options: LocalLlmGenerateFilesRequestOptions) {
     return [
       `# Important Rules`,
       `The following instructions dictate how you should behave. It is CRITICAL that you follow them AS CLOSELY AS POSSIBLE:`,
@@ -170,7 +172,7 @@ export abstract class BaseCliAgentRunner {
     return binaryPath;
   }
 
-  private runAgentProcess(options: LlmGenerateFilesRequestOptions): Promise<string> {
+  private runAgentProcess(options: LocalLlmGenerateFilesRequestOptions): Promise<string> {
     return new Promise<string>(resolve => {
       let stdoutBuffer = '';
       let stdErrBuffer = '';

runner/codegen/claude-code-runner.ts

Lines changed: 8 additions & 4 deletions
@@ -1,4 +1,8 @@
-import {LlmGenerateFilesContext, LlmGenerateFilesRequestOptions, LlmRunner} from './llm-runner.js';
+import {
+  LocalLlmGenerateFilesContext,
+  LocalLlmGenerateFilesRequestOptions,
+  LlmRunner,
+} from './llm-runner.js';
 import {join} from 'path';
 import {mkdirSync} from 'fs';
 import {writeFile} from 'fs/promises';
@@ -25,7 +29,7 @@ export class ClaudeCodeRunner extends BaseCliAgentRunner implements LlmRunner {
     return Object.keys(MODEL_MAPPING);
   }
 
-  protected getCommandLineFlags(options: LlmGenerateFilesRequestOptions): string[] {
+  protected getCommandLineFlags(options: LocalLlmGenerateFilesRequestOptions): string[] {
     return [
       '--print',
       '--model',
@@ -39,7 +43,7 @@ export class ClaudeCodeRunner extends BaseCliAgentRunner implements LlmRunner {
     ];
   }
 
-  protected async writeAgentFiles(options: LlmGenerateFilesRequestOptions): Promise<void> {
+  protected async writeAgentFiles(options: LocalLlmGenerateFilesRequestOptions): Promise<void> {
     const {context} = options;
     const instructionFilePath = join(context.directory, 'CLAUDE.md');
     const settingsDir = join(context.directory, '.claude');
@@ -52,7 +56,7 @@ export class ClaudeCodeRunner extends BaseCliAgentRunner implements LlmRunner {
     ]);
   }
 
-  private getSettingsJsonFile(context: LlmGenerateFilesContext): string {
+  private getSettingsJsonFile(context: LocalLlmGenerateFilesContext): string {
     const ignoredPatterns = super.getCommonIgnorePatterns();
     const deniedPermissions: string[] = [
       // Block some commands like `git` and `npm install` since they aren't relevant for the evals.

runner/codegen/codex-runner.ts

Lines changed: 3 additions & 3 deletions
@@ -1,4 +1,4 @@
-import {LlmGenerateFilesRequestOptions, LlmRunner} from './llm-runner.js';
+import {LocalLlmGenerateFilesRequestOptions, LlmRunner} from './llm-runner.js';
 import {join} from 'path';
 import {mkdirSync} from 'fs';
 import {writeFile} from 'fs/promises';
@@ -22,7 +22,7 @@ export class CodexRunner extends BaseCliAgentRunner implements LlmRunner {
     return Object.keys(MODEL_MAPPING);
   }
 
-  protected getCommandLineFlags(options: LlmGenerateFilesRequestOptions): string[] {
+  protected getCommandLineFlags(options: LocalLlmGenerateFilesRequestOptions): string[] {
     return [
       'exec',
       '--model',
@@ -34,7 +34,7 @@ export class CodexRunner extends BaseCliAgentRunner implements LlmRunner {
     ];
   }
 
-  protected async writeAgentFiles(options: LlmGenerateFilesRequestOptions): Promise<void> {
+  protected async writeAgentFiles(options: LocalLlmGenerateFilesRequestOptions): Promise<void> {
     const {context} = options;
     const instructionFilePath = join(context.directory, 'AGENTS.md');
     const settingsDir = join(context.directory, '.codex');

runner/codegen/gemini-cli-runner.ts

Lines changed: 3 additions & 3 deletions
@@ -1,4 +1,4 @@
-import {LlmGenerateFilesRequestOptions, LlmRunner} from './llm-runner.js';
+import {LocalLlmGenerateFilesRequestOptions, LlmRunner} from './llm-runner.js';
 import {join} from 'path';
 import {mkdirSync} from 'fs';
 import {writeFile} from 'fs/promises';
@@ -18,7 +18,7 @@ export class GeminiCliRunner extends BaseCliAgentRunner implements LlmRunner {
     return SUPPORTED_MODELS;
   }
 
-  protected getCommandLineFlags(options: LlmGenerateFilesRequestOptions): string[] {
+  protected getCommandLineFlags(options: LocalLlmGenerateFilesRequestOptions): string[] {
     return [
       '--prompt',
       options.context.executablePrompt,
@@ -30,7 +30,7 @@ export class GeminiCliRunner extends BaseCliAgentRunner implements LlmRunner {
     ];
   }
 
-  protected async writeAgentFiles(options: LlmGenerateFilesRequestOptions): Promise<void> {
+  protected async writeAgentFiles(options: LocalLlmGenerateFilesRequestOptions): Promise<void> {
     const {context} = options;
     const ignoreFilePath = join(context.directory, '.geminiignore');
     const instructionFilePath = join(context.directory, 'GEMINI.md');

runner/codegen/genkit/genkit-runner.ts

Lines changed: 17 additions & 13 deletions
@@ -4,13 +4,13 @@ import {GenkitPlugin, GenkitPluginV2} from 'genkit/plugin';
 import {z} from 'zod';
 import {
   McpServerOptions,
-  LlmConstrainedOutputGenerateRequestOptions,
-  LlmConstrainedOutputGenerateResponse,
+  LocalLlmConstrainedOutputGenerateRequestOptions,
+  LocalLlmConstrainedOutputGenerateResponse,
   LlmRunner,
-  LlmGenerateFilesResponse,
-  LlmGenerateTextResponse,
-  LlmGenerateTextRequestOptions,
-  LlmGenerateFilesRequestOptions,
+  LocalLlmGenerateFilesResponse,
+  LocalLlmGenerateTextResponse,
+  LocalLlmGenerateTextRequestOptions,
+  LocalLlmGenerateFilesRequestOptions,
 } from '../llm-runner.js';
 import {setTimeout} from 'node:timers/promises';
 import {callWithTimeout} from '../../utils/timeout.js';
@@ -34,8 +34,8 @@ export class GenkitRunner implements LlmRunner {
   private toolLogs: ToolLogEntry[] = [];
 
   async generateConstrained<T extends z.ZodTypeAny = z.ZodTypeAny>(
-    options: LlmConstrainedOutputGenerateRequestOptions<T>,
-  ): Promise<LlmConstrainedOutputGenerateResponse<T>> {
+    options: LocalLlmConstrainedOutputGenerateRequestOptions<T>,
+  ): Promise<LocalLlmConstrainedOutputGenerateResponse<T>> {
     const {provider, model} = this.resolveModel(options.model);
     const result = await this._genkitRequest(provider, model, options);
 
@@ -46,8 +46,10 @@ export class GenkitRunner implements LlmRunner {
     };
   }
 
-  async generateFiles(options: LlmGenerateFilesRequestOptions): Promise<LlmGenerateFilesResponse> {
-    const requestOptions: LlmConstrainedOutputGenerateRequestOptions = {
+  async generateFiles(
+    options: LocalLlmGenerateFilesRequestOptions,
+  ): Promise<LocalLlmGenerateFilesResponse> {
+    const requestOptions: LocalLlmConstrainedOutputGenerateRequestOptions = {
       ...options,
       prompt: options.context.combinedPrompt,
       schema: z.object({
@@ -80,7 +82,9 @@ export class GenkitRunner implements LlmRunner {
     return this.toolLogs.splice(0);
   }
 
-  async generateText(options: LlmGenerateTextRequestOptions): Promise<LlmGenerateTextResponse> {
+  async generateText(
+    options: LocalLlmGenerateTextRequestOptions,
+  ): Promise<LocalLlmGenerateTextResponse> {
     const {provider, model} = this.resolveModel(options.model);
     const result = await this._genkitRequest(provider, model, options);
 
@@ -103,14 +107,14 @@ export class GenkitRunner implements LlmRunner {
   private async _genkitRequest(
     provider: GenkitModelProvider,
     model: ModelReference<any>,
-    options: LlmGenerateTextRequestOptions | LlmConstrainedOutputGenerateRequestOptions,
+    options: LocalLlmGenerateTextRequestOptions | LocalLlmConstrainedOutputGenerateRequestOptions,
   ) {
     return await rateLimitLLMRequest(
       provider,
       model,
       {messages: options.messages || [], prompt: options.prompt},
       () => {
-        const schema = (options as Partial<LlmConstrainedOutputGenerateRequestOptions>).schema;
+        const schema = (options as Partial<LocalLlmConstrainedOutputGenerateRequestOptions>).schema;
         const performRequest = async () => {
           let tools: ToolAction[] | undefined;
           let resources: DynamicResourceAction[] | undefined;
