diff --git a/examples/environments/angular/config.js b/examples/environments/angular/config.js index d960399..2f6edc8 100644 --- a/examples/environments/angular/config.js +++ b/examples/environments/angular/config.js @@ -1,6 +1,6 @@ // @ts-check -import { getBuiltInRatings } from 'web-codegen-scorer'; +import {getBuiltInRatings} from 'web-codegen-scorer'; /** @type {import("web-codegen-scorer").EnvironmentConfig} */ export default { @@ -11,5 +11,4 @@ export default { generationSystemPrompt: './system-instructions.md', executablePrompts: ['../../prompts/**/*.md'], packageManager: 'npm', - buildCommand: '', }; diff --git a/examples/environments/remote_env/config.js b/examples/environments/remote_env/config.js index 7ec084b..25c3912 100644 --- a/examples/environments/remote_env/config.js +++ b/examples/environments/remote_env/config.js @@ -1,18 +1,18 @@ // @ts-check /** - * @import {RemoteEnvironmentConfig} from 'web-codegen-scorer'; + * @import {EnvironmentConfig} from 'web-codegen-scorer'; */ -import { getBuiltInRatings } from 'web-codegen-scorer'; -import { FakeRemoteGateway } from './fake-gateway'; +import {getBuiltInRatings} from 'web-codegen-scorer'; +import {FakeRemoteExecutor} from './fake-executor'; -/** @type {RemoteEnvironmentConfig} */ +/** @type {EnvironmentConfig} */ export default { displayName: 'Remote Env (example)', clientSideFramework: 'angular', ratings: getBuiltInRatings(), generationSystemPrompt: './system-instructions.md', executablePrompts: ['../../prompts/**/*.md'], - gateway: new FakeRemoteGateway(), + executor: new FakeRemoteExecutor(), }; diff --git a/examples/environments/remote_env/fake-gateway.ts b/examples/environments/remote_env/fake-executor.ts similarity index 72% rename from examples/environments/remote_env/fake-gateway.ts rename to examples/environments/remote_env/fake-executor.ts index e442547..d4e9929 100644 --- a/examples/environments/remote_env/fake-gateway.ts +++ b/examples/environments/remote_env/fake-executor.ts @@ -2,17 +2,16 @@ import { BuildResult, BuildResultStatus, EvalID, - Gateway, + Executor, LlmContextFile, + LlmGenerateFilesRequest, LlmResponse, LlmResponseFile, - RemoteEnvironment, RootPromptDefinition, } from '../../../runner'; -import { LlmGenerateFilesContext } from '../../../runner/codegen/llm-runner'; -import { ProgressLogger } from '../../../runner/progress/progress-logger'; +import {ProgressLogger} from '../../../runner/progress/progress-logger'; -export class FakeRemoteGateway implements Gateway { +export class FakeRemoteExecutor implements Executor { ids = 0; async initializeEval() { @@ -26,46 +25,45 @@ export class FakeRemoteGateway implements Gateway { async performFakeLlmRequest(): Promise { return { success: true, - outputFiles: [{ code: 'Works!', filePath: 'main.ts' }], + outputFiles: [{code: 'Works!', filePath: 'main.ts'}], reasoning: '', errors: [], - usage: { inputTokens: 0, totalTokens: 0, outputTokens: 0 }, + usage: {inputTokens: 0, totalTokens: 0, outputTokens: 0}, }; } generateInitialFiles( id: EvalID, - requestCtx: LlmGenerateFilesContext, + requestCtx: LlmGenerateFilesRequest, model: string, contextFiles: LlmContextFile[], - abortSignal: AbortSignal + abortSignal: AbortSignal, ): Promise { // Generate the initial files of the eval app. // This generation can happen on a remote service with access to private models. 
return this.performFakeLlmRequest(); } - repairBuild( + generateRepairFiles( id: EvalID, - requestCtx: LlmGenerateFilesContext, + requestCtx: LlmGenerateFilesRequest, model: string, errorMessage: string, appFiles: LlmResponseFile[], contextFiles: LlmContextFile[], - abortSignal: AbortSignal + abortSignal: AbortSignal, ): Promise { // Repair the given eval app. // This generation can happen on a remote service with access to private models. return this.performFakeLlmRequest(); } - async serveBuild( + async serveWebApplication( id: EvalID, - env: RemoteEnvironment, appDirectoryPath: string, rootPromptDef: RootPromptDefinition, progress: ProgressLogger, - logicWhileServing: (serveUrl: string) => Promise + logicWhileServing: (serveUrl: string) => Promise, ): Promise { // Start serving of the app. // Invoke the logic while the server is running. @@ -74,12 +72,10 @@ export class FakeRemoteGateway implements Gateway { return result; } - async tryBuild( + async performBuild( id: EvalID, - env: RemoteEnvironment, appDirectoryPath: string, rootPromptDef: RootPromptDefinition, - progress: ProgressLogger ): Promise { // Here, building can happen in the remote service. // Eval ID is useful here for storing the build on a server, for re-using later when serving. @@ -89,7 +85,7 @@ export class FakeRemoteGateway implements Gateway { }; } - shouldRetryFailedBuilds() { + async shouldRepairFailedBuilds() { // Some environments have a builtin retry loop as part of initial generation. // In those cases, you may want to skip retrying. return true; @@ -98,4 +94,18 @@ export class FakeRemoteGateway implements Gateway { async finalizeEval() { // Do your cleanup. } + + async isSupportedModel() { + return {supported: true}; + } + + async getExecutorInfo() { + return { + id: 'fake-executor', + displayName: 'Fake Executor', + mcpServersLaunched: 0, + }; + } + + async destroy() {} } diff --git a/runner/codegen/base-cli-agent-runner.ts b/runner/codegen/base-cli-agent-runner.ts index 97644b4..0ba8e39 100644 --- a/runner/codegen/base-cli-agent-runner.ts +++ b/runner/codegen/base-cli-agent-runner.ts @@ -3,10 +3,10 @@ import {join, relative} from 'path'; import {existsSync} from 'fs'; import assert from 'assert'; import { - LlmConstrainedOutputGenerateResponse, - LlmGenerateFilesRequestOptions, - LlmGenerateFilesResponse, - LlmGenerateTextResponse, + LocalLlmConstrainedOutputGenerateResponse, + LocalLlmGenerateFilesRequestOptions, + LocalLlmGenerateFilesResponse, + LocalLlmGenerateTextResponse, } from './llm-runner.js'; import {DirectorySnapshot} from './directory-snapshot.js'; import {LlmResponseFile} from '../shared-interfaces.js'; @@ -17,8 +17,8 @@ export abstract class BaseCliAgentRunner { abstract readonly displayName: string; protected abstract readonly binaryName: string; protected abstract readonly ignoredFilePatterns: string[]; - protected abstract getCommandLineFlags(options: LlmGenerateFilesRequestOptions): string[]; - protected abstract writeAgentFiles(options: LlmGenerateFilesRequestOptions): Promise; + protected abstract getCommandLineFlags(options: LocalLlmGenerateFilesRequestOptions): string[]; + protected abstract writeAgentFiles(options: LocalLlmGenerateFilesRequestOptions): Promise; protected inactivityTimeoutMins = 2; protected totalRequestTimeoutMins = 10; @@ -27,7 +27,9 @@ export abstract class BaseCliAgentRunner { private binaryPath: string | null = null; private commonIgnoredPatterns = ['**/node_modules/**', '**/dist/**', '**/.angular/**']; - async generateFiles(options: 
LlmGenerateFilesRequestOptions): Promise { + async generateFiles( + options: LocalLlmGenerateFilesRequestOptions, + ): Promise { const {context} = options; // TODO: Consider removing these assertions when we have better types. @@ -64,12 +66,12 @@ export abstract class BaseCliAgentRunner { return {files, reasoning, toolLogs: []}; } - generateText(): Promise { + generateText(): Promise { // Technically we can make this work, but we don't need it at the time of writing. throw new UserFacingError(`Generating text with ${this.displayName} is not supported.`); } - generateConstrained(): Promise> { + generateConstrained(): Promise> { // We can't support this, because there's no straightforward // way to tell the agent to follow a schema. throw new UserFacingError(`Constrained output with ${this.displayName} is not supported.`); @@ -117,7 +119,7 @@ export abstract class BaseCliAgentRunner { } /** Gets the common system instructions for all agents. */ - protected getCommonInstructions(options: LlmGenerateFilesRequestOptions) { + protected getCommonInstructions(options: LocalLlmGenerateFilesRequestOptions) { return [ `# Important Rules`, `The following instructions dictate how you should behave. It is CRITICAL that you follow them AS CLOSELY AS POSSIBLE:`, @@ -170,7 +172,7 @@ export abstract class BaseCliAgentRunner { return binaryPath; } - private runAgentProcess(options: LlmGenerateFilesRequestOptions): Promise { + private runAgentProcess(options: LocalLlmGenerateFilesRequestOptions): Promise { return new Promise(resolve => { let stdoutBuffer = ''; let stdErrBuffer = ''; diff --git a/runner/codegen/claude-code-runner.ts b/runner/codegen/claude-code-runner.ts index 5c64bf5..2b008a6 100644 --- a/runner/codegen/claude-code-runner.ts +++ b/runner/codegen/claude-code-runner.ts @@ -1,4 +1,8 @@ -import {LlmGenerateFilesContext, LlmGenerateFilesRequestOptions, LlmRunner} from './llm-runner.js'; +import { + LocalLlmGenerateFilesContext, + LocalLlmGenerateFilesRequestOptions, + LlmRunner, +} from './llm-runner.js'; import {join} from 'path'; import {mkdirSync} from 'fs'; import {writeFile} from 'fs/promises'; @@ -25,7 +29,7 @@ export class ClaudeCodeRunner extends BaseCliAgentRunner implements LlmRunner { return Object.keys(MODEL_MAPPING); } - protected getCommandLineFlags(options: LlmGenerateFilesRequestOptions): string[] { + protected getCommandLineFlags(options: LocalLlmGenerateFilesRequestOptions): string[] { return [ '--print', '--model', @@ -39,7 +43,7 @@ export class ClaudeCodeRunner extends BaseCliAgentRunner implements LlmRunner { ]; } - protected async writeAgentFiles(options: LlmGenerateFilesRequestOptions): Promise { + protected async writeAgentFiles(options: LocalLlmGenerateFilesRequestOptions): Promise { const {context} = options; const instructionFilePath = join(context.directory, 'CLAUDE.md'); const settingsDir = join(context.directory, '.claude'); @@ -52,7 +56,7 @@ export class ClaudeCodeRunner extends BaseCliAgentRunner implements LlmRunner { ]); } - private getSettingsJsonFile(context: LlmGenerateFilesContext): string { + private getSettingsJsonFile(context: LocalLlmGenerateFilesContext): string { const ignoredPatterns = super.getCommonIgnorePatterns(); const deniedPermissions: string[] = [ // Block some commands like `git` and `npm install` since they aren't relevant for the evals. 
diff --git a/runner/codegen/codex-runner.ts b/runner/codegen/codex-runner.ts index 2c61933..9db8da3 100644 --- a/runner/codegen/codex-runner.ts +++ b/runner/codegen/codex-runner.ts @@ -1,4 +1,4 @@ -import {LlmGenerateFilesRequestOptions, LlmRunner} from './llm-runner.js'; +import {LocalLlmGenerateFilesRequestOptions, LlmRunner} from './llm-runner.js'; import {join} from 'path'; import {mkdirSync} from 'fs'; import {writeFile} from 'fs/promises'; @@ -22,7 +22,7 @@ export class CodexRunner extends BaseCliAgentRunner implements LlmRunner { return Object.keys(MODEL_MAPPING); } - protected getCommandLineFlags(options: LlmGenerateFilesRequestOptions): string[] { + protected getCommandLineFlags(options: LocalLlmGenerateFilesRequestOptions): string[] { return [ 'exec', '--model', @@ -34,7 +34,7 @@ export class CodexRunner extends BaseCliAgentRunner implements LlmRunner { ]; } - protected async writeAgentFiles(options: LlmGenerateFilesRequestOptions): Promise { + protected async writeAgentFiles(options: LocalLlmGenerateFilesRequestOptions): Promise { const {context} = options; const instructionFilePath = join(context.directory, 'AGENTS.md'); const settingsDir = join(context.directory, '.codex'); diff --git a/runner/codegen/gemini-cli-runner.ts b/runner/codegen/gemini-cli-runner.ts index 93582eb..c76595e 100644 --- a/runner/codegen/gemini-cli-runner.ts +++ b/runner/codegen/gemini-cli-runner.ts @@ -1,4 +1,4 @@ -import {LlmGenerateFilesRequestOptions, LlmRunner} from './llm-runner.js'; +import {LocalLlmGenerateFilesRequestOptions, LlmRunner} from './llm-runner.js'; import {join} from 'path'; import {mkdirSync} from 'fs'; import {writeFile} from 'fs/promises'; @@ -18,7 +18,7 @@ export class GeminiCliRunner extends BaseCliAgentRunner implements LlmRunner { return SUPPORTED_MODELS; } - protected getCommandLineFlags(options: LlmGenerateFilesRequestOptions): string[] { + protected getCommandLineFlags(options: LocalLlmGenerateFilesRequestOptions): string[] { return [ '--prompt', options.context.executablePrompt, @@ -30,7 +30,7 @@ export class GeminiCliRunner extends BaseCliAgentRunner implements LlmRunner { ]; } - protected async writeAgentFiles(options: LlmGenerateFilesRequestOptions): Promise { + protected async writeAgentFiles(options: LocalLlmGenerateFilesRequestOptions): Promise { const {context} = options; const ignoreFilePath = join(context.directory, '.geminiignore'); const instructionFilePath = join(context.directory, 'GEMINI.md'); diff --git a/runner/codegen/genkit/genkit-runner.ts b/runner/codegen/genkit/genkit-runner.ts index e4b6455..dd1fa85 100644 --- a/runner/codegen/genkit/genkit-runner.ts +++ b/runner/codegen/genkit/genkit-runner.ts @@ -4,13 +4,13 @@ import {GenkitPlugin, GenkitPluginV2} from 'genkit/plugin'; import {z} from 'zod'; import { McpServerOptions, - LlmConstrainedOutputGenerateRequestOptions, - LlmConstrainedOutputGenerateResponse, + LocalLlmConstrainedOutputGenerateRequestOptions, + LocalLlmConstrainedOutputGenerateResponse, LlmRunner, - LlmGenerateFilesResponse, - LlmGenerateTextResponse, - LlmGenerateTextRequestOptions, - LlmGenerateFilesRequestOptions, + LocalLlmGenerateFilesResponse, + LocalLlmGenerateTextResponse, + LocalLlmGenerateTextRequestOptions, + LocalLlmGenerateFilesRequestOptions, } from '../llm-runner.js'; import {setTimeout} from 'node:timers/promises'; import {callWithTimeout} from '../../utils/timeout.js'; @@ -34,8 +34,8 @@ export class GenkitRunner implements LlmRunner { private toolLogs: ToolLogEntry[] = []; async generateConstrained( - options: 
LlmConstrainedOutputGenerateRequestOptions, - ): Promise> { + options: LocalLlmConstrainedOutputGenerateRequestOptions, + ): Promise> { const {provider, model} = this.resolveModel(options.model); const result = await this._genkitRequest(provider, model, options); @@ -46,8 +46,10 @@ export class GenkitRunner implements LlmRunner { }; } - async generateFiles(options: LlmGenerateFilesRequestOptions): Promise { - const requestOptions: LlmConstrainedOutputGenerateRequestOptions = { + async generateFiles( + options: LocalLlmGenerateFilesRequestOptions, + ): Promise { + const requestOptions: LocalLlmConstrainedOutputGenerateRequestOptions = { ...options, prompt: options.context.combinedPrompt, schema: z.object({ @@ -80,7 +82,9 @@ export class GenkitRunner implements LlmRunner { return this.toolLogs.splice(0); } - async generateText(options: LlmGenerateTextRequestOptions): Promise { + async generateText( + options: LocalLlmGenerateTextRequestOptions, + ): Promise { const {provider, model} = this.resolveModel(options.model); const result = await this._genkitRequest(provider, model, options); @@ -103,14 +107,14 @@ export class GenkitRunner implements LlmRunner { private async _genkitRequest( provider: GenkitModelProvider, model: ModelReference, - options: LlmGenerateTextRequestOptions | LlmConstrainedOutputGenerateRequestOptions, + options: LocalLlmGenerateTextRequestOptions | LocalLlmConstrainedOutputGenerateRequestOptions, ) { return await rateLimitLLMRequest( provider, model, {messages: options.messages || [], prompt: options.prompt}, () => { - const schema = (options as Partial).schema; + const schema = (options as Partial).schema; const performRequest = async () => { let tools: ToolAction[] | undefined; let resources: DynamicResourceAction[] | undefined; diff --git a/runner/codegen/llm-runner.ts b/runner/codegen/llm-runner.ts index e12dcdc..6946e8c 100644 --- a/runner/codegen/llm-runner.ts +++ b/runner/codegen/llm-runner.ts @@ -1,12 +1,22 @@ import {z} from 'zod'; -import {LlmResponseFile, ToolLogEntry, Usage} from '../shared-interfaces.js'; +import { + LlmGenerateFilesRequest, + LlmResponseFile, + ToolLogEntry, + Usage, +} from '../shared-interfaces.js'; import {UserFacingError} from '../utils/errors.js'; +import {Executor} from '../orchestration/executors/executor.js'; -export function assertValidModelName(value: string, availableModels: string[]) { - if (!availableModels.includes(value)) { +export async function assertValidModelName(value: string, executor: Executor) { + const checkResult = await executor.isSupportedModel(value); + if (!checkResult.supported) { throw new UserFacingError( - `Unsupported model specified. Available models:\n` + - availableModels.map(m => `- ${m}`).join('\n'), + `Unsupported model specified.${ + checkResult.availableModels !== undefined + ? ` Available models:\n` + checkResult.availableModels.map(m => `- ${m}`).join('\n') + : '' + }`, ); } } @@ -28,15 +38,17 @@ export interface LlmRunner { readonly hasBuiltInRepairLoop: boolean; /** Sends a file generation request to the LLM. */ - generateFiles(options: LlmGenerateFilesRequestOptions): Promise; + generateFiles( + options: LocalLlmGenerateFilesRequestOptions, + ): Promise; /** Sends a normal text generation request to the LLM. */ - generateText(options: LlmGenerateTextRequestOptions): Promise; + generateText(options: LocalLlmGenerateTextRequestOptions): Promise; /** Sends a schema-constrained generation request to the LLM. 
*/ generateConstrained( - options: LlmConstrainedOutputGenerateRequestOptions, - ): Promise>; + options: LocalLlmConstrainedOutputGenerateRequestOptions, + ): Promise>; /** Gets the names of the models supported by the runner. */ getSupportedModels(): string[]; @@ -80,54 +92,34 @@ interface BaseLlmRequestOptions { } /** Options needed to send a text generation request. */ -export interface LlmGenerateTextRequestOptions extends BaseLlmRequestOptions { +export interface LocalLlmGenerateTextRequestOptions extends BaseLlmRequestOptions { /** Prompt to send. */ prompt: string; } /** Context needed for an file generation context. */ -export interface LlmGenerateFilesContext { - /** System instructions that should be included. */ - systemInstructions: string; - /** Prompt being executed. */ - executablePrompt: string; - /** - * Combined system instructions and prompt for the environments - * where the two can't be provided separately. - * - * TODO(crisbeto): Can we explain the reason for this better? - */ - combinedPrompt: string; - /** Directory in which the generation will occur. */ - directory: string; - /** - * Command that the LLM can use to verify that the build works. - * - * Can be `undefined` for remote environments. - */ +export interface LocalLlmGenerateFilesContext extends LlmGenerateFilesRequest { + /** Command that the LLM can use to verify that the build works. */ buildCommand: string | undefined; - /** - * Package manager that the LLM can use. - * - * Can be `undefined` for remote environments. - */ + /** Package manager that the LLM can use. */ packageManager: string | undefined; /** All available package managers supported by the runner. */ possiblePackageManagers: string[]; } /** Options needed to send a file generation request. */ -export interface LlmGenerateFilesRequestOptions extends BaseLlmRequestOptions { +export interface LocalLlmGenerateFilesRequestOptions extends BaseLlmRequestOptions { /** Context necessary for the request. */ - context: LlmGenerateFilesContext; + context: LocalLlmGenerateFilesContext; } /** * Options that can be passed for a schema-constrained generation * request to an LLM. */ -export interface LlmConstrainedOutputGenerateRequestOptions - extends BaseLlmRequestOptions { +export interface LocalLlmConstrainedOutputGenerateRequestOptions< + T extends z.ZodTypeAny = z.ZodTypeAny, +> extends BaseLlmRequestOptions { /** Prompt to send. */ prompt: string; /** Schema that the response should conform to. */ @@ -135,7 +127,7 @@ export interface LlmConstrainedOutputGenerateRequestOptions { +export interface LocalLlmConstrainedOutputGenerateResponse { /** Result generated by the LLM. */ output: z.infer | null; /** Token usage data, if available. */ @@ -155,12 +147,12 @@ interface BaseLlmGenerateResponse { } /** File generation response from the LLM. */ -export interface LlmGenerateFilesResponse extends BaseLlmGenerateResponse { +export interface LocalLlmGenerateFilesResponse extends BaseLlmGenerateResponse { files: LlmResponseFile[]; } /** Text response from the LLM. 
*/ -export interface LlmGenerateTextResponse extends BaseLlmGenerateResponse { +export interface LocalLlmGenerateTextResponse extends BaseLlmGenerateResponse { text: string; } diff --git a/runner/configuration/base-environment-config.ts b/runner/configuration/base-environment-config.ts deleted file mode 100644 index fe311ae..0000000 --- a/runner/configuration/base-environment-config.ts +++ /dev/null @@ -1,55 +0,0 @@ -import z from 'zod'; -import {ratingSchema} from '../ratings/rating-types.js'; -import {MultiStepPrompt} from './multi-step-prompt.js'; -import {mcpServerOptionsSchema} from '../codegen/llm-runner.js'; -import {getPossiblePackageManagers} from './environment-config.js'; - -export const baseEnvironmentConfigSchema = z.strictObject({ - /** Display name for the environment. */ - displayName: z.string(), - /** - * Optional unique ID for the environment. - * If one isn't provided, it will be computed from the `displayName`. - */ - id: z.string().optional(), - /** ID of the client-side framework used within the environment. */ - clientSideFramework: z.string(), - /** Ratings to run when evaluating the environment. */ - ratings: z.array(ratingSchema), - /** Path to the prompt used by the LLM for generating files. */ - generationSystemPrompt: z.string(), - /** - * Path to the prompt used by the LLM for repairing builds or failures. - * - * If unset or `null`, the eval tool will use its default repair instructions. - */ - repairSystemPrompt: z.union([z.string(), z.null()]).optional(), - /** - * Path to the prompt used by the LLM for editing. - * - * Prompts running after the initial generation are considered as editing (e.g. multi step prompts). - * If `null`, the eval tool will use the generation prompt for edits. - */ - editingSystemPrompt: z.union([z.string(), z.null()]).optional(), - /** Prompts that should be sent to the LLM and written into the output. */ - executablePrompts: z.array( - z.union([ - z.string(), - z.strictObject({ - path: z.string(), - name: z.string().optional(), - ratings: z.array(ratingSchema).optional(), - }), - z.custom(data => data instanceof MultiStepPrompt), - ]), - ), - /** - * ID of the fullstack framework used within the environment. - * If omitted, it will default to the `clientSideFramework`. - */ - fullStackFramework: z.string().optional(), - /** Path to the prompt to use when rating code. */ - codeRatingPrompt: z.string().optional(), - /** When enabled, the system prompts for this environment won't be included in the report. 
*/ - classifyPrompts: z.boolean().optional(), -}); diff --git a/runner/configuration/base-environment.ts b/runner/configuration/base-environment.ts deleted file mode 100644 index 64adb80..0000000 --- a/runner/configuration/base-environment.ts +++ /dev/null @@ -1,324 +0,0 @@ -import {readdirSync, readFileSync, statSync} from 'fs'; -import {basename, dirname, extname, join, resolve} from 'path'; -import {globSync} from 'tinyglobby'; -import { - FrameworkInfo, - MultiStepPromptDefinition, - PromptDefinition, - RootPromptDefinition, -} from '../shared-interfaces.js'; -import {Rating} from '../ratings/rating-types.js'; -import {renderHandlebarsTemplate} from './prompt-templating.js'; -import {lazy} from '../utils/lazy-creation.js'; -import {EnvironmentConfig} from './environment-config.js'; -import {MultiStepPrompt} from './multi-step-prompt.js'; -import {UserFacingError} from '../utils/errors.js'; -import {generateId} from '../utils/id-generation.js'; -import {Gateway} from '../orchestration/gateway.js'; -import {Environment} from './environment.js'; - -/** Represents a single prompt evaluation environment. */ -export abstract class BaseEnvironment { - /** Path at which the environment is defined. */ - readonly rootPath: string; - /** Unique ID for the environment. */ - readonly id: string; - /** Display name of the environment. */ - readonly displayName: string; - /** Information about the fullstack framework used within the environment. */ - readonly fullStackFramework: FrameworkInfo; - /** Information about the client-side framework used within the environment. */ - readonly clientSideFramework: FrameworkInfo; - /** Prompts that should be executed as a part of the evaluation. */ - readonly executablePrompts: RootPromptDefinition[]; - /** Path from which to read the code rating prompt. */ - readonly codeRatingPromptPath: string | null; - /** Whether the prompts should be removed from the final report. */ - readonly classifyPrompts: boolean; - /** Whether this is one of the built-in environment that come with the runner. */ - readonly isBuiltIn: boolean; - /** Gateway for interacting with environment. */ - abstract gateway: Gateway; - - constructor( - rootPath: string, - private readonly config: EnvironmentConfig, - ) { - this.rootPath = rootPath; - this.id = config.id || this.generateId(config.displayName); - this.displayName = config.displayName; - this.clientSideFramework = { - id: config.clientSideFramework, - displayName: - this.getFrameworkDisplayName(config.clientSideFramework) || config.clientSideFramework, - }; - this.fullStackFramework = config.fullStackFramework - ? { - id: config.fullStackFramework, - displayName: - this.getFrameworkDisplayName(config.fullStackFramework) || config.clientSideFramework, - } - : {...this.clientSideFramework}; - this.executablePrompts = this.resolveExecutablePrompts( - config.executablePrompts, - config.ratings, - ); - this.codeRatingPromptPath = config.codeRatingPrompt - ? join(rootPath, config.codeRatingPrompt) - : null; - this.classifyPrompts = config.classifyPrompts ?? 
false; - this.isBuiltIn = rootPath.includes('node_modules'); - } - - systemPromptGeneration = lazy(() => { - return this.renderRelativePrompt(this.config.generationSystemPrompt).result; - }); - - systemPromptRepair = lazy(() => { - if (!this.config.repairSystemPrompt) { - return 'Please fix the given errors and return the corrected code.'; - } - return this.renderRelativePrompt(this.config.repairSystemPrompt).result; - }); - - systemPromptEditing = lazy(() => { - if (!this.config.editingSystemPrompt) { - return this.systemPromptGeneration(); - } - return this.renderRelativePrompt(this.config.editingSystemPrompt).result; - }); - - /** - * Augments a prompt based on the environment's config. - * @param userPrompt Prompt that is being augmented. - * @param ragEndpoint Optional RAG endpoint to use when augmenting the prompt. - */ - async getPrompt( - type: 'generation' | 'editing', - userPrompt: string, - ragEndpoint?: string, - ): Promise { - const systemPrompt = - type === 'generation' - ? this.systemPromptGeneration() - : (this.systemPromptEditing() ?? this.systemPromptGeneration()); - - if (!ragEndpoint) { - return [systemPrompt, userPrompt].join('\n\n'); - } - - if (!ragEndpoint.includes('PROMPT')) { - throw new UserFacingError('The ragEndpoint must include the "PROMPT" substring.'); - } - const url = ragEndpoint.replace('PROMPT', encodeURIComponent(userPrompt)); - const response = await fetch(url); - if (!response.ok) { - throw new UserFacingError(`Failed to fetch from ${url}: ${response.statusText}`); - } - const ragContent = await response.text(); - return `${systemPrompt}\n\n${ragContent}`; - } - - /** - * Renders out a prompt with our custom templating support. - * @param content Raw content of the prompt. - * @param promptFilePath Path where the prompt is located. If null, embedding files into - * the prompt will not be supported. - * @param additionalContext Additional context variables to expose to the prompt. - */ - renderPrompt( - content: string, - promptFilePath: string | null, - additionalContext: Record = {}, - ) { - return renderHandlebarsTemplate(content, { - rootDir: promptFilePath ? dirname(promptFilePath) : null, - FULL_STACK_FRAMEWORK_NAME: this.fullStackFramework.displayName, - CLIENT_SIDE_FRAMEWORK_NAME: this.clientSideFramework.displayName, - ...additionalContext, - }); - } - - /** - * Gets the readable display name of a framework, based on its ID. - * @param id ID to be resolved. - */ - private getFrameworkDisplayName(id: string): string | null { - switch (id) { - case 'angular': - return 'Angular'; - case 'next': - return 'Next.js'; - case 'react': - return 'React'; - case 'vue': - return 'Vue.js'; - case 'svelte': - return 'Svelte'; - case 'solid': - return 'Solid.js'; - default: - return null; - } - } - - /** - * Resolves the prompt configuration into prompt definitions. - * @param rootPath Root path of the project. - * @param prompts Prompts to be resolved. - * @param envRatings Environment-level ratings. - */ - private resolveExecutablePrompts( - prompts: EnvironmentConfig['executablePrompts'], - envRatings: Rating[], - ) { - const result: RootPromptDefinition[] = []; - - for (const def of prompts) { - if (def instanceof MultiStepPrompt) { - result.push(this.getMultiStepPrompt(def, envRatings)); - } else { - let path: string; - let ratings: Rating[]; - let name: string | undefined = undefined; - - if (typeof def === 'string') { - path = def; - ratings = envRatings.slice(); - } else { - path = def.path; - ratings = [...(def.ratings ?? 
[]), ...envRatings]; - name = def.name; - } - - globSync(path, {cwd: this.rootPath}).forEach(relativePath => { - result.push( - this.getStepPromptDefinition( - name ?? basename(relativePath, extname(relativePath)), - relativePath, - ratings, - /* isEditing */ false, - ), - ); - }); - } - } - - return result; - } - - /** - * Creates a prompt definition for a given step. - * - * @param name Name of the prompt. - * @param rootPath Root path of the project. - * @param relativePath Relative path to the prompt. - * @param ratings Ratings to run against the definition. - * @param isEditing Whether this is an editing or generation step. - */ - private getStepPromptDefinition( - name: string, - relativePath: string, - ratings: Rating[], - isEditing: boolean, - ): PromptDefinition { - const {result, contextFiles} = this.renderRelativePrompt(relativePath); - - return { - name: name, - kind: 'single', - prompt: result, - ratings, - systemPromptType: isEditing ? 'editing' : 'generation', - contextFilePatterns: contextFiles, - } satisfies PromptDefinition; - } - - /** - * Gets a multi-step form based on a configuration. - * @param rootPath Root path of the project. - * @param def Definition of the prompt. - * @param envRatings Environment-level ratings. - */ - private getMultiStepPrompt( - def: MultiStepPrompt, - envRatings: Rating[], - ): MultiStepPromptDefinition { - const promptRoot = resolve(this.rootPath, def.directoryPath); - const name = basename(promptRoot); - const steps: PromptDefinition[] = []; - const stepRegex = /^step-(\d+)/; - const stepValues: Record = {}; - - if (!statSync(promptRoot).isDirectory()) { - throw new UserFacingError( - `Multi-step prompt root must point to a directory. "${promptRoot}" is not a directory.`, - ); - } - - const entities = readdirSync(promptRoot, {withFileTypes: true}); - - if (entities.length === 0) { - throw new UserFacingError('Multi-step prompt directory cannot be empty.'); - } - - for (const current of entities) { - if (!current.isFile()) { - throw new UserFacingError( - `Multi-step prompt directory can only contain files. ${current.name} is not a file.`, - ); - } - - const match = current.name.match(stepRegex); - - if (!match || !match[1]) { - throw new UserFacingError( - `Multi-step prompt name must be in the form of \`step-\`, ` + - `but received '${current.name}'`, - ); - } - - const ratings = [...envRatings]; - - if (def.stepRatings[current.name]) { - ratings.unshift(...def.stepRatings[current.name]); - } - - const stepNum = parseInt(match[1]); - if (stepNum === 0) { - throw new UserFacingError('Multi-step prompts start with `step-1`.'); - } - const step = this.getStepPromptDefinition( - `${name}-step-${stepNum}`, - join(def.directoryPath, current.name), - ratings, - /*isEditing */ stepNum !== 1, - ); - - stepValues[step.name] = stepNum; - steps.push(step); - } - - return { - name, - kind: 'multi-step', - steps: steps.sort((a, b) => stepValues[a.name] - stepValues[b.name]), - } satisfies MultiStepPromptDefinition; - } - - private generateId(displayName: string): string { - const id = generateId(displayName); - - if (id === null) { - throw new UserFacingError(`Could not auto-generate an ID from "${displayName}"`); - } - - return id; - } - - /** Renders a prompt from a path relative to the environment config. 
*/ - private renderRelativePrompt(relativePath: string) { - const path = resolve(this.rootPath, relativePath); - return this.renderPrompt(readFileSync(path, 'utf8'), path); - } -} diff --git a/runner/configuration/environment-config.ts b/runner/configuration/environment-config.ts index e15361e..a959738 100644 --- a/runner/configuration/environment-config.ts +++ b/runner/configuration/environment-config.ts @@ -1,19 +1,77 @@ import z from 'zod'; import {createMessageBuilder, fromError} from 'zod-validation-error/v3'; import {UserFacingError} from '../utils/errors.js'; -import {LocalEnvironmentConfig, localEnvironmentConfigSchema} from './environment-local.js'; -import {RemoteEnvironmentConfig, remoteEnvironmentConfigSchema} from './environment-remote.js'; +import {ratingSchema} from '../ratings/rating-types.js'; +import {MultiStepPrompt} from './multi-step-prompt.js'; +import {executorSchema} from '../orchestration/executors/executor.js'; +import { + LocalExecutorConfig, + localExecutorConfigSchema, +} from '../orchestration/executors/local-executor-config.js'; -const environmentConfigSchema = z.union([ - localEnvironmentConfigSchema, - remoteEnvironmentConfigSchema, -]); +export const environmentConfigSchema = z.object({ + /** Display name for the environment. */ + displayName: z.string(), + /** + * Optional unique ID for the environment. + * If one isn't provided, it will be computed from the `displayName`. + */ + id: z.string().optional(), + /** ID of the client-side framework used within the environment. */ + clientSideFramework: z.string(), + /** Ratings to run when evaluating the environment. */ + ratings: z.array(ratingSchema), + /** Path to the prompt used by the LLM for generating files. */ + generationSystemPrompt: z.string(), + /** + * Path to the prompt used by the LLM for repairing builds or failures. + * + * If unset or `null`, the eval tool will use its default repair instructions. + */ + repairSystemPrompt: z.union([z.string(), z.null()]).optional(), + /** + * Path to the prompt used by the LLM for editing. + * + * Prompts running after the initial generation are considered as editing (e.g. multi step prompts). + * If `null`, the eval tool will use the generation prompt for edits. + */ + editingSystemPrompt: z.union([z.string(), z.null()]).optional(), + /** Prompts that should be sent to the LLM and written into the output. */ + executablePrompts: z.array( + z.union([ + z.string(), + z.strictObject({ + path: z.string(), + name: z.string().optional(), + ratings: z.array(ratingSchema).optional(), + }), + z.custom(data => data instanceof MultiStepPrompt), + ]), + ), + /** + * ID of the fullstack framework used within the environment. + * If omitted, it will default to the `clientSideFramework`. + */ + fullStackFramework: z.string().optional(), + /** Path to the prompt to use when rating code. */ + codeRatingPrompt: z.string().optional(), + /** When enabled, the system prompts for this environment won't be included in the report. */ + classifyPrompts: z.boolean().optional(), + /** Executor to be used for this environment. */ + executor: executorSchema + .optional() + .describe( + 'Executor to be used for this environment. ' + + 'If unset, a local executor is derived from the full environment configuration.', + ), +}); /** * Shape of the object that configures an individual evaluation environment. Not intended to direct * reads, interact with the information through the `Environment` class. 
*/ -export type EnvironmentConfig = z.infer; +export type EnvironmentConfig = z.infer & + Partial; /** Package managers that are currently supported. */ export function getPossiblePackageManagers() { @@ -22,7 +80,13 @@ export function getPossiblePackageManagers() { /** Asserts that the specified data is a valid environment config. */ export function assertIsEnvironmentConfig(value: unknown): asserts value is EnvironmentConfig { - const validationResult = environmentConfigSchema.safeParse(value); + const validationResult = environmentConfigSchema + .merge( + // For backwards compatibility, users can directly configure the local executor + // in the top-level environment configuration. + localExecutorConfigSchema.partial(), + ) + .safeParse(value); if (!validationResult.success) { // TODO: we can use `z.prettifyError` once we update to zod v4, @@ -38,9 +102,3 @@ export function assertIsEnvironmentConfig(value: unknown): asserts value is Envi throw new UserFacingError(message); } } - -export function isLocalEnvironmentConfig( - config: EnvironmentConfig, -): config is LocalEnvironmentConfig { - return (config as Partial).gateway === undefined; -} diff --git a/runner/configuration/environment-local.ts b/runner/configuration/environment-local.ts deleted file mode 100644 index 9eefed2..0000000 --- a/runner/configuration/environment-local.ts +++ /dev/null @@ -1,101 +0,0 @@ -import {join} from 'path'; -import z from 'zod'; -import {LlmRunner, McpServerOptions, mcpServerOptionsSchema} from '../codegen/llm-runner.js'; -import {LocalGateway} from '../orchestration/gateways/local_gateway.js'; -import {BaseEnvironment} from './base-environment.js'; -import {EnvironmentConfig, getPossiblePackageManagers} from './environment-config.js'; -import {baseEnvironmentConfigSchema} from './base-environment-config.js'; - -export const localEnvironmentConfigSchema = baseEnvironmentConfigSchema.extend({ - /** MCP servers that can be started for this environment. */ - mcpServers: z.array(mcpServerOptionsSchema).optional(), - /** Relative path to the environment's source code in which to generate new code. */ - sourceDirectory: z.string().optional(), - /** - * Path to the template directory to use when creating - * the project which the LLM will run against. - */ - projectTemplate: z.string().optional(), - /** Package manager to use for the eval. */ - packageManager: z.enum(getPossiblePackageManagers()).optional(), - /** - * Command to run when building the generated code. - * Defaults to ` run build`. - */ - buildCommand: z.string().optional(), - /** - * Command to run when starting a development server inside the app. - * Defaults to ` run start --port 0`. - */ - serveCommand: z.string().optional(), - /** - * Whether to skip installing dependencies when running evals in the environment. - * Useful if you're managing dependencies yourself. - */ - skipInstall: z.boolean().optional(), -}); - -export type LocalEnvironmentConfig = z.infer; - -/** Represents a single prompt evaluation environment. */ -export class LocalEnvironment extends BaseEnvironment { - /** Configured package manager for the environment. */ - readonly packageManager: string; - /** Command used to install dependencies. */ - readonly installCommand: string; - /** Command to run when building the generated code. */ - readonly buildCommand: string; - /** Command to run when starting a development server inside the app. */ - readonly serveCommand: string; - /** - * Absolute path at which files specific to this environment are located. 
Will be merged in - * with the files from the `projectTemplatePath` to get the final project structure. - */ - readonly sourceDirectory: string | null; - /** - * Directory serving as a template for the environment. - * Files from the `sourceDirectory` will be applied on top to get the final project structure. - */ - readonly projectTemplatePath: string | null; - /** Options for MCP servers that should be started as a part of this environment. */ - readonly mcpServerOptions: McpServerOptions[]; - /** Whether to skip installing dependencies. */ - readonly skipInstall: boolean; - /** Gateway for interacting with the environment. */ - gateway: LocalGateway; - - constructor( - rootPath: string, - config: LocalEnvironmentConfig, - readonly llm: LlmRunner, - ) { - super(rootPath, config); - - this.gateway = new LocalGateway(llm); - - const packageManager = config.packageManager || 'npm'; - const projectTemplatePath = config.projectTemplate - ? join(rootPath, config.projectTemplate) - : null; - const sourceDirectory = config.sourceDirectory ? join(rootPath, config.sourceDirectory) : null; - this.packageManager = packageManager; - this.installCommand = `${packageManager} install --silent`; - this.buildCommand = config.buildCommand || `${packageManager} run build`; - this.serveCommand = config.serveCommand || this.getDefaultServeCommand(packageManager); - this.projectTemplatePath = projectTemplatePath; - this.sourceDirectory = sourceDirectory; - this.mcpServerOptions = config.mcpServers || []; - this.skipInstall = config.skipInstall ?? false; - } - - private getDefaultServeCommand(packageManager: LocalEnvironmentConfig['packageManager']): string { - const flags = '--port 0'; - - // npm needs -- to pass flags to the command. - if (packageManager === 'npm') { - return `npm run start -- ${flags}`; - } - - return `${packageManager} run start ${flags}`; - } -} diff --git a/runner/configuration/environment-remote.ts b/runner/configuration/environment-remote.ts deleted file mode 100644 index f64dad0..0000000 --- a/runner/configuration/environment-remote.ts +++ /dev/null @@ -1,21 +0,0 @@ -import z from 'zod'; -import {Gateway} from '../orchestration/gateway.js'; -import {BaseEnvironment} from './base-environment.js'; -import {baseEnvironmentConfigSchema} from './base-environment-config.js'; - -export const remoteEnvironmentConfigSchema = baseEnvironmentConfigSchema.extend({ - // TODO: Follow-up with a gateway validator, or make class abstract. - gateway: z.custom>(), -}); - -export type RemoteEnvironmentConfig = z.infer; - -/** Represents a single prompt evaluation environment. 
*/ -export class RemoteEnvironment extends BaseEnvironment { - gateway: Gateway; - - constructor(rootPath: string, config: RemoteEnvironmentConfig) { - super(rootPath, config); - this.gateway = config.gateway; - } -} diff --git a/runner/configuration/environment-resolution.ts b/runner/configuration/environment-resolution.ts index 52a94c9..b547a77 100644 --- a/runner/configuration/environment-resolution.ts +++ b/runner/configuration/environment-resolution.ts @@ -1,12 +1,14 @@ -import {dirname} from 'path'; import {existsSync} from 'fs'; -import {assertIsEnvironmentConfig, isLocalEnvironmentConfig} from './environment-config.js'; +import {dirname} from 'path'; +import {fromZodError} from 'zod-validation-error/v3'; +import {RunnerName} from '../codegen/runner-creation.js'; import {toProcessAbsolutePath} from '../file-system-utils.js'; +import {Executor} from '../orchestration/executors/executor.js'; +import {localExecutorConfigSchema} from '../orchestration/executors/local-executor-config.js'; +import {LocalExecutor} from '../orchestration/executors/local-executor.js'; import {UserFacingError} from '../utils/errors.js'; +import {assertIsEnvironmentConfig, environmentConfigSchema} from './environment-config.js'; import {Environment} from './environment.js'; -import {LocalEnvironment} from './environment-local.js'; -import {RemoteEnvironment} from './environment-remote.js'; -import {getRunnerByName, RunnerName} from '../codegen/runner-creation.js'; const environmentsCache = new Map(); @@ -27,11 +29,41 @@ export async function getEnvironmentByPath( const result: {default: unknown} = await import(configPath); const rootPath = dirname(configPath); - assertIsEnvironmentConfig(result.default); - const environment = isLocalEnvironmentConfig(result.default) - ? new LocalEnvironment(rootPath, result.default, await getRunnerByName(runnerCliOption)) - : new RemoteEnvironment(rootPath, result.default); + const config = result.default; + assertIsEnvironmentConfig(config); + + let executor: Executor | undefined = config.executor; + + // Safety check to ensure `executor` is not configured while backwards-compatibility + // executor options are set at the top-level configuration. 
+ if (executor !== undefined) { + const strictTopLevelSchema = environmentConfigSchema.strict().safeParse(config); + if (!strictTopLevelSchema.data || !strictTopLevelSchema.success) { + throw new Error( + fromZodError(strictTopLevelSchema.error, { + prefix: + `Environment config cannot contain local executor configuration ` + + `fields if \`executor\` is set.`, + prefixSeparator: '\n', + issueSeparator: '\n', + }).toString(), + ); + } + } else { + const backwardsCompatTopLevelConfig = localExecutorConfigSchema.passthrough().safeParse(config); + if (!backwardsCompatTopLevelConfig.data || !backwardsCompatTopLevelConfig.success) { + throw new Error( + fromZodError(backwardsCompatTopLevelConfig.error, { + prefix: 'Environment config does not properly configure local executor.', + prefixSeparator: '\n', + issueSeparator: '\n', + }).toString(), + ); + } + executor = new LocalExecutor(backwardsCompatTopLevelConfig.data, runnerCliOption); + } + const environment = new Environment(rootPath, {...config, executor}); environmentsCache.set(configPath, environment); return environmentsCache.get(configPath)!; } diff --git a/runner/configuration/environment.ts b/runner/configuration/environment.ts index d8ef952..ba9b918 100644 --- a/runner/configuration/environment.ts +++ b/runner/configuration/environment.ts @@ -1,4 +1,326 @@ -import {LocalEnvironment} from './environment-local.js'; -import {RemoteEnvironment} from './environment-remote.js'; +import {readdirSync, readFileSync, statSync} from 'fs'; +import {basename, dirname, extname, join, resolve} from 'path'; +import {globSync} from 'tinyglobby'; +import {Rating} from '../ratings/rating-types.js'; +import { + FrameworkInfo, + MultiStepPromptDefinition, + PromptDefinition, + RootPromptDefinition, +} from '../shared-interfaces.js'; +import {UserFacingError} from '../utils/errors.js'; +import {generateId} from '../utils/id-generation.js'; +import {lazy} from '../utils/lazy-creation.js'; +import {EnvironmentConfig} from './environment-config.js'; +import {MultiStepPrompt} from './multi-step-prompt.js'; +import {renderHandlebarsTemplate} from './prompt-templating.js'; +import {RunnerName} from '../codegen/runner-creation.js'; +import {Executor} from '../orchestration/executors/executor.js'; +import {LocalExecutor} from '../orchestration/executors/local-executor.js'; -export type Environment = LocalEnvironment | RemoteEnvironment; +/** Represents a single prompt evaluation environment. */ +export class Environment { + /** Path at which the environment is defined. */ + readonly rootPath: string; + /** Unique ID for the environment. */ + readonly id: string; + /** Display name of the environment. */ + readonly displayName: string; + /** Information about the fullstack framework used within the environment. */ + readonly fullStackFramework: FrameworkInfo; + /** Information about the client-side framework used within the environment. */ + readonly clientSideFramework: FrameworkInfo; + /** Prompts that should be executed as a part of the evaluation. */ + readonly executablePrompts: RootPromptDefinition[]; + /** Path from which to read the code rating prompt. */ + readonly codeRatingPromptPath: string | null; + /** Whether the prompts should be removed from the final report. */ + readonly classifyPrompts: boolean; + /** Whether this is one of the built-in environment that come with the runner. */ + readonly isBuiltIn: boolean; + /** Configured executor. 
*/ + readonly executor: Executor; + + constructor( + rootPath: string, + private readonly config: EnvironmentConfig & Required>, + ) { + this.rootPath = rootPath; + this.id = config.id || this.generateId(config.displayName); + this.displayName = config.displayName; + this.clientSideFramework = { + id: config.clientSideFramework, + displayName: + this.getFrameworkDisplayName(config.clientSideFramework) || config.clientSideFramework, + }; + this.fullStackFramework = config.fullStackFramework + ? { + id: config.fullStackFramework, + displayName: + this.getFrameworkDisplayName(config.fullStackFramework) || config.clientSideFramework, + } + : {...this.clientSideFramework}; + this.executablePrompts = this.resolveExecutablePrompts( + config.executablePrompts, + config.ratings, + ); + this.codeRatingPromptPath = config.codeRatingPrompt + ? join(rootPath, config.codeRatingPrompt) + : null; + this.classifyPrompts = config.classifyPrompts ?? false; + this.isBuiltIn = rootPath.includes('node_modules'); + this.executor = config.executor; + } + + systemPromptGeneration = lazy(() => { + return this.renderRelativePrompt(this.config.generationSystemPrompt).result; + }); + + systemPromptRepair = lazy(() => { + if (!this.config.repairSystemPrompt) { + return 'Please fix the given errors and return the corrected code.'; + } + return this.renderRelativePrompt(this.config.repairSystemPrompt).result; + }); + + systemPromptEditing = lazy(() => { + if (!this.config.editingSystemPrompt) { + return this.systemPromptGeneration(); + } + return this.renderRelativePrompt(this.config.editingSystemPrompt).result; + }); + + /** + * Augments a prompt based on the environment's config. + * @param userPrompt Prompt that is being augmented. + * @param ragEndpoint Optional RAG endpoint to use when augmenting the prompt. + */ + async getPrompt( + type: 'generation' | 'editing', + userPrompt: string, + ragEndpoint?: string, + ): Promise { + const systemPrompt = + type === 'generation' + ? this.systemPromptGeneration() + : (this.systemPromptEditing() ?? this.systemPromptGeneration()); + + if (!ragEndpoint) { + return [systemPrompt, userPrompt].join('\n\n'); + } + + if (!ragEndpoint.includes('PROMPT')) { + throw new UserFacingError('The ragEndpoint must include the "PROMPT" substring.'); + } + const url = ragEndpoint.replace('PROMPT', encodeURIComponent(userPrompt)); + const response = await fetch(url); + if (!response.ok) { + throw new UserFacingError(`Failed to fetch from ${url}: ${response.statusText}`); + } + const ragContent = await response.text(); + return `${systemPrompt}\n\n${ragContent}`; + } + + /** + * Renders out a prompt with our custom templating support. + * @param content Raw content of the prompt. + * @param promptFilePath Path where the prompt is located. If null, embedding files into + * the prompt will not be supported. + * @param additionalContext Additional context variables to expose to the prompt. + */ + renderPrompt( + content: string, + promptFilePath: string | null, + additionalContext: Record = {}, + ) { + return renderHandlebarsTemplate(content, { + rootDir: promptFilePath ? dirname(promptFilePath) : null, + FULL_STACK_FRAMEWORK_NAME: this.fullStackFramework.displayName, + CLIENT_SIDE_FRAMEWORK_NAME: this.clientSideFramework.displayName, + ...additionalContext, + }); + } + + /** + * Gets the readable display name of a framework, based on its ID. + * @param id ID to be resolved. 
+ */ + private getFrameworkDisplayName(id: string): string | null { + switch (id) { + case 'angular': + return 'Angular'; + case 'next': + return 'Next.js'; + case 'react': + return 'React'; + case 'vue': + return 'Vue.js'; + case 'svelte': + return 'Svelte'; + case 'solid': + return 'Solid.js'; + default: + return null; + } + } + + /** + * Resolves the prompt configuration into prompt definitions. + * @param rootPath Root path of the project. + * @param prompts Prompts to be resolved. + * @param envRatings Environment-level ratings. + */ + private resolveExecutablePrompts( + prompts: EnvironmentConfig['executablePrompts'], + envRatings: Rating[], + ) { + const result: RootPromptDefinition[] = []; + + for (const def of prompts) { + if (def instanceof MultiStepPrompt) { + result.push(this.getMultiStepPrompt(def, envRatings)); + } else { + let path: string; + let ratings: Rating[]; + let name: string | undefined = undefined; + + if (typeof def === 'string') { + path = def; + ratings = envRatings.slice(); + } else { + path = def.path; + ratings = [...(def.ratings ?? []), ...envRatings]; + name = def.name; + } + + globSync(path, {cwd: this.rootPath}).forEach(relativePath => { + result.push( + this.getStepPromptDefinition( + name ?? basename(relativePath, extname(relativePath)), + relativePath, + ratings, + /* isEditing */ false, + ), + ); + }); + } + } + + return result; + } + + /** + * Creates a prompt definition for a given step. + * + * @param name Name of the prompt. + * @param rootPath Root path of the project. + * @param relativePath Relative path to the prompt. + * @param ratings Ratings to run against the definition. + * @param isEditing Whether this is an editing or generation step. + */ + private getStepPromptDefinition( + name: string, + relativePath: string, + ratings: Rating[], + isEditing: boolean, + ): PromptDefinition { + const {result, contextFiles} = this.renderRelativePrompt(relativePath); + + return { + name: name, + kind: 'single', + prompt: result, + ratings, + systemPromptType: isEditing ? 'editing' : 'generation', + contextFilePatterns: contextFiles, + } satisfies PromptDefinition; + } + + /** + * Gets a multi-step form based on a configuration. + * @param rootPath Root path of the project. + * @param def Definition of the prompt. + * @param envRatings Environment-level ratings. + */ + private getMultiStepPrompt( + def: MultiStepPrompt, + envRatings: Rating[], + ): MultiStepPromptDefinition { + const promptRoot = resolve(this.rootPath, def.directoryPath); + const name = basename(promptRoot); + const steps: PromptDefinition[] = []; + const stepRegex = /^step-(\d+)/; + const stepValues: Record = {}; + + if (!statSync(promptRoot).isDirectory()) { + throw new UserFacingError( + `Multi-step prompt root must point to a directory. "${promptRoot}" is not a directory.`, + ); + } + + const entities = readdirSync(promptRoot, {withFileTypes: true}); + + if (entities.length === 0) { + throw new UserFacingError('Multi-step prompt directory cannot be empty.'); + } + + for (const current of entities) { + if (!current.isFile()) { + throw new UserFacingError( + `Multi-step prompt directory can only contain files. 
${current.name} is not a file.`, + ); + } + + const match = current.name.match(stepRegex); + + if (!match || !match[1]) { + throw new UserFacingError( + `Multi-step prompt name must be in the form of \`step-\`, ` + + `but received '${current.name}'`, + ); + } + + const ratings = [...envRatings]; + + if (def.stepRatings[current.name]) { + ratings.unshift(...def.stepRatings[current.name]); + } + + const stepNum = parseInt(match[1]); + if (stepNum === 0) { + throw new UserFacingError('Multi-step prompts start with `step-1`.'); + } + const step = this.getStepPromptDefinition( + `${name}-step-${stepNum}`, + join(def.directoryPath, current.name), + ratings, + /*isEditing */ stepNum !== 1, + ); + + stepValues[step.name] = stepNum; + steps.push(step); + } + + return { + name, + kind: 'multi-step', + steps: steps.sort((a, b) => stepValues[a.name] - stepValues[b.name]), + } satisfies MultiStepPromptDefinition; + } + + private generateId(displayName: string): string { + const id = generateId(displayName); + + if (id === null) { + throw new UserFacingError(`Could not auto-generate an ID from "${displayName}"`); + } + + return id; + } + + /** Renders a prompt from a path relative to the environment config. */ + private renderRelativePrompt(relativePath: string) { + const path = resolve(this.rootPath, relativePath); + return this.renderPrompt(readFileSync(path, 'utf8'), path); + } +} diff --git a/runner/index.ts b/runner/index.ts index b3c35f8..5a471bf 100644 --- a/runner/index.ts +++ b/runner/index.ts @@ -1,12 +1,10 @@ export * from './shared-interfaces.js'; export * from './configuration/environment-config.js'; -export * from './orchestration/gateway.js'; -export * from './orchestration/gateways/local_gateway.js'; -export { - type RemoteEnvironmentConfig, - RemoteEnvironment, -} from './configuration/environment-remote.js'; -export {type LocalEnvironmentConfig, LocalEnvironment} from './configuration/environment-local.js'; +export * from './orchestration/executors/executor.js'; +export * from './orchestration/executors/local-executor-config.js'; +export * from './orchestration/executors/local-executor.js'; +export {type EnvironmentConfig} from './configuration/environment-config.js'; +export {Environment} from './configuration/environment.js'; export * from './ratings/built-in.js'; export * from './ratings/rating-types.js'; export * from './ratings/built-in-ratings/index.js'; @@ -29,13 +27,13 @@ export {generateCodeAndAssess} from './orchestration/generate.js'; export {groupSimilarReports} from './orchestration/grouping.js'; export { type LlmRunner, - type LlmGenerateFilesContext, - type LlmGenerateFilesRequestOptions, - type LlmGenerateTextRequestOptions, - type LlmConstrainedOutputGenerateRequestOptions, - type LlmConstrainedOutputGenerateResponse, - type LlmGenerateFilesResponse, - type LlmGenerateTextResponse, + type LocalLlmGenerateFilesContext, + type LocalLlmGenerateFilesRequestOptions, + type LocalLlmGenerateTextRequestOptions, + type LocalLlmConstrainedOutputGenerateRequestOptions, + type LocalLlmConstrainedOutputGenerateResponse, + type LocalLlmGenerateFilesResponse, + type LocalLlmGenerateTextResponse, type McpServerOptions, type PromptDataMessage, } from './codegen/llm-runner.js'; @@ -43,10 +41,10 @@ export {GenkitRunner} from './codegen/genkit/genkit-runner.js'; export {GeminiCliRunner} from './codegen/gemini-cli-runner.js'; export {getRunnerByName, type RunnerName} from './codegen/runner-creation.js'; export {getEnvironmentByPath} from './configuration/environment-resolution.js'; -export 
{type Environment} from './configuration/environment.js'; export {autoRateFiles} from './ratings/autoraters/rate-files.js'; export {fetchReportsFromDisk} from './reporting/report-local-disk.js'; export {type ProgressLogger, type ProgressType} from './progress/progress-logger.js'; export {DynamicProgressLogger} from './progress/dynamic-progress-logger.js'; export {NoopProgressLogger} from './progress/noop-progress-logger.js'; export {TextProgressLogger} from './progress/text-progress-logger.js'; +export {type ServeTestingResult} from './workers/serve-testing/worker-types.js'; diff --git a/runner/orchestration/build-repair.ts b/runner/orchestration/build-repair.ts index b275fa1..5e6b9e8 100644 --- a/runner/orchestration/build-repair.ts +++ b/runner/orchestration/build-repair.ts @@ -11,13 +11,12 @@ import {repairCodeWithAI} from './codegen.js'; import {writeResponseFiles} from './file-system.js'; import {runBuild} from './build-worker.js'; import {ProgressLogger} from '../progress/progress-logger.js'; -import {EvalID, Gateway} from './gateway.js'; +import {EvalID, Executor} from './executors/executor.js'; /** * Calls the LLM to repair code, handles the response, and attempts to build the project again. * * @param evalID ID of the eval being executed. - * @param gateway Gateway. * @param model The model name to use for the repair. * @param env The environment configuration. * @param directory The working directory. @@ -33,7 +32,6 @@ import {EvalID, Gateway} from './gateway.js'; */ export async function repairAndBuild( evalID: EvalID, - gateway: Gateway, model: string, env: Environment, rootPromptDef: RootPromptDefinition, @@ -49,7 +47,6 @@ export async function repairAndBuild( ): Promise { const repairResponse = await repairCodeWithAI( evalID, - gateway, model, env, rootPromptDef, @@ -64,7 +61,6 @@ export async function repairAndBuild( return await handleRepairResponse( evalID, - gateway, repairResponse, previousAttemptFiles, env, @@ -83,7 +79,6 @@ export async function repairAndBuild( */ async function handleRepairResponse( evalID: EvalID, - gateway: Gateway, repairResponse: LlmResponse, previousAttemptFiles: LlmResponseFile[], env: Environment, @@ -114,7 +109,6 @@ async function handleRepairResponse( const buildResult = await runBuild( evalID, - gateway, directory, env, rootPromptDef, diff --git a/runner/orchestration/build-serve-loop.ts b/runner/orchestration/build-serve-loop.ts index db806e9..67074eb 100644 --- a/runner/orchestration/build-serve-loop.ts +++ b/runner/orchestration/build-serve-loop.ts @@ -1,5 +1,5 @@ import PQueue from 'p-queue'; -import {LlmGenerateFilesResponse} from '../codegen/llm-runner.js'; +import {LocalLlmGenerateFilesResponse} from '../codegen/llm-runner.js'; import {BuildResultStatus} from '../workers/builder/builder-types.js'; import {Environment} from '../configuration/environment.js'; import { @@ -12,7 +12,7 @@ import {DEFAULT_MAX_REPAIR_ATTEMPTS} from '../configuration/constants.js'; import {ProgressLogger} from '../progress/progress-logger.js'; import {runBuild} from './build-worker.js'; import {repairAndBuild} from './build-repair.js'; -import {EvalID, Gateway} from './gateway.js'; +import {EvalID, Executor} from './executors/executor.js'; import {serveAndTestApp} from './serve-testing-worker.js'; import {BrowserAgentTaskInput} from '../testing/browser-agent/models.js'; @@ -20,9 +20,8 @@ import {BrowserAgentTaskInput} from '../testing/browser-agent/models.js'; * Attempts to build the code that an LLM generated. 
If the build fails, attempts * to fix the breakage and build again. * + * @param config Assessment config. * @param evalID ID of the eval being attempted for build. - * @param gateway Gateway. - * @param model Model to be used for repair generation requests. * @param env Environment that is currently being run. * @param rootPromptDef Definition of the root prompt. * @param directory Directory on disk to which to write. @@ -38,12 +37,11 @@ import {BrowserAgentTaskInput} from '../testing/browser-agent/models.js'; export async function attemptBuild( config: AssessmentConfig, evalID: EvalID, - gateway: Gateway, env: Environment, rootPromptDef: RootPromptDefinition, directory: string, contextFiles: LlmContextFile[], - initialResponse: LlmGenerateFilesResponse, + initialResponse: LocalLlmGenerateFilesResponse, attemptDetails: AttemptDetails[], abortSignal: AbortSignal, workerConcurrencyQueue: PQueue, @@ -52,7 +50,6 @@ export async function attemptBuild( ) { const initialBuildResult = await runBuild( evalID, - gateway, directory, env, rootPromptDef, @@ -61,7 +58,7 @@ export async function attemptBuild( progress, ); let repairAttempts = 0; - const maxRepairAttempts = gateway.shouldRetryFailedBuilds(evalID) + const maxRepairAttempts = (await env.executor.shouldRepairFailedBuilds(evalID)) ? DEFAULT_MAX_REPAIR_ATTEMPTS : 0; @@ -92,7 +89,6 @@ export async function attemptBuild( const attempt = await repairAndBuild( evalID, - gateway, config.model, env, rootPromptDef, @@ -117,7 +113,6 @@ export async function attemptBuild( lastAttempt.serveTestingResult = await serveAndTestApp( config, evalID, - gateway, directory, env, rootPromptDef, @@ -154,7 +149,6 @@ export async function attemptBuild( const attempt = await repairAndBuild( evalID, - gateway, config.model, env, rootPromptDef, @@ -184,7 +178,6 @@ export async function attemptBuild( attempt.serveTestingResult = await serveAndTestApp( config, evalID, - gateway, directory, env, rootPromptDef, diff --git a/runner/orchestration/build-worker.ts b/runner/orchestration/build-worker.ts index 0c5f5d9..6e99340 100644 --- a/runner/orchestration/build-worker.ts +++ b/runner/orchestration/build-worker.ts @@ -2,13 +2,12 @@ import {BuildResult, BuildResultStatus} from '../workers/builder/builder-types.j import {Environment} from '../configuration/environment.js'; import {ProgressLogger} from '../progress/progress-logger.js'; import {RootPromptDefinition} from '../shared-interfaces.js'; -import {EvalID, Gateway} from './gateway.js'; +import {EvalID, Executor} from './executors/executor.js'; import PQueue from 'p-queue'; /** Attempts to build the code. 
*/ export async function runBuild( evalID: EvalID, - gateway: Gateway, appDirectoryPath: string, env: Environment, rootPromptDef: RootPromptDefinition, @@ -19,9 +18,8 @@ export async function runBuild( progress.log(rootPromptDef, 'build', `Building the app`); try { - const result = await gateway.tryBuild( + const result = await env.executor.performBuild( evalID, - env, appDirectoryPath, rootPromptDef, workerConcurrencyQueue, diff --git a/runner/orchestration/codegen.ts b/runner/orchestration/codegen.ts index 0ff1097..bacf398 100644 --- a/runner/orchestration/codegen.ts +++ b/runner/orchestration/codegen.ts @@ -1,17 +1,18 @@ import { LlmContextFile, + LlmGenerateFilesRequest, LlmResponse, LlmResponseFile, RootPromptDefinition, ToolLogEntry, Usage, } from '../shared-interfaces.js'; -import {LlmGenerateFilesContext, LlmRunner, PromptDataMessage} from '../codegen/llm-runner.js'; +import {LlmRunner, LocalLlmGenerateFilesContext, PromptDataMessage} from '../codegen/llm-runner.js'; import {Environment} from '../configuration/environment.js'; import {getPossiblePackageManagers} from '../configuration/environment-config.js'; import {ProgressLogger} from '../progress/progress-logger.js'; -import {EvalID, Gateway} from './gateway.js'; -import {LocalEnvironment} from '../configuration/environment-local.js'; +import {EvalID} from './executors/executor.js'; +import {LocalExecutor} from './executors/local-executor.js'; /** * Generates code using the configured AI model based on the provided prompt. @@ -19,7 +20,7 @@ import {LocalEnvironment} from '../configuration/environment-local.js'; export async function generateCodeWithAI( llm: LlmRunner, model: string, - codegenContext: LlmGenerateFilesContext, + codegenContext: LocalLlmGenerateFilesContext, contextFiles: LlmContextFile[], abortSignal: AbortSignal, ): Promise { @@ -88,7 +89,6 @@ export async function generateCodeWithAI( */ export async function repairCodeWithAI( evalID: EvalID, - gateway: Gateway, model: string, env: Environment, promptDef: RootPromptDefinition, @@ -111,19 +111,16 @@ export async function repairCodeWithAI( ...appFiles.map(file => `${file.filePath}:\n\`\`\`\n${file.code}\`\`\`\n\n`), ].join('\n'); - const context: LlmGenerateFilesContext = { + const context: LlmGenerateFilesRequest = { directory, systemInstructions: repairSystemInstructions, executablePrompt: repairPrompt, combinedPrompt: `${repairSystemInstructions}\n${repairPrompt}`, - packageManager: env instanceof LocalEnvironment ? env.packageManager : undefined, - buildCommand: env instanceof LocalEnvironment ? 
env.buildCommand : undefined, - possiblePackageManagers: getPossiblePackageManagers().slice(), }; progress.log(promptDef, 'codegen', 'Repairing code with AI'); - const response = await gateway.repairBuild( + const response = await env.executor.generateRepairFiles( evalID, context, model, diff --git a/runner/orchestration/executors/executor.ts b/runner/orchestration/executors/executor.ts new file mode 100644 index 0000000..d6a37e1 --- /dev/null +++ b/runner/orchestration/executors/executor.ts @@ -0,0 +1,104 @@ +import PQueue from 'p-queue'; +import {ProgressLogger} from '../../progress/progress-logger.js'; +import { + LlmContextFile, + LlmGenerateFilesRequest, + LlmResponse, + LlmResponseFile, + RootPromptDefinition, +} from '../../shared-interfaces.js'; +import {BuildResult} from '../../workers/builder/builder-types.js'; +import z from 'zod'; +import {ServeTestingResult} from '../../workers/serve-testing/worker-types.js'; + +export type EvalID = string & {__evalID: true}; + +// Needed for portability of the `PQueue` type. +export type WorkerQueueType = PQueue; + +export const executorSchema = z.object({ + initializeEval: z.function(z.tuple([]), z.promise(z.custom())), + generateInitialFiles: z.function( + z.tuple([ + z.custom().describe('ID of the eval'), + z.custom().describe('Request info'), + z.string().describe('Configured model for the generation request'), + z.array(z.custom()).describe('Context files for the generation request.'), + z.custom().describe('Abort Signal to fire when the request should be canceled.'), + ]), + z.promise(z.custom()), + ), + generateRepairFiles: z.function( + z.tuple([ + z.custom().describe('ID of the eval'), + z.custom().describe('Request info'), + z.string().describe('Configured model for the generation request'), + z.string().describe('Error Message that should be repaired'), + z.array(z.custom()).describe('App files that were generated before.'), + z.array(z.custom()).describe('Context files for the generation request.'), + z.custom().describe('Abort Signal to fire when the request should be canceled.'), + ]), + z.promise(z.custom()), + ), + shouldRepairFailedBuilds: z.function( + z.tuple([z.custom().describe('ID of the eval')]), + z.promise(z.boolean()), + ), + performBuild: z.function( + z.tuple([ + z.custom().describe('ID of the eval'), + z.string().describe('Path to the application directory'), + z.custom().describe('Root prompt definition'), + z + .custom() + .describe('Worker concurrency queue. 
Use this for limiting local workers.'), + z.custom().describe('Abort Signal to fire when the request should be canceled.'), + z.custom().describe('Progress logger'), + ]), + z.promise(z.custom()), + ), + serveWebApplication: z.function( + z.tuple([ + z.custom().describe('ID of the eval'), + z.string().describe('Path to the application directory'), + z.custom().describe('Root prompt definition'), + z.custom().describe('Progress logger'), + z + .function( + z.tuple([z.string().describe('URL of the running server')]), + z.promise(z.custom()), + ) + .describe('Call this function while the server is running'), + ]), + z.promise(z.custom()), + ), + finalizeEval: z.function( + z.tuple([z.custom().describe('ID of the eval')]), + z.promise(z.void()), + ), + isSupportedModel: z.function( + z.tuple([z.string().describe('Model specified via command line flag')]), + z.promise( + z.object({ + supported: z.boolean(), + availableModels: z + .array(z.string()) + .optional() + .describe('List of available models, if known.'), + }), + ), + ), + destroy: z.function(z.tuple([]), z.promise(z.void())), + getExecutorInfo: z.function( + z.tuple([]), + z.promise( + z.object({ + id: z.string().describe('Unique ID of the executor'), + displayName: z.string().describe('Display name of the runner'), + mcpServersLaunched: z.number().describe('Number of MCP servers launched'), + }), + ), + ), +}); + +export type Executor = z.infer; diff --git a/runner/orchestration/executors/local-executor-config.ts b/runner/orchestration/executors/local-executor-config.ts new file mode 100644 index 0000000..d90cfbb --- /dev/null +++ b/runner/orchestration/executors/local-executor-config.ts @@ -0,0 +1,34 @@ +import z from 'zod'; +import {mcpServerOptionsSchema} from '../../codegen/llm-runner.js'; +import {getPossiblePackageManagers} from '../../configuration/environment-config.js'; + +export const localExecutorConfigSchema = z.strictObject({ + /** MCP servers that can be started for this environment. */ + mcpServers: z.array(mcpServerOptionsSchema).optional(), + /** Relative path to the environment's source code in which to generate new code. */ + sourceDirectory: z.string().optional(), + /** + * Path to the template directory to use when creating + * the project which the LLM will run against. + */ + projectTemplate: z.string().optional(), + /** Package manager to use for the eval. */ + packageManager: z.enum(getPossiblePackageManagers()).optional().default('npm'), + /** + * Command to run when building the generated code. + * Defaults to `<packageManager> run build`. + */ + buildCommand: z.string().optional(), + /** + * Command to run when starting a development server inside the app. + * Defaults to `<packageManager> run start --port 0`. + */ + serveCommand: z.string().optional(), + /** + * Whether to skip installing dependencies when running evals in the environment. + * Useful if you're managing dependencies yourself.
+ */ + skipInstall: z.boolean().optional(), +}); + +export type LocalExecutorConfig = z.infer; diff --git a/runner/orchestration/executors/local-executor.ts b/runner/orchestration/executors/local-executor.ts new file mode 100644 index 0000000..7c3dcf8 --- /dev/null +++ b/runner/orchestration/executors/local-executor.ts @@ -0,0 +1,205 @@ +import {ChildProcess, fork} from 'node:child_process'; +import path, {join} from 'node:path'; +import PQueue from 'p-queue'; +import {LlmRunner} from '../../codegen/llm-runner.js'; +import {getRunnerByName, RunnerName} from '../../codegen/runner-creation.js'; +import {ProgressLogger} from '../../progress/progress-logger.js'; +import { + LlmContextFile, + LlmGenerateFilesRequest, + LlmResponse, + LlmResponseFile, + RootPromptDefinition, +} from '../../shared-interfaces.js'; +import {killChildProcessGracefully} from '../../utils/kill-gracefully.js'; +import { + BuildResult, + BuildWorkerMessage, + BuildWorkerResponseMessage, +} from '../../workers/builder/builder-types.js'; +import {serveApp} from '../../workers/serve-testing/serve-app.js'; +import {generateCodeWithAI} from '../codegen.js'; +import {EvalID, Executor} from './executor.js'; +import {LocalExecutorConfig} from './local-executor-config.js'; +import {getPossiblePackageManagers} from '../../configuration/environment-config.js'; + +let uniqueIDs = 0; + +export class LocalExecutor implements Executor { + private llm: Promise; + + constructor( + public config: LocalExecutorConfig, + runnerName: RunnerName = 'genkit', + ) { + this.llm = getRunnerByName(runnerName); + } + + async initializeEval(): Promise { + return `${uniqueIDs++}` as EvalID; + } + + async generateInitialFiles( + _id: EvalID, + requestCtx: LlmGenerateFilesRequest, + model: string, + contextFiles: LlmContextFile[], + abortSignal: AbortSignal, + ): Promise { + return await generateCodeWithAI( + await this.llm, + model, + { + ...requestCtx, + packageManager: this.config.packageManager, + buildCommand: this.getBuildCommand(), + possiblePackageManagers: getPossiblePackageManagers().slice(), + }, + contextFiles, + abortSignal, + ); + } + + async generateRepairFiles( + _id: EvalID, + requestCtx: LlmGenerateFilesRequest, + model: string, + errorMessage: string, + appFiles: LlmResponseFile[], + contextFiles: LlmContextFile[], + abortSignal: AbortSignal, + ): Promise { + return await generateCodeWithAI( + await this.llm, + model, + { + ...requestCtx, + packageManager: this.config.packageManager, + buildCommand: this.getBuildCommand(), + possiblePackageManagers: getPossiblePackageManagers().slice(), + }, + contextFiles, + abortSignal, + ); + } + + performBuild( + _id: EvalID, + appDirectoryPath: string, + rootPromptDef: RootPromptDefinition, + workerConcurrencyQueue: PQueue, + abortSignal: AbortSignal, + progress: ProgressLogger, + ): Promise { + const buildParams: BuildWorkerMessage = { + directory: appDirectoryPath, + appName: rootPromptDef.name, + buildCommand: this.getBuildCommand(), + }; + return workerConcurrencyQueue.add( + () => + new Promise((resolve, reject) => { + const child: ChildProcess = fork( + path.resolve(import.meta.dirname, '../../workers/builder/worker.js'), + {signal: abortSignal}, + ); + child.send(buildParams); + + child.on('message', async (result: BuildWorkerResponseMessage) => { + await killChildProcessGracefully(child); + resolve(result.payload); + }); + child.on('error', async err => { + await killChildProcessGracefully(child); + reject(err); + }); + }), + ); + } + + async serveWebApplication( + _id: EvalID, + 
appDirectoryPath: string, + rootPromptDef: RootPromptDefinition, + progress: ProgressLogger, + logicWhileServing: (serveUrl: string) => Promise, + ): Promise { + return await serveApp( + this.getServeCommand(), + rootPromptDef, + appDirectoryPath, + progress, + logicWhileServing, + ); + } + + async shouldRepairFailedBuilds(): Promise { + return (await this.llm).hasBuiltInRepairLoop === false; + } + + async finalizeEval(_id: EvalID): Promise {} + + async isSupportedModel(name: string) { + const availableModels = (await this.llm).getSupportedModels(); + return { + supported: availableModels.includes(name), + availableModels, + }; + } + + async destroy(): Promise { + await (await this.llm)?.dispose(); + } + + getServeCommand(): string { + const flags = '--port 0'; + + // npm needs -- to pass flags to the command. + if (this.config.packageManager === 'npm') { + return `npm run start -- ${flags}`; + } + + return `${this.config.packageManager} run start ${flags}`; + } + + getBuildCommand(): string { + return this.config.buildCommand ?? `${this.config.packageManager} run build`; + } + + getInstallCommand(): string { + return `${this.config.packageManager} install --silent`; + } + + async getExecutorInfo() { + return { + id: (await this.llm).id, + displayName: (await this.llm).displayName, + mcpServersLaunched: this.config.mcpServers?.length ?? 0, + }; + } + + async startMcpServerHost(hostName: string) { + const llm = await this.llm; + if (llm.startMcpServerHost === undefined) { + return; + } + + llm.startMcpServerHost(hostName, this.config.mcpServers ?? []); + } + + async collectMcpServerLogs() { + const llm = await this.llm; + if (llm.flushMcpServerLogs === undefined) { + return; + } + + return { + servers: (this.config.mcpServers ?? []).map(m => ({ + name: m.name, + command: m.command, + args: m.args, + })), + logs: llm.flushMcpServerLogs().join('\n'), + }; + } +} diff --git a/runner/orchestration/file-system.ts b/runner/orchestration/file-system.ts index e9e5530..94ca789 100644 --- a/runner/orchestration/file-system.ts +++ b/runner/orchestration/file-system.ts @@ -15,7 +15,7 @@ import {globSync} from 'tinyglobby'; import {executeCommand} from '../utils/exec.js'; import {UserFacingError} from '../utils/errors.js'; import {ProgressLogger} from '../progress/progress-logger.js'; -import {LocalEnvironment} from '../configuration/environment-local.js'; +import {LocalExecutor} from './executors/local-executor.js'; const SYMLINK_PROJECT_PATHS = new Set(['node_modules']); const PENDING_INSTALLS = new Map>(); @@ -59,26 +59,42 @@ export async function setupProjectStructure( const directoriesToCopy: string[] = []; - if (env instanceof LocalEnvironment && env.projectTemplatePath) { + if (env.executor instanceof LocalExecutor && env.executor.config.projectTemplate) { + const projectTemplatePath = join(env.rootPath, env.executor.config.projectTemplate); + // Copy the template files first. - directoriesToCopy.push(env.projectTemplatePath); + directoriesToCopy.push(projectTemplatePath); // Run the install command in the template directory directly. This way multiple // evals can reuse the same dependencies. It also allows pnpm workspaces to work // properly since we might not have copied the `pnpm-workspaces.yml`. 
if (!env.isBuiltIn) { - await installDependenciesInDirectory(env, rootPromptDef, env.projectTemplatePath, progress); + await installDependenciesInDirectory( + env, + env.executor, + rootPromptDef, + projectTemplatePath, + progress, + ); } } - if (env instanceof LocalEnvironment && env.sourceDirectory) { + if (env.executor instanceof LocalExecutor && env.executor.config.sourceDirectory) { + const sourceDirectory = join(env.rootPath, env.executor.config.sourceDirectory); + // Push this after the project so the environment's files that precedence. - directoriesToCopy.push(env.sourceDirectory); + directoriesToCopy.push(sourceDirectory); // Also try to install dependencies in the source directory, // because it may be overriding the ones from the template. if (!env.isBuiltIn) { - await installDependenciesInDirectory(env, rootPromptDef, env.sourceDirectory, progress); + await installDependenciesInDirectory( + env, + env.executor, + rootPromptDef, + sourceDirectory, + progress, + ); } } @@ -95,8 +111,8 @@ export async function setupProjectStructure( // If the environment is built in, it'll likely be inside of the user's `node_modules`. // Since running an installation inside `node_modules` can be problematic, we install // in the temporary directory instead. This can be slower, but is more reliable. - if (env instanceof LocalEnvironment && env.isBuiltIn) { - await installDependenciesInDirectory(env, rootPromptDef, directory, progress); + if (env.executor instanceof LocalExecutor && env.isBuiltIn) { + await installDependenciesInDirectory(env, env.executor, rootPromptDef, directory, progress); } return {directory, cleanup}; @@ -104,17 +120,19 @@ export async function setupProjectStructure( /** Run the package manager install command in a specific directory. */ function installDependenciesInDirectory( - env: LocalEnvironment, + env: Environment, + localExecutor: LocalExecutor, rootPromptDef: RootPromptDefinition, directory: string, progress: ProgressLogger, ): Promise { // The install script will error out if there's no `package.json`. - if (env.skipInstall || !existsSync(join(directory, 'package.json'))) { + if (localExecutor.config.skipInstall || !existsSync(join(directory, 'package.json'))) { return Promise.resolve(); } - const key = `${directory}#${env.installCommand}`; + const installCommand = localExecutor.getInstallCommand(); + const key = `${directory}#${installCommand}`; let pendingCommand = PENDING_INSTALLS.get(key); progress.log(rootPromptDef, 'build', 'Installing dependencies'); @@ -124,7 +142,7 @@ function installDependenciesInDirectory( return pendingCommand; } - pendingCommand = executeCommand(env.installCommand, directory, undefined, { + pendingCommand = executeCommand(installCommand, directory, undefined, { forwardStderrToParent: true, }) .then(() => { diff --git a/runner/orchestration/gateway.ts b/runner/orchestration/gateway.ts deleted file mode 100644 index 7e2bf01..0000000 --- a/runner/orchestration/gateway.ts +++ /dev/null @@ -1,63 +0,0 @@ -import PQueue from 'p-queue'; -import {LlmGenerateFilesContext} from '../codegen/llm-runner.js'; -import {Environment} from '../configuration/environment.js'; -import {ProgressLogger} from '../progress/progress-logger.js'; -import { - LlmContextFile, - LlmResponse, - LlmResponseFile, - RootPromptDefinition, -} from '../shared-interfaces.js'; -import {BuildResult} from '../workers/builder/builder-types.js'; - -export type EvalID = string & {__evalID: true}; - -export interface Gateway { - /** Initializes an eval. 
*/ - initializeEval(): Promise; - - /** Generates initial files for an eval. */ - generateInitialFiles( - id: EvalID, - requestCtx: LlmGenerateFilesContext, - model: string, - contextFiles: LlmContextFile[], - abortSignal: AbortSignal, - ): Promise; - - repairBuild( - id: EvalID, - requestCtx: LlmGenerateFilesContext, - model: string, - errorMessage: string, - appFiles: LlmResponseFile[], - contextFiles: LlmContextFile[], - abortSignal: AbortSignal, - ): Promise; - - shouldRetryFailedBuilds(evalID: EvalID): boolean; - - tryBuild( - id: EvalID, - env: Env, - appDirectoryPath: string, - rootPromptDef: RootPromptDefinition, - workerConcurrencyQueue: PQueue, - abortSignal: AbortSignal, - progress: ProgressLogger, - ): Promise; - - serveBuild( - id: EvalID, - env: Env, - appDirectoryPath: string, - rootPromptDef: RootPromptDefinition, - progress: ProgressLogger, - logicWhileServing: (serveUrl: string) => Promise, - ): Promise; - - finalizeEval(id: EvalID): Promise; - - // TODO: Consider supporting in the future. - // rateBuild(id: EvalID): AssessmentResult[]; -} diff --git a/runner/orchestration/gateways/local_gateway.ts b/runner/orchestration/gateways/local_gateway.ts deleted file mode 100644 index 066c159..0000000 --- a/runner/orchestration/gateways/local_gateway.ts +++ /dev/null @@ -1,112 +0,0 @@ -import {ChildProcess, fork} from 'node:child_process'; -import { - BuildResult, - BuildWorkerMessage, - BuildWorkerResponseMessage, -} from '../../workers/builder/builder-types.js'; -import {LlmGenerateFilesContext, LlmRunner} from '../../codegen/llm-runner.js'; -import { - RootPromptDefinition, - LlmContextFile, - LlmResponse, - LlmResponseFile, -} from '../../shared-interfaces.js'; -import {generateCodeWithAI} from '../codegen.js'; -import {EvalID, Gateway} from '../gateway.js'; -import path from 'node:path'; -import {killChildProcessGracefully} from '../../utils/kill-gracefully.js'; -import {ProgressLogger} from '../../progress/progress-logger.js'; -import {serveApp} from '../../workers/serve-testing/serve-app.js'; -import {LocalEnvironment} from '../../configuration/environment-local.js'; -import PQueue from 'p-queue'; - -let uniqueIDs = 0; - -export class LocalGateway implements Gateway { - constructor(private llm: LlmRunner) {} - - async initializeEval(): Promise { - return `${uniqueIDs++}` as EvalID; - } - - async generateInitialFiles( - _id: EvalID, - requestCtx: LlmGenerateFilesContext, - model: string, - contextFiles: LlmContextFile[], - abortSignal: AbortSignal, - ): Promise { - return await generateCodeWithAI(this.llm, model, requestCtx, contextFiles, abortSignal); - } - - async repairBuild( - _id: EvalID, - requestCtx: LlmGenerateFilesContext, - model: string, - errorMessage: string, - appFiles: LlmResponseFile[], - contextFiles: LlmContextFile[], - abortSignal: AbortSignal, - ): Promise { - return await generateCodeWithAI(this.llm, model, requestCtx, contextFiles, abortSignal); - } - - tryBuild( - _id: EvalID, - env: LocalEnvironment, - appDirectoryPath: string, - rootPromptDef: RootPromptDefinition, - workerConcurrencyQueue: PQueue, - abortSignal: AbortSignal, - progress: ProgressLogger, - ): Promise { - const buildParams: BuildWorkerMessage = { - directory: appDirectoryPath, - appName: rootPromptDef.name, - buildCommand: env.buildCommand, - }; - - return workerConcurrencyQueue.add( - () => - new Promise((resolve, reject) => { - const child: ChildProcess = fork( - path.resolve(import.meta.dirname, '../../workers/builder/worker.js'), - {signal: abortSignal}, - ); - 
child.send(buildParams); - - child.on('message', async (result: BuildWorkerResponseMessage) => { - await killChildProcessGracefully(child); - resolve(result.payload); - }); - child.on('error', async err => { - await killChildProcessGracefully(child); - reject(err); - }); - }), - ); - } - - async serveBuild( - _id: EvalID, - env: LocalEnvironment, - appDirectoryPath: string, - rootPromptDef: RootPromptDefinition, - progress: ProgressLogger, - logicWhileServing: (serveUrl: string) => Promise, - ): Promise { - return await serveApp( - env.serveCommand, - rootPromptDef, - appDirectoryPath, - progress, - logicWhileServing, - ); - } - - shouldRetryFailedBuilds(): boolean { - return this.llm.hasBuiltInRepairLoop === false; - } - - async finalizeEval(_id: EvalID): Promise {} -} diff --git a/runner/orchestration/generate.ts b/runner/orchestration/generate.ts index f322502..89e160c 100644 --- a/runner/orchestration/generate.ts +++ b/runner/orchestration/generate.ts @@ -5,11 +5,7 @@ import {randomUUID} from 'crypto'; import PQueue from 'p-queue'; import {basename, join} from 'path'; import {existsSync, readdirSync} from 'fs'; -import { - assertValidModelName, - LlmGenerateFilesContext, - LlmGenerateFilesResponse, -} from '../codegen/llm-runner.js'; +import {assertValidModelName, LocalLlmGenerateFilesResponse} from '../codegen/llm-runner.js'; import { DEFAULT_AUTORATER_MODEL_NAME, LLM_OUTPUT_DIR, @@ -24,6 +20,7 @@ import { AttemptDetails, CompletionStats, LlmContextFile, + LlmGenerateFilesRequest, MultiStepPromptDefinition, PromptDefinition, RootPromptDefinition, @@ -40,7 +37,6 @@ import {generateUserJourneysForApp} from './user-journeys.js'; import {resolveContextFiles, setupProjectStructure, writeResponseFiles} from './file-system.js'; import {GenkitRunner} from '../codegen/genkit/genkit-runner.js'; import {getEnvironmentByPath} from '../configuration/environment-resolution.js'; -import {getPossiblePackageManagers} from '../configuration/environment-config.js'; import {ProgressLogger} from '../progress/progress-logger.js'; import {TextProgressLogger} from '../progress/text-progress-logger.js'; import {logReportHeader} from '../reporting/report-logging.js'; @@ -48,10 +44,10 @@ import {DynamicProgressLogger} from '../progress/dynamic-progress-logger.js'; import {UserFacingError} from '../utils/errors.js'; import {getRunGroupId} from './grouping.js'; import {executeCommand} from '../utils/exec.js'; -import {EvalID, Gateway} from './gateway.js'; -import {LocalEnvironment} from '../configuration/environment-local.js'; import {getRunnerByName} from '../codegen/runner-creation.js'; import {summarizeReportWithAI} from '../reporting/report-ai-summary.js'; +import {LocalExecutor} from './executors/local-executor.js'; +import {EvalID} from './executors/executor.js'; /** * Orchestrates the entire assessment process for each prompt defined in the `prompts` array. @@ -69,10 +65,7 @@ export async function generateCodeAndAssess(options: AssessmentConfig): Promise< const env = await getEnvironmentByPath(options.environmentConfigPath, options.runner); const ratingLlm = await getRunnerByName('genkit'); - // TODO(devversion): Consider validating model names also for remote environments. 
- if (env instanceof LocalEnvironment) { - assertValidModelName(options.model, env.llm.getSupportedModels()); - } + await assertValidModelName(options.model, env.executor); try { const promptsToProcess = getCandidateExecutablePrompts( @@ -105,13 +98,8 @@ export async function generateCodeAndAssess(options: AssessmentConfig): Promise< // We need Chrome to collect runtime information. await installChrome(); - if ( - env instanceof LocalEnvironment && - options.startMcp && - env.mcpServerOptions.length && - env.llm.startMcpServerHost - ) { - env.llm.startMcpServerHost(`mcp-${env.clientSideFramework.id}`, env.mcpServerOptions); + if (options.startMcp && env.executor instanceof LocalExecutor) { + env.executor.startMcpServerHost(`mcp-${env.clientSideFramework.id}`); } progress.initialize(promptsToProcess.length); @@ -133,7 +121,7 @@ export async function generateCodeAndAssess(options: AssessmentConfig): Promise< for (const rootPromptDef of promptsToProcess) { allTasks.push( appConcurrencyQueue.add(async () => { - const evalID = await env.gateway.initializeEval(); + const evalID = await env.executor.initializeEval(); let results: AssessmentResult[] | undefined; try { @@ -144,7 +132,6 @@ export async function generateCodeAndAssess(options: AssessmentConfig): Promise< options, evalID, env, - env.gateway, ratingLlm, rootPromptDef, abortSignal, @@ -171,7 +158,7 @@ export async function generateCodeAndAssess(options: AssessmentConfig): Promise< return [] satisfies AssessmentResult[]; } finally { progress.evalFinished(rootPromptDef, results || []); - await env.gateway.finalizeEval(evalID); + await env.executor.finalizeEval(evalID); } }), ); @@ -186,19 +173,8 @@ export async function generateCodeAndAssess(options: AssessmentConfig): Promise< progress.finalize(); const mcp = - env instanceof LocalEnvironment && - options.startMcp && - env.mcpServerOptions.length > 0 && - env.llm.startMcpServerHost && - env.llm.flushMcpServerLogs - ? { - servers: env.mcpServerOptions.map(m => ({ - name: m.name, - command: m.command, - args: m.args, - })), - logs: env.llm.flushMcpServerLogs().join('\n'), - } + env.executor instanceof LocalExecutor && options.startMcp + ? await env.executor.collectMcpServerLogs() : undefined; const timestamp = new Date(); @@ -232,9 +208,7 @@ export async function generateCodeAndAssess(options: AssessmentConfig): Promise< details, } satisfies RunInfo; } finally { - if (env instanceof LocalEnvironment) { - await env.llm.dispose(); - } + await env.executor.destroy(); } } @@ -247,7 +221,6 @@ export async function generateCodeAndAssess(options: AssessmentConfig): Promise< * * @param evalID ID of the evaluation task. * @param env Environment for this evaluation. - * @param gateway Gateway. * @param model Name of the LLM to use. * @param rootPromptDef Definition of the root prompt being processed. * @param localMode A boolean indicating whether to load code from local files instead of generating it. @@ -263,7 +236,6 @@ async function startEvaluationTask( config: AssessmentConfig, evalID: EvalID, env: Environment, - gateway: Gateway, ratingLlm: GenkitRunner, rootPromptDef: PromptDefinition | MultiStepPromptDefinition, abortSignal: AbortSignal, @@ -302,9 +274,6 @@ async function startEvaluationTask( systemInstructions, combinedPrompt: fullPromptText, executablePrompt: promptDef.prompt, - packageManager: env instanceof LocalEnvironment ? env.packageManager : undefined, - buildCommand: env instanceof LocalEnvironment ? 
env.buildCommand : undefined, - possiblePackageManagers: getPossiblePackageManagers().slice(), }, contextFiles, abortSignal, @@ -378,7 +347,6 @@ async function startEvaluationTask( const attempt = await attemptBuild( config, evalID, - gateway, env, rootPromptDef, directory, @@ -436,7 +404,6 @@ async function startEvaluationTask( /** * Generates the initial files for a prompt using an LLM. * @param evalID ID of the eval for which files are generated. - * @param gateway Gateway. * @param model Name of the model used for generation. * @param env Environment that is currently being run. * @param promptName Name of the prompt being generated. @@ -450,11 +417,11 @@ async function generateInitialFiles( evalID: EvalID, env: Environment, promptDef: RootPromptDefinition, - codegenContext: LlmGenerateFilesContext, + codegenRequest: LlmGenerateFilesRequest, contextFiles: LlmContextFile[], abortSignal: AbortSignal, progress: ProgressLogger, -): Promise { +): Promise { if (options.localMode) { const localFilesDirectory = join(LLM_OUTPUT_DIR, env.id, promptDef.name); const filePaths = globSync('**/*', {cwd: localFilesDirectory}); @@ -482,9 +449,9 @@ async function generateInitialFiles( progress.log(promptDef, 'codegen', 'Generating code with AI'); - const response = await env.gateway.generateInitialFiles( + const response = await env.executor.generateInitialFiles( evalID, - codegenContext, + codegenRequest, options.model, contextFiles, abortSignal, @@ -564,6 +531,8 @@ async function prepareSummary( } } + const executorInfo = await env.executor.getExecutorInfo?.(); + return { model, environmentId: env.id, @@ -586,8 +555,8 @@ async function prepareSummary( totalTokens, }, runner: { - id: env instanceof LocalEnvironment ? env.llm.id : 'remote', - displayName: env instanceof LocalEnvironment ? 
env.llm.displayName : 'Remote', + id: executorInfo.id, + displayName: executorInfo.displayName, }, } satisfies RunSummary; } diff --git a/runner/orchestration/serve-testing-worker.ts b/runner/orchestration/serve-testing-worker.ts index c5ae35f..936b203 100644 --- a/runner/orchestration/serve-testing-worker.ts +++ b/runner/orchestration/serve-testing-worker.ts @@ -9,7 +9,7 @@ import { ServeTestingWorkerMessage, ServeTestingWorkerResponseMessage, } from '../workers/serve-testing/worker-types.js'; -import {EvalID, Gateway} from './gateway.js'; +import {EvalID, Executor} from './executors/executor.js'; import {BrowserAgentTaskInput} from '../testing/browser-agent/models.js'; import PQueue from 'p-queue'; @@ -17,7 +17,6 @@ import PQueue from 'p-queue'; export async function serveAndTestApp( config: AssessmentConfig, evalID: EvalID, - gateway: Gateway, appDirectoryPath: string, env: Environment, rootPromptDef: RootPromptDefinition, @@ -28,9 +27,8 @@ export async function serveAndTestApp( ): Promise { progress.log(rootPromptDef, 'serve-testing', `Testing the app`); - const result = await gateway.serveBuild( + const result = await env.executor.serveWebApplication( evalID, - env, appDirectoryPath, rootPromptDef, progress, diff --git a/runner/reporting/report-logging.ts b/runner/reporting/report-logging.ts index b0dd85f..0e67455 100644 --- a/runner/reporting/report-logging.ts +++ b/runner/reporting/report-logging.ts @@ -16,9 +16,8 @@ import { formatTitleCard, } from './format.js'; import {Environment} from '../configuration/environment.js'; -import {LlmRunner} from '../codegen/llm-runner.js'; import {groupSimilarReports} from '../orchestration/grouping.js'; -import {LocalEnvironment} from '../configuration/environment-local.js'; +import {LocalExecutor} from '../orchestration/executors/local-executor.js'; /** * Generates a structured report on fs, based on the assessment run information. @@ -135,7 +134,7 @@ export async function writeReportToDisk(runInfo: RunInfo, id: string): Promise { + const executorInfo = await env.executor.getExecutorInfo(); + const mcpServerCount = executorInfo?.mcpServersLaunched ?? null; const titleCardText = [ 'Running a codegen evaluation with configuration:', '', @@ -154,9 +155,9 @@ export function logReportHeader( options.autoraterModel && options.autoraterModel !== DEFAULT_AUTORATER_MODEL_NAME ? ` - Autorater model: ${options.autoraterModel}` : null, - ` - Runner: ${env instanceof LocalEnvironment ? env.llm.displayName : 'Remote'}`, - env instanceof LocalEnvironment - ? ` - MCP servers: ${options.startMcp && env.mcpServerOptions.length ? env.mcpServerOptions.length : 'none'}` + ` - Runner: ${executorInfo.displayName}`, + mcpServerCount !== null + ? ` - MCP servers: ${options.startMcp && mcpServerCount > 0 ? mcpServerCount : 'none'}` : null, options.labels.length ? 
` - Labels: ${options.labels.join(', ')}` : null, ` - Concurrency: ${concurrency}`, diff --git a/runner/run-cli.ts b/runner/run-cli.ts index 2183fd5..bd0500d 100644 --- a/runner/run-cli.ts +++ b/runner/run-cli.ts @@ -14,7 +14,7 @@ import {serveApp} from './workers/serve-testing/serve-app.js'; import {ProgressLogger, ProgressType} from './progress/progress-logger.js'; import {formatTitleCard, redX} from './reporting/format.js'; import {NoopProgressLogger} from './progress/noop-progress-logger.js'; -import {LocalEnvironment} from './configuration/environment-local.js'; +import {LocalExecutor} from './orchestration/executors/local-executor.js'; export const RunModule = { builder, @@ -61,7 +61,7 @@ async function runApp(options: Options) { const {environment, rootPromptDef, files} = await resolveConfig(options); const progress = new ErrorOnlyProgressLogger(); - if (!(environment instanceof LocalEnvironment)) { + if (!(environment.executor instanceof LocalExecutor)) { console.error(`${redX()} Unable to run eval app locally for a remote environment.`); return; } @@ -92,7 +92,7 @@ async function runApp(options: Options) { await writeResponseFiles(directory, files, environment, rootPromptDef.name); await serveApp( - environment.serveCommand, + environment.executor.getServeCommand(), rootPromptDef, directory, new NoopProgressLogger(), diff --git a/runner/shared-interfaces.ts b/runner/shared-interfaces.ts index 9d78eea..d3cd067 100644 --- a/runner/shared-interfaces.ts +++ b/runner/shared-interfaces.ts @@ -547,3 +547,20 @@ export interface RunGroup { /** Runner used to generate code for the runs in the group. */ runner?: CodegenRunnerInfo; } + +/** Request information for a file generation. */ +export interface LlmGenerateFilesRequest { + /** System instructions that should be included. */ + systemInstructions: string; + /** Prompt being executed. */ + executablePrompt: string; + /** + * Combined system instructions and prompt for the environments + * where the two can't be provided separately. + * + * TODO(crisbeto): Can we explain the reason for this better? + */ + combinedPrompt: string; + /** Directory in which the generation will occur. */ + directory: string; +}
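
For illustration, a minimal sketch (not part of the patch) of how one eval could be driven through the new `Executor` surface. The `runSingleEval` helper, the literal directory, and the model string are hypothetical; the call shapes mirror the `executorSchema` in `runner/orchestration/executors/executor.ts` and the `LlmGenerateFilesRequest` interface above, and the sketch assumes `Environment` exposes the configured `executor`, as the orchestration code in this patch does. Writing the generated files to disk, repair attempts, and rating are omitted.

import PQueue from 'p-queue';
import type {
  Environment,
  ProgressLogger,
  RootPromptDefinition,
  ServeTestingResult,
} from 'web-codegen-scorer';

// Hypothetical helper (illustrative only): drives one eval end-to-end against
// whatever executor (local or remote) the resolved environment is configured with.
export async function runSingleEval(
  env: Environment,
  rootPromptDef: RootPromptDefinition,
  progress: ProgressLogger,
  testWhileServing: (serveUrl: string) => Promise<ServeTestingResult>,
): Promise<void> {
  const workerQueue = new PQueue({concurrency: 1});
  const abort = new AbortController();
  const directory = '/tmp/eval-app'; // illustrative working directory

  // Every later executor call is keyed by the eval ID handed out here.
  const evalID = await env.executor.initializeEval();

  try {
    // Ask the executor for the initial files of the eval app.
    const response = await env.executor.generateInitialFiles(
      evalID,
      {
        directory,
        systemInstructions: '...system instructions...',
        executablePrompt: '...prompt...',
        combinedPrompt: '...system instructions + prompt...',
      },
      'some-model-name', // illustrative model name
      [], // context files
      abort.signal,
    );

    // The returned files would normally be written into `directory` by the
    // existing file-system helpers before building; that step is omitted here.
    void response;

    // Build the app; locally this forks the build worker through the queue.
    await env.executor.performBuild(
      evalID,
      directory,
      rootPromptDef,
      workerQueue,
      abort.signal,
      progress,
    );

    // Serve the app and run the supplied testing logic while the server is up.
    await env.executor.serveWebApplication(
      evalID,
      directory,
      rootPromptDef,
      progress,
      testWhileServing,
    );
  } finally {
    await env.executor.finalizeEval(evalID);
  }
}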