Skip to content

Commit 6dba326

Browse files
committed
refactor: simplify signature of some functions
Switches to passing around an options object, instead of having functions with 18 arguments.
1 parent 529e9b4 commit 6dba326

File tree

4 files changed: +65 -97 lines changed

runner/orchestration/build-serve-loop.ts

Lines changed: 12 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,12 @@ import PQueue from 'p-queue';
22
import {LlmGenerateFilesResponse} from '../codegen/llm-runner.js';
33
import {BuildResultStatus} from '../workers/builder/builder-types.js';
44
import {Environment} from '../configuration/environment.js';
5-
import {AttemptDetails, LlmContextFile, RootPromptDefinition} from '../shared-interfaces.js';
5+
import {
6+
AssessmentConfig,
7+
AttemptDetails,
8+
LlmContextFile,
9+
RootPromptDefinition,
10+
} from '../shared-interfaces.js';
611
import {DEFAULT_MAX_REPAIR_ATTEMPTS} from '../configuration/constants.js';
712
import {ProgressLogger} from '../progress/progress-logger.js';
813
import {runBuild} from './build-worker.js';
@@ -31,9 +36,9 @@ import {BrowserAgentTaskInput} from '../testing/browser-agent/models.js';
3136
* @param workerConcurrencyQueue Concurrency queue for controlling parallelism of worker invocations (as they are more expensive than LLM calls).
3237
*/
3338
export async function attemptBuild(
39+
config: AssessmentConfig,
3440
evalID: EvalID,
3541
gateway: Gateway<Environment>,
36-
model: string,
3742
env: Environment,
3843
rootPromptDef: RootPromptDefinition,
3944
directory: string,
@@ -43,12 +48,7 @@ export async function attemptBuild(
4348
abortSignal: AbortSignal,
4449
workerConcurrencyQueue: PQueue,
4550
progress: ProgressLogger,
46-
skipScreenshots: boolean,
47-
skipAxeTesting: boolean,
48-
enableAutoCsp: boolean,
49-
skipLighthouse: boolean,
5051
userJourneyAgentTaskInput: BrowserAgentTaskInput | undefined,
51-
maxAxeRepairAttempts: number,
5252
) {
5353
const initialBuildResult = await runBuild(
5454
evalID,
@@ -93,7 +93,7 @@ export async function attemptBuild(
9393
const attempt = await repairAndBuild(
9494
evalID,
9595
gateway,
96-
model,
96+
config.model,
9797
env,
9898
rootPromptDef,
9999
directory,
@@ -115,6 +115,7 @@ export async function attemptBuild(
115115
// Now that we got a working app, try to serve it and collect
116116
// findings from the running app.
117117
lastAttempt.serveTestingResult = await serveAndTestApp(
118+
config,
118119
evalID,
119120
gateway,
120121
directory,
@@ -123,10 +124,6 @@ export async function attemptBuild(
123124
workerConcurrencyQueue,
124125
abortSignal,
125126
progress,
126-
skipScreenshots,
127-
skipAxeTesting,
128-
enableAutoCsp,
129-
skipLighthouse,
130127
userJourneyAgentTaskInput,
131128
);
132129
}
@@ -138,7 +135,7 @@ export async function attemptBuild(
138135
while (
139136
lastAttempt.serveTestingResult &&
140137
(lastAttempt.serveTestingResult.axeViolations?.length ?? 0) > 0 &&
141-
axeRepairAttempts < maxAxeRepairAttempts
138+
axeRepairAttempts < (config.a11yRepairAttempts ?? 0)
142139
) {
143140
axeRepairAttempts++;
144141
progress.log(
@@ -158,7 +155,7 @@ export async function attemptBuild(
158155
const attempt = await repairAndBuild(
159156
evalID,
160157
gateway,
161-
model,
158+
config.model,
162159
env,
163160
rootPromptDef,
164161
directory,
@@ -185,6 +182,7 @@ export async function attemptBuild(
185182
// Re-run serving & tests after Axe repair.
186183
// This allows us to check if we fixed the violations.
187184
attempt.serveTestingResult = await serveAndTestApp(
185+
config,
188186
evalID,
189187
gateway,
190188
directory,
@@ -193,10 +191,6 @@ export async function attemptBuild(
193191
workerConcurrencyQueue,
194192
abortSignal,
195193
progress,
196-
skipScreenshots,
197-
skipAxeTesting,
198-
enableAutoCsp,
199-
skipLighthouse,
200194
userJourneyAgentTaskInput,
201195
);
202196

runner/orchestration/generate.ts

Lines changed: 21 additions & 70 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@ import {Environment} from '../configuration/environment.js';
1919
import {rateGeneratedCode} from '../ratings/rate-code.js';
2020
import {redX} from '../reporting/format.js';
2121
import {
22+
AssessmentConfig,
2223
AssessmentResult,
2324
AttemptDetails,
2425
CompletionStats,
@@ -49,7 +50,7 @@ import {getRunGroupId} from './grouping.js';
4950
import {executeCommand} from '../utils/exec.js';
5051
import {EvalID, Gateway} from './gateway.js';
5152
import {LocalEnvironment} from '../configuration/environment-local.js';
52-
import {getRunnerByName, RunnerName} from '../codegen/runner-creation.js';
53+
import {getRunnerByName} from '../codegen/runner-creation.js';
5354
import {summarizeReportWithAI} from '../reporting/report-ai-summary.js';
5455

5556
/**
@@ -64,29 +65,7 @@ import {summarizeReportWithAI} from '../reporting/report-ai-summary.js';
6465
* @returns A Promise that resolves to an array of AssessmentResult objects,
6566
* each containing the prompt, generated code, and final validation status.
6667
*/
67-
export async function generateCodeAndAssess(options: {
68-
model: string;
69-
runner: RunnerName;
70-
environmentConfigPath: string;
71-
localMode: boolean;
72-
limit: number;
73-
concurrency: number | 'auto';
74-
reportName: string;
75-
skipScreenshots: boolean;
76-
startMcp?: boolean;
77-
ragEndpoint?: string;
78-
outputDirectory?: string;
79-
promptFilter?: string;
80-
labels: string[];
81-
skipAiSummary?: boolean;
82-
skipAxeTesting: boolean;
83-
enableUserJourneyTesting?: boolean;
84-
enableAutoCsp?: boolean;
85-
logging?: 'text-only' | 'dynamic';
86-
autoraterModel?: string;
87-
a11yRepairAttempts?: number;
88-
skipLighthouse?: boolean;
89-
}): Promise<RunInfo> {
68+
export async function generateCodeAndAssess(options: AssessmentConfig): Promise<RunInfo> {
9069
const env = await getEnvironmentByPath(options.environmentConfigPath, options.runner);
9170
const ratingLlm = await getRunnerByName('genkit');
9271

@@ -162,25 +141,15 @@ export async function generateCodeAndAssess(options: {
162141
`Evaluation of ${rootPromptDef.name}`,
163142
async abortSignal =>
164143
startEvaluationTask(
144+
options,
165145
evalID,
166146
env,
167147
env.gateway,
168148
ratingLlm,
169-
options.model,
170149
rootPromptDef,
171-
options.localMode,
172-
options.skipScreenshots,
173-
options.outputDirectory,
174-
options.ragEndpoint,
175150
abortSignal,
176-
options.skipAxeTesting,
177-
!!options.enableUserJourneyTesting,
178-
!!options.enableAutoCsp,
179151
workerConcurrencyQueue,
180152
progress,
181-
options.autoraterModel || DEFAULT_AUTORATER_MODEL_NAME,
182-
options.a11yRepairAttempts ?? 0,
183-
!!options.skipLighthouse,
184153
),
185154
// 10min max per app evaluation. We just want to make sure it never gets stuck.
186155
10,
@@ -291,40 +260,30 @@ export async function generateCodeAndAssess(options: {
291260
* @returns A Promise that resolves to an AssessmentResult object containing all details of the task's execution.
292261
*/
293262
async function startEvaluationTask(
263+
config: AssessmentConfig,
294264
evalID: EvalID,
295265
env: Environment,
296266
gateway: Gateway<Environment>,
297267
ratingLlm: GenkitRunner,
298-
model: string,
299268
rootPromptDef: PromptDefinition | MultiStepPromptDefinition,
300-
localMode: boolean,
301-
skipScreenshots: boolean,
302-
outputDirectory: string | undefined,
303-
ragEndpoint: string | undefined,
304269
abortSignal: AbortSignal,
305-
skipAxeTesting: boolean,
306-
enableUserJourneyTesting: boolean,
307-
enableAutoCsp: boolean,
308270
workerConcurrencyQueue: PQueue,
309271
progress: ProgressLogger,
310-
autoraterModel: string,
311-
a11yRepairAttempts: number,
312-
skipLighthouse: boolean,
313272
): Promise<AssessmentResult[]> {
314273
// Set up the project structure once for the root project.
315274
const {directory, cleanup} = await setupProjectStructure(
316275
env,
317276
rootPromptDef,
318277
progress,
319-
outputDirectory,
278+
config.outputDirectory,
320279
);
321280

322281
const results: AssessmentResult[] = [];
323282
const defsToExecute = rootPromptDef.kind === 'single' ? [rootPromptDef] : rootPromptDef.steps;
324283

325284
for (const promptDef of defsToExecute) {
326285
const [fullPromptText, systemInstructions] = await Promise.all([
327-
env.getPrompt(promptDef.systemPromptType, promptDef.prompt, ragEndpoint),
286+
env.getPrompt(promptDef.systemPromptType, promptDef.prompt, config.ragEndpoint),
328287
env.getPrompt(promptDef.systemPromptType, ''),
329288
]);
330289

@@ -334,9 +293,8 @@ async function startEvaluationTask(
334293

335294
// Generate the initial set of files through the LLM.
336295
const initialResponse = await generateInitialFiles(
296+
config,
337297
evalID,
338-
gateway,
339-
model,
340298
env,
341299
promptDef,
342300
{
@@ -349,7 +307,6 @@ async function startEvaluationTask(
349307
possiblePackageManagers: getPossiblePackageManagers().slice(),
350308
},
351309
contextFiles,
352-
localMode,
353310
abortSignal,
354311
progress,
355312
);
@@ -406,21 +363,22 @@ async function startEvaluationTask(
406363

407364
// TODO: Only execute the serve command on the "final working attempt".
408365
// TODO: Incorporate usage.
409-
const userJourneyAgentTaskInput: BrowserAgentTaskInput | undefined = enableUserJourneyTesting
410-
? {
411-
userJourneys: userJourneys.result,
412-
appPrompt: defsToExecute[0].prompt,
413-
}
414-
: undefined;
366+
const userJourneyAgentTaskInput: BrowserAgentTaskInput | undefined =
367+
config.enableUserJourneyTesting
368+
? {
369+
userJourneys: userJourneys.result,
370+
appPrompt: defsToExecute[0].prompt,
371+
}
372+
: undefined;
415373

416374
const attemptDetails: AttemptDetails[] = []; // Store details for assessment.json
417375

418376
// Try to build the files in the root prompt directory.
419377
// This will also attempt to fix issues with the generated code.
420378
const attempt = await attemptBuild(
379+
config,
421380
evalID,
422381
gateway,
423-
model,
424382
env,
425383
rootPromptDef,
426384
directory,
@@ -430,12 +388,7 @@ async function startEvaluationTask(
430388
abortSignal,
431389
workerConcurrencyQueue,
432390
progress,
433-
skipScreenshots,
434-
skipAxeTesting,
435-
enableAutoCsp,
436-
skipLighthouse,
437391
userJourneyAgentTaskInput,
438-
a11yRepairAttempts,
439392
);
440393

441394
if (!attempt) {
@@ -455,7 +408,7 @@ async function startEvaluationTask(
455408
attempt.axeRepairAttempts,
456409
abortSignal,
457410
progress,
458-
autoraterModel,
411+
config.autoraterModel || DEFAULT_AUTORATER_MODEL_NAME,
459412
);
460413

461414
results.push({
@@ -493,18 +446,16 @@ async function startEvaluationTask(
493446
* @param abortSignal Signal to fire when this process should be aborted.
494447
*/
495448
async function generateInitialFiles(
449+
options: AssessmentConfig,
496450
evalID: EvalID,
497-
gateway: Gateway<Environment>,
498-
model: string,
499451
env: Environment,
500452
promptDef: RootPromptDefinition,
501453
codegenContext: LlmGenerateFilesContext,
502454
contextFiles: LlmContextFile[],
503-
localMode: boolean,
504455
abortSignal: AbortSignal,
505456
progress: ProgressLogger,
506457
): Promise<LlmGenerateFilesResponse> {
507-
if (localMode) {
458+
if (options.localMode) {
508459
const localFilesDirectory = join(LLM_OUTPUT_DIR, env.id, promptDef.name);
509460
const filePaths = globSync('**/*', {cwd: localFilesDirectory});
510461

@@ -531,10 +482,10 @@ async function generateInitialFiles(
531482

532483
progress.log(promptDef, 'codegen', 'Generating code with AI');
533484

534-
const response = await gateway.generateInitialFiles(
485+
const response = await env.gateway.generateInitialFiles(
535486
evalID,
536487
codegenContext,
537-
model,
488+
options.model,
538489
contextFiles,
539490
abortSignal,
540491
);

runner/orchestration/serve-testing-worker.ts

Lines changed: 6 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@ import {ChildProcess, fork} from 'node:child_process';
22
import path from 'node:path';
33
import {Environment} from '../configuration/environment.js';
44
import {ProgressLogger} from '../progress/progress-logger.js';
5-
import {RootPromptDefinition} from '../shared-interfaces.js';
5+
import {AssessmentConfig, RootPromptDefinition} from '../shared-interfaces.js';
66
import {killChildProcessGracefully} from '../utils/kill-gracefully.js';
77
import {
88
ServeTestingResult,
@@ -15,6 +15,7 @@ import PQueue from 'p-queue';
1515

1616
/** Attempts to run & test an eval app. */
1717
export async function serveAndTestApp(
18+
config: AssessmentConfig,
1819
evalID: EvalID,
1920
gateway: Gateway<Environment>,
2021
appDirectoryPath: string,
@@ -23,10 +24,6 @@ export async function serveAndTestApp(
2324
workerConcurrencyQueue: PQueue,
2425
abortSignal: AbortSignal,
2526
progress: ProgressLogger,
26-
skipScreenshots: boolean,
27-
skipAxeTesting: boolean,
28-
enableAutoCsp: boolean,
29-
skipLighthouse: boolean,
3027
userJourneyAgentTaskInput?: BrowserAgentTaskInput,
3128
): Promise<ServeTestingResult> {
3229
progress.log(rootPromptDef, 'serve-testing', `Testing the app`);
@@ -41,10 +38,10 @@ export async function serveAndTestApp(
4138
const serveParams: ServeTestingWorkerMessage = {
4239
serveUrl,
4340
appName: rootPromptDef.name,
44-
enableAutoCsp,
45-
includeAxeTesting: skipAxeTesting === false,
46-
takeScreenshots: skipScreenshots === false,
47-
includeLighthouseData: skipLighthouse === false,
41+
enableAutoCsp: !!config.enableAutoCsp,
42+
includeAxeTesting: config.skipAxeTesting === false,
43+
takeScreenshots: config.skipScreenshots === false,
44+
includeLighthouseData: config.skipLighthouse === false,
4845
userJourneyAgentTaskInput,
4946
};
5047

0 commit comments

Comments
 (0)