Skip to content

Commit 09d667e

Browse files
committed
feat: collect lighthouse data
Adds some logic to collect Lighthouse data for the eval run.
1 parent 9416a96 commit 09d667e

File tree

10 files changed

+946
-66
lines changed

10 files changed

+946
-66
lines changed

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@
7171
"genkit": "^1.19.1",
7272
"genkitx-anthropic": "0.25.0",
7373
"handlebars": "^4.7.8",
74+
"lighthouse": "^12.8.2",
7475
"limiter": "^3.0.0",
7576
"marked": "^16.1.1",
7677
"node-fetch": "^3.3.2",

pnpm-lock.yaml

Lines changed: 810 additions & 58 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

runner/eval-cli.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ interface Options {
3838
autoraterModel?: string;
3939
a11yRepairAttempts?: number;
4040
logging?: 'text-only' | 'dynamic';
41+
skipLighthouse?: boolean;
4142
}
4243

4344
function builder(argv: Argv): Argv<Options> {
@@ -153,6 +154,11 @@ function builder(argv: Argv): Argv<Options> {
153154
default: 0,
154155
description: 'Number of repair attempts for discovered a11y violations',
155156
})
157+
.option('skip-lighthouse', {
158+
type: 'boolean',
159+
default: false,
160+
description: 'Whether to skip collecting Lighthouse data',
161+
})
156162
.strict()
157163
.version(false)
158164
.help()
@@ -197,6 +203,7 @@ async function handler(cliArgs: Arguments<Options>): Promise<void> {
197203
autoraterModel: cliArgs.autoraterModel,
198204
skipAiSummary: cliArgs.skipAiSummary,
199205
a11yRepairAttempts: cliArgs.a11yRepairAttempts,
206+
skipLighthouse: cliArgs.skipLighthouse,
200207
});
201208

202209
logReportToConsole(runInfo);

runner/index.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,11 @@ export {
1717
BuildResultStatus,
1818
type BuildResult,
1919
} from './workers/builder/builder-types.js';
20+
export {
21+
type LighthouseResult,
22+
type LighthouseCategory,
23+
type LighthouseAudit,
24+
} from './workers/serve-testing/worker-types.js';
2025
export {type UserJourneysResult} from './orchestration/user-journeys.js';
2126
export {type AutoRateResult} from './ratings/autoraters/auto-rate-shared.js';
2227
export {DEFAULT_MODEL_NAME, REPORT_VERSION} from './configuration/constants.js';

runner/orchestration/build-serve-loop.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ export async function attemptBuild(
4646
skipScreenshots: boolean,
4747
skipAxeTesting: boolean,
4848
enableAutoCsp: boolean,
49+
skipLighthouse: boolean,
4950
userJourneyAgentTaskInput: BrowserAgentTaskInput | undefined,
5051
maxAxeRepairAttempts: number,
5152
) {
@@ -125,6 +126,7 @@ export async function attemptBuild(
125126
skipScreenshots,
126127
skipAxeTesting,
127128
enableAutoCsp,
129+
skipLighthouse,
128130
userJourneyAgentTaskInput,
129131
);
130132
}
@@ -194,6 +196,7 @@ export async function attemptBuild(
194196
skipScreenshots,
195197
skipAxeTesting,
196198
enableAutoCsp,
199+
skipLighthouse,
197200
userJourneyAgentTaskInput,
198201
);
199202

runner/orchestration/generate.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ export async function generateCodeAndAssess(options: {
8686
logging?: 'text-only' | 'dynamic';
8787
autoraterModel?: string;
8888
a11yRepairAttempts?: number;
89+
skipLighthouse?: boolean;
8990
}): Promise<RunInfo> {
9091
const env = await getEnvironmentByPath(options.environmentConfigPath, options.runner);
9192
const ratingLlm = await getRunnerByName('genkit');
@@ -181,6 +182,7 @@ export async function generateCodeAndAssess(options: {
181182
progress,
182183
options.autoraterModel || DEFAULT_AUTORATER_MODEL_NAME,
183184
options.a11yRepairAttempts ?? 0,
185+
!!options.skipLighthouse,
184186
),
185187
// 10min max per app evaluation. We just want to make sure it never gets stuck.
186188
10,
@@ -311,6 +313,7 @@ async function startEvaluationTask(
311313
progress: ProgressLogger,
312314
autoraterModel: string,
313315
a11yRepairAttempts: number,
316+
skipLighthouse: boolean,
314317
): Promise<AssessmentResult[]> {
315318
// Set up the project structure once for the root project.
316319
const {directory, cleanup} = await setupProjectStructure(
@@ -434,6 +437,7 @@ async function startEvaluationTask(
434437
skipScreenshots,
435438
skipAxeTesting,
436439
enableAutoCsp,
440+
skipLighthouse,
437441
userJourneyAgentTaskInput,
438442
a11yRepairAttempts,
439443
);

runner/orchestration/serve-testing-worker.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ export async function serveAndTestApp(
2626
skipScreenshots: boolean,
2727
skipAxeTesting: boolean,
2828
enableAutoCsp: boolean,
29+
skipLighthouse: boolean,
2930
userJourneyAgentTaskInput?: BrowserAgentTaskInput,
3031
): Promise<ServeTestingResult> {
3132
progress.log(rootPromptDef, 'serve-testing', `Testing the app`);
@@ -43,6 +44,7 @@ export async function serveAndTestApp(
4344
enableAutoCsp,
4445
includeAxeTesting: skipAxeTesting === false,
4546
takeScreenshots: skipScreenshots === false,
47+
includeLighthouseData: skipLighthouse === false,
4648
userJourneyAgentTaskInput,
4749
};
4850

runner/workers/serve-testing/puppeteer.ts

Lines changed: 83 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
import {AxePuppeteer} from '@axe-core/puppeteer';
2-
import {Result} from 'axe-core';
2+
import {Result as AxeResult} from 'axe-core';
33
import puppeteer from 'puppeteer';
4+
import lighthouse, {RunnerResult as LighthouseRunnerResult} from 'lighthouse';
45
import {callWithTimeout} from '../../utils/timeout.js';
56
import {AutoCsp} from './auto-csp.js';
67
import {CspViolation} from './auto-csp-types.js';
7-
import {ServeTestingProgressLogFn} from './worker-types.js';
8+
import {LighthouseAudit, LighthouseResult, ServeTestingProgressLogFn} from './worker-types.js';
89

910
/**
1011
* Uses Puppeteer to take a screenshot of the main page, perform Axe testing,
@@ -18,13 +19,15 @@ export async function runAppInPuppeteer(
1819
includeAxeTesting: boolean,
1920
progressLog: ServeTestingProgressLogFn,
2021
enableAutoCsp: boolean,
22+
includeLighthouseData: boolean,
2123
) {
2224
const runtimeErrors: string[] = [];
2325

2426
// Undefined by default so it gets flagged correctly as `skipped` if there's no data.
2527
let cspViolations: CspViolation[] | undefined;
2628
let screenshotBase64Data: string | undefined;
27-
let axeViolations: Result[] | undefined;
29+
let axeViolations: AxeResult[] | undefined;
30+
let lighthouseResult: LighthouseResult | undefined;
2831

2932
try {
3033
const browser = await puppeteer.launch({
@@ -139,6 +142,35 @@ export async function runAppInPuppeteer(
139142
);
140143
progressLog('success', 'Screenshot captured and encoded');
141144
}
145+
146+
if (includeLighthouseData) {
147+
try {
148+
progressLog('eval', `Gathering Lighthouse data from ${hostUrl}`);
149+
const lighthouseData = await lighthouse(
150+
hostUrl,
151+
undefined,
152+
{
153+
extends: 'lighthouse:default',
154+
settings: {
155+
// Exclude accessibility since it's already covered by Axe above.
156+
onlyCategories: ['performance', 'best-practices'],
157+
},
158+
},
159+
page,
160+
);
161+
162+
lighthouseResult = lighthouseData ? processLighthouseData(lighthouseData) : undefined;
163+
164+
if (lighthouseResult) {
165+
progressLog('success', 'Lighthouse data has been collected');
166+
} else {
167+
progressLog('error', 'Lighthouse did not produce usable data');
168+
}
169+
} catch (lighthouseError: any) {
170+
progressLog('error', 'Could not gather Lighthouse data', lighthouseError.message);
171+
}
172+
}
173+
142174
await browser.close();
143175
} catch (screenshotError: any) {
144176
let details: string = screenshotError.message;
@@ -150,5 +182,52 @@ export async function runAppInPuppeteer(
150182
progressLog('error', 'Could not take screenshot', details);
151183
}
152184

153-
return {screenshotBase64Data, runtimeErrors, axeViolations, cspViolations};
185+
return {screenshotBase64Data, runtimeErrors, axeViolations, cspViolations, lighthouseResult};
186+
}
187+
188+
/**
 * Condenses a raw Lighthouse run into the audits that are kept alongside the eval result.
 *
 * An audit is retained only when all of the following hold:
 *  - its `score` is neither `null` nor `undefined`,
 *  - its `scoreDisplayMode` is `binary` or `numeric`,
 *  - it either has no `details`, or the details are of type `list`, `opportunity`,
 *    a `checklist` with at least one item key, or a `table` with at least one item.
 *
 * Retained audits are grouped under the first category whose `auditRefs` mention them.
 * Anything that no category references ends up in `uncategorized`.
 *
 * @param data Result object produced by the `lighthouse` call.
 * @returns The grouped audits, or `undefined` if the run yielded neither categories
 *   nor any retained audit.
 */
function processLighthouseData(data: LighthouseRunnerResult): LighthouseResult | undefined {
  const availableAudits = new Map<string, LighthouseAudit>();
  const result: LighthouseResult = {categories: [], uncategorized: []};

  for (const audit of Object.values(data.lhr.audits)) {
    const type = audit.details?.type;
    const displayMode = audit.scoreDisplayMode;
    const isAllowedType =
      !type ||
      type === 'list' ||
      type === 'opportunity' ||
      (type === 'checklist' && Object.keys(audit.details?.items || {}).length > 0) ||
      // Read `items` defensively (like the checklist branch above): one malformed table
      // audit must not throw and cost us the data of the entire Lighthouse run. The
      // explicit comparison also keeps this flag a real boolean instead of a number.
      (type === 'table' && (audit.details?.items?.length ?? 0) > 0);
    const isAllowedDisplayMode = displayMode === 'binary' || displayMode === 'numeric';

    if (audit.score != null && isAllowedType && isAllowedDisplayMode) {
      availableAudits.set(audit.id, audit);
    }
  }

  for (const category of Object.values(data.lhr.categories)) {
    const auditsForCategory: LighthouseAudit[] = [];

    for (const ref of category.auditRefs) {
      const audit = availableAudits.get(ref.id);

      if (audit) {
        auditsForCategory.push(audit);
        // Claim the audit so that it is reported once: neither under a later category
        // nor in the uncategorized bucket.
        availableAudits.delete(ref.id);
      }
    }

    result.categories.push({
      id: category.id,
      displayName: category.title,
      // Missing descriptions/scores are normalized so consumers always get a string/number.
      description: category.description || '',
      score: category.score || 0,
      audits: auditsForCategory,
    });
  }

  // Track all remaining audits as uncategorized.
  result.uncategorized.push(...availableAudits.values());

  return result.categories.length === 0 && result.uncategorized.length === 0 ? undefined : result;
}

runner/workers/serve-testing/worker-types.ts

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1+
import {Result as AxeResult} from 'axe-core';
2+
import {RunnerResult as LighthouseRunnerResult} from 'lighthouse';
13
import {ProgressType} from '../../progress/progress-logger.js';
24
import {AgentOutput, BrowserAgentTaskInput} from '../../testing/browser-agent/models.js';
3-
import {Result} from 'axe-core';
45
import {CspViolation} from './auto-csp-types.js';
56

67
/**
@@ -24,8 +25,11 @@ export interface ServeTestingWorkerMessage {
2425
/** Whether to enable the auto CSP checks. */
2526
enableAutoCsp: boolean;
2627

27-
/** User journey browser agent task input */
28+
/** User journey browser agent task input. */
2829
userJourneyAgentTaskInput: BrowserAgentTaskInput | undefined;
30+
31+
/** Whether to capture Lighthouse data for the run. */
32+
includeLighthouseData: boolean;
2933
}
3034

3135
export interface ServeTestingResult {
@@ -34,7 +38,8 @@ export interface ServeTestingResult {
3438
runtimeErrors?: string;
3539
userJourneyAgentOutput: AgentOutput | null;
3640
cspViolations?: CspViolation[];
37-
axeViolations?: Result[];
41+
axeViolations?: AxeResult[];
42+
lighthouseResult?: LighthouseResult;
3843
}
3944

4045
export interface ServeTestingResultMessage {
@@ -60,3 +65,18 @@ export type ServeTestingProgressLogFn = (
6065
export type ServeTestingWorkerResponseMessage =
6166
| ServeTestingProgressLogMessage
6267
| ServeTestingResultMessage;
68+
69+
/**
 * Type of a single entry of the `audits` record in a Lighthouse report (`lhr.audits`).
 * NOTE(review): the `'x'` key looks like an arbitrary placeholder used only to index
 * into the record type — confirm against the Lighthouse typings.
 */
export type LighthouseAudit = LighthouseRunnerResult['lhr']['audits']['x']; // Lighthouse doesn't export this so we need to dig for it.
70+
71+
/** A Lighthouse category together with the audits that were retained for it. */
export interface LighthouseCategory {
72+
/** Identifier of the category as reported by Lighthouse. */
id: string;
73+
/** Human-readable name, taken from the Lighthouse category title. */
displayName: string;
74+
/** Category description; an empty string when Lighthouse provides none. */
description: string;
75+
/** Category score; falls back to `0` when Lighthouse reports no score. */
score: number;
76+
/** Retained audits that are referenced by this category. */
audits: LighthouseAudit[];
77+
}
78+
79+
/** Lighthouse data collected for a single serve-testing run. */
export interface LighthouseResult {
80+
/** One entry per category found in the Lighthouse report. */
categories: LighthouseCategory[];
81+
/** Retained audits that are not referenced by any category. */
uncategorized: LighthouseAudit[];
82+
}

runner/workers/serve-testing/worker.ts

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import {CspViolation} from '../serve-testing/auto-csp-types.js';
55
import {runBrowserAgentUserJourneyTests} from '../serve-testing/browser-agent.js';
66
import {runAppInPuppeteer} from '../serve-testing/puppeteer.js';
77
import {
8+
LighthouseResult,
89
ServeTestingProgressLogMessage,
910
ServeTestingResult,
1011
ServeTestingWorkerMessage,
@@ -19,6 +20,7 @@ process.on('message', async (message: ServeTestingWorkerMessage) => {
1920
includeAxeTesting,
2021
takeScreenshots,
2122
userJourneyAgentTaskInput,
23+
includeLighthouseData,
2224
} = message;
2325
const runtimeErrors: string[] = [];
2426
const progressLog = (state: ProgressType, message: string, details?: string) => {
@@ -29,10 +31,11 @@ process.on('message', async (message: ServeTestingWorkerMessage) => {
2931
};
3032

3133
let result: ServeTestingResult;
32-
let screenshotBase64Data: string | undefined = undefined;
34+
let screenshotBase64Data: string | undefined;
3335
let axeViolations: any[] | undefined = [];
3436
let userJourneyAgentOutput: AgentOutput | null = null;
3537
let cspViolations: CspViolation[] | undefined = [];
38+
let lighthouseResult: LighthouseResult | undefined;
3639

3740
try {
3841
const puppeteerResult = await callWithTimeout(
@@ -45,13 +48,15 @@ process.on('message', async (message: ServeTestingWorkerMessage) => {
4548
!!includeAxeTesting,
4649
progressLog,
4750
!!enableAutoCsp,
51+
includeLighthouseData,
4852
),
4953
4, // 4min
5054
);
5155

5256
screenshotBase64Data = puppeteerResult.screenshotBase64Data;
5357
axeViolations = puppeteerResult.axeViolations;
5458
cspViolations = puppeteerResult.cspViolations;
59+
lighthouseResult = puppeteerResult.lighthouseResult;
5560

5661
runtimeErrors.push(...puppeteerResult.runtimeErrors);
5762

@@ -72,6 +77,7 @@ process.on('message', async (message: ServeTestingWorkerMessage) => {
7277
axeViolations,
7378
userJourneyAgentOutput: userJourneyAgentOutput,
7479
cspViolations,
80+
lighthouseResult,
7581
};
7682
} catch (error: any) {
7783
const cleanErrorMessage = cleanupBuildMessage(error.message);
@@ -80,6 +86,7 @@ process.on('message', async (message: ServeTestingWorkerMessage) => {
8086
runtimeErrors: runtimeErrors.join('\n'),
8187
userJourneyAgentOutput: userJourneyAgentOutput,
8288
cspViolations,
89+
lighthouseResult,
8390
};
8491
}
8592

0 commit comments

Comments
 (0)