Skip to content

Commit dafe06b

Browse files
committed
feat: collect lighthouse data
Adds some logic to collect Lighthouse data for the eval run.
1 parent b29e756 commit dafe06b

File tree

11 files changed

+928
-38
lines changed

11 files changed

+928
-38
lines changed

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@
7171
"genkit": "^1.19.1",
7272
"genkitx-anthropic": "0.25.0",
7373
"handlebars": "^4.7.8",
74+
"lighthouse": "^12.8.2",
7475
"limiter": "^3.0.0",
7576
"marked": "^16.1.1",
7677
"node-fetch": "^3.3.2",

pnpm-lock.yaml

Lines changed: 782 additions & 30 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

runner/eval-cli.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ interface Options {
3838
autoraterModel?: string;
3939
a11yRepairAttempts?: number;
4040
logging?: 'text-only' | 'dynamic';
41+
skipLighthouse?: boolean;
4142
}
4243

4344
function builder(argv: Argv): Argv<Options> {
@@ -153,6 +154,11 @@ function builder(argv: Argv): Argv<Options> {
153154
default: 0,
154155
description: 'Number of repair attempts for discovered a11y violations',
155156
})
157+
.option('skip-lighthouse', {
158+
type: 'boolean',
159+
default: false,
160+
description: 'Whether to skip collecting Lighthouse data',
161+
})
156162
.strict()
157163
.version(false)
158164
.help()
@@ -197,6 +203,7 @@ async function handler(cliArgs: Arguments<Options>): Promise<void> {
197203
autoraterModel: cliArgs.autoraterModel,
198204
skipAiSummary: cliArgs.skipAiSummary,
199205
a11yRepairAttempts: cliArgs.a11yRepairAttempts,
206+
skipLighthouse: cliArgs.skipLighthouse,
200207
});
201208

202209
logReportToConsole(runInfo);

runner/index.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,11 @@ export {
1717
BuildResultStatus,
1818
type BuildResult,
1919
} from './workers/builder/builder-types.js';
20+
export {
21+
type LighthouseResult,
22+
type LighthouseCategory,
23+
type LighthouseAudit,
24+
} from './workers/serve-testing/worker-types.js';
2025
export {type UserJourneysResult} from './orchestration/user-journeys.js';
2126
export {type AutoRateResult} from './ratings/autoraters/auto-rate-shared.js';
2227
export {DEFAULT_MODEL_NAME, REPORT_VERSION} from './configuration/constants.js';

runner/orchestration/build-serve-loop.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ export async function attemptBuild(
4646
skipScreenshots: boolean,
4747
skipAxeTesting: boolean,
4848
enableAutoCsp: boolean,
49+
skipLighthouse: boolean,
4950
userJourneyAgentTaskInput: BrowserAgentTaskInput | undefined,
5051
maxAxeRepairAttempts: number,
5152
) {
@@ -125,6 +126,7 @@ export async function attemptBuild(
125126
skipScreenshots,
126127
skipAxeTesting,
127128
enableAutoCsp,
129+
skipLighthouse,
128130
userJourneyAgentTaskInput,
129131
);
130132
}
@@ -194,6 +196,7 @@ export async function attemptBuild(
194196
skipScreenshots,
195197
skipAxeTesting,
196198
enableAutoCsp,
199+
skipLighthouse,
197200
userJourneyAgentTaskInput,
198201
);
199202

runner/orchestration/generate.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ export async function generateCodeAndAssess(options: {
8585
logging?: 'text-only' | 'dynamic';
8686
autoraterModel?: string;
8787
a11yRepairAttempts?: number;
88+
skipLighthouse?: boolean;
8889
}): Promise<RunInfo> {
8990
const env = await getEnvironmentByPath(options.environmentConfigPath, options.runner);
9091
const ratingLlm = await getRunnerByName('genkit');
@@ -179,6 +180,7 @@ export async function generateCodeAndAssess(options: {
179180
progress,
180181
options.autoraterModel || DEFAULT_AUTORATER_MODEL_NAME,
181182
options.a11yRepairAttempts ?? 0,
183+
!!options.skipLighthouse,
182184
),
183185
// 10min max per app evaluation. We just want to make sure it never gets stuck.
184186
10,
@@ -307,6 +309,7 @@ async function startEvaluationTask(
307309
progress: ProgressLogger,
308310
autoraterModel: string,
309311
a11yRepairAttempts: number,
312+
skipLighthouse: boolean,
310313
): Promise<AssessmentResult[]> {
311314
// Set up the project structure once for the root project.
312315
const {directory, cleanup} = await setupProjectStructure(
@@ -430,6 +433,7 @@ async function startEvaluationTask(
430433
skipScreenshots,
431434
skipAxeTesting,
432435
enableAutoCsp,
436+
skipLighthouse,
433437
userJourneyAgentTaskInput,
434438
a11yRepairAttempts,
435439
);

runner/orchestration/serve-testing-worker.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ export async function serveAndTestApp(
2626
skipScreenshots: boolean,
2727
skipAxeTesting: boolean,
2828
enableAutoCsp: boolean,
29+
skipLighthouse: boolean,
2930
userJourneyAgentTaskInput?: BrowserAgentTaskInput,
3031
): Promise<ServeTestingResult> {
3132
progress.log(rootPromptDef, 'serve-testing', `Testing the app`);
@@ -43,6 +44,7 @@ export async function serveAndTestApp(
4344
enableAutoCsp,
4445
includeAxeTesting: skipAxeTesting === false,
4546
takeScreenshots: skipScreenshots === false,
47+
includeLighthouseData: skipLighthouse === false,
4648
userJourneyAgentTaskInput,
4749
};
4850

runner/workers/serve-testing/lighthouse.ts

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
import lighthouse from 'lighthouse';
2+
import {Page} from 'puppeteer';
3+
import {LighthouseAudit, LighthouseResult} from './worker-types.js';
4+
5+
/**
 * Runs Lighthouse against the given Puppeteer page and condenses the report
 * down to the audits that carry a score and displayable details.
 *
 * @param hostUrl URL that Lighthouse should audit.
 * @param page Puppeteer page that is handed to Lighthouse for the run.
 * @returns The filtered audits grouped by Lighthouse category, plus any audits
 *   that no category references. Resolves to `undefined` when Lighthouse
 *   returns no result, when the report carries a top-level runtime error, or
 *   when neither categories nor audits were collected.
 */
export async function getLighthouseData(
  hostUrl: string,
  page: Page,
): Promise<LighthouseResult | undefined> {
  const data = await lighthouse(
    hostUrl,
    undefined,
    {
      extends: 'lighthouse:default',
      settings: {
        // Accessibility is excluded because the serve-testing run already covers it with Axe.
        onlyCategories: ['performance', 'best-practices'],
      },
    },
    page,
  );

  // Lighthouse surfaces fatal problems through `lhr.runtimeError` instead of throwing.
  // Such a report should be discarded; otherwise its missing category scores would be
  // recorded below as a score of 0 and the run would look like a successful measurement.
  if (!data || data.lhr.runtimeError) {
    return undefined;
  }

  const availableAudits = new Map<string, LighthouseAudit>();
  const result: LighthouseResult = {categories: [], uncategorized: []};

  for (const audit of Object.values(data.lhr.audits)) {
    const type = audit.details?.type;
    const displayMode = audit.scoreDisplayMode;
    // Keep audits without details, lists, opportunities, and checklists/tables
    // that actually contain items. Empty checklists and tables are dropped.
    const isAllowedType =
      !type ||
      type === 'list' ||
      type === 'opportunity' ||
      (type === 'checklist' && Object.keys(audit.details?.items || {}).length > 0) ||
      (type === 'table' && Boolean(audit.details?.items.length));
    const isAllowedDisplayMode = displayMode === 'binary' || displayMode === 'numeric';

    if (audit.score != null && isAllowedType && isAllowedDisplayMode) {
      availableAudits.set(audit.id, audit);
    }
  }

  for (const category of Object.values(data.lhr.categories)) {
    const auditsForCategory: LighthouseAudit[] = [];

    for (const ref of category.auditRefs) {
      const audit = availableAudits.get(ref.id);

      if (audit) {
        auditsForCategory.push(audit);
        // Remove the audit so that only unreferenced ones remain for `uncategorized`.
        availableAudits.delete(ref.id);
      }
    }

    result.categories.push({
      id: category.id,
      displayName: category.title,
      description: category.description ?? '',
      score: category.score ?? 0,
      audits: auditsForCategory,
    });
  }

  // Track all remaining audits as uncategorized.
  result.uncategorized.push(...availableAudits.values());

  return result.categories.length === 0 && result.uncategorized.length === 0 ? undefined : result;
}

runner/workers/serve-testing/puppeteer.ts

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
import {AxePuppeteer} from '@axe-core/puppeteer';
2-
import {Result} from 'axe-core';
2+
import {Result as AxeResult} from 'axe-core';
33
import puppeteer from 'puppeteer';
44
import {callWithTimeout} from '../../utils/timeout.js';
55
import {AutoCsp} from './auto-csp.js';
66
import {CspViolation} from './auto-csp-types.js';
7-
import {ServeTestingProgressLogFn} from './worker-types.js';
7+
import {LighthouseResult, ServeTestingProgressLogFn} from './worker-types.js';
8+
import {getLighthouseData} from './lighthouse.js';
89

910
/**
1011
* Uses Puppeteer to take a screenshot of the main page, perform Axe testing,
@@ -18,13 +19,15 @@ export async function runAppInPuppeteer(
1819
includeAxeTesting: boolean,
1920
progressLog: ServeTestingProgressLogFn,
2021
enableAutoCsp: boolean,
22+
includeLighthouseData: boolean,
2123
) {
2224
const runtimeErrors: string[] = [];
2325

2426
// Undefined by default so it gets flagged correctly as `skipped` if there's no data.
2527
let cspViolations: CspViolation[] | undefined;
2628
let screenshotBase64Data: string | undefined;
27-
let axeViolations: Result[] | undefined;
29+
let axeViolations: AxeResult[] | undefined;
30+
let lighthouseResult: LighthouseResult | undefined;
2831

2932
try {
3033
const browser = await puppeteer.launch({
@@ -139,6 +142,22 @@ export async function runAppInPuppeteer(
139142
);
140143
progressLog('success', 'Screenshot captured and encoded');
141144
}
145+
146+
if (includeLighthouseData) {
147+
try {
148+
progressLog('eval', `Gathering Lighthouse data from ${hostUrl}`);
149+
lighthouseResult = await getLighthouseData(hostUrl, page);
150+
151+
if (lighthouseResult) {
152+
progressLog('success', 'Lighthouse data has been collected');
153+
} else {
154+
progressLog('error', 'Lighthouse did not produce usable data');
155+
}
156+
} catch (lighthouseError: any) {
157+
progressLog('error', 'Could not gather Lighthouse data', lighthouseError.message);
158+
}
159+
}
160+
142161
await browser.close();
143162
} catch (screenshotError: any) {
144163
let details: string = screenshotError.message;
@@ -150,5 +169,5 @@ export async function runAppInPuppeteer(
150169
progressLog('error', 'Could not take screenshot', details);
151170
}
152171

153-
return {screenshotBase64Data, runtimeErrors, axeViolations, cspViolations};
172+
return {screenshotBase64Data, runtimeErrors, axeViolations, cspViolations, lighthouseResult};
154173
}

runner/workers/serve-testing/worker-types.ts

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1+
import {Result as AxeResult} from 'axe-core';
2+
import {RunnerResult as LighthouseRunnerResult} from 'lighthouse';
13
import {ProgressType} from '../../progress/progress-logger.js';
24
import {AgentOutput, BrowserAgentTaskInput} from '../../testing/browser-agent/models.js';
3-
import {Result} from 'axe-core';
45
import {CspViolation} from './auto-csp-types.js';
56

67
/**
@@ -24,8 +25,11 @@ export interface ServeTestingWorkerMessage {
2425
/** Whether to enable the auto CSP checks. */
2526
enableAutoCsp: boolean;
2627

27-
/** User journey browser agent task input */
28+
/** User journey browser agent task input. */
2829
userJourneyAgentTaskInput: BrowserAgentTaskInput | undefined;
30+
31+
/** Whether to capture Lighthouse data for the run. */
32+
includeLighthouseData: boolean;
2933
}
3034

3135
export interface ServeTestingResult {
@@ -34,7 +38,8 @@ export interface ServeTestingResult {
3438
runtimeErrors?: string;
3539
userJourneyAgentOutput: AgentOutput | null;
3640
cspViolations?: CspViolation[];
37-
axeViolations?: Result[];
41+
axeViolations?: AxeResult[];
42+
lighthouseResult?: LighthouseResult;
3843
}
3944

4045
export interface ServeTestingResultMessage {
@@ -60,3 +65,18 @@ export type ServeTestingProgressLogFn = (
6065
export type ServeTestingWorkerResponseMessage =
6166
| ServeTestingProgressLogMessage
6267
| ServeTestingResultMessage;
68+
69+
/** A single entry of the `audits` record in a Lighthouse runner result's `lhr`. */
export type LighthouseAudit = LighthouseRunnerResult['lhr']['audits']['x']; // Lighthouse doesn't export this so we need to dig for it.
70+
71+
/** A Lighthouse category together with the collected audits that it references. */
export interface LighthouseCategory {
72+
/** ID of the category as reported by Lighthouse. */
id: string;
73+
/** Human-readable name, taken from the Lighthouse category's `title`. */
displayName: string;
74+
/** Description of the category; an empty string when Lighthouse provides none. */
description: string;
75+
/** Score of the category; 0 when Lighthouse reports no score. */
score: number;
76+
/** Collected audits that this category references. */
audits: LighthouseAudit[];
77+
}
78+
79+
/** Lighthouse data collected for a run. */
export interface LighthouseResult {
80+
/** Categories from the Lighthouse report, each with its collected audits. */
categories: LighthouseCategory[];
81+
/** Collected audits that no category references. */
uncategorized: LighthouseAudit[];
82+
}

0 commit comments

Comments
 (0)