feat: collect lighthouse data

crisbeto · crisbeto · commit 09d667e10419 · 2025-10-03T11:09:59.000+02:00
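Collects Lighthouse data (the `performance` and `best-practices` categories) while serve-testing each app in an eval run, and adds a `--skip-lighthouse` CLI flag to opt out.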
diff --git a/package.json b/package.json
@@ -71,6 +71,7 @@
     "genkit": "^1.19.1",
     "genkitx-anthropic": "0.25.0",
     "handlebars": "^4.7.8",
+    "lighthouse": "^12.8.2",
     "limiter": "^3.0.0",
     "marked": "^16.1.1",
     "node-fetch": "^3.3.2",
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
diff --git a/runner/eval-cli.ts b/runner/eval-cli.ts
@@ -38,6 +38,7 @@ interface Options {
   autoraterModel?: string;
   a11yRepairAttempts?: number;
   logging?: 'text-only' | 'dynamic';
+  skipLighthouse?: boolean;
 }
 
 function builder(argv: Argv): Argv<Options> {
@@ -153,6 +154,11 @@ function builder(argv: Argv): Argv<Options> {
         default: 0,
         description: 'Number of repair attempts for discovered a11y violations',
       })
+      .option('skip-lighthouse', {
+        type: 'boolean',
+        default: false,
+        description: 'Whether to skip collecting Lighthouse data',
+      })
       .strict()
       .version(false)
       .help()
@@ -197,6 +203,7 @@ async function handler(cliArgs: Arguments<Options>): Promise<void> {
       autoraterModel: cliArgs.autoraterModel,
       skipAiSummary: cliArgs.skipAiSummary,
       a11yRepairAttempts: cliArgs.a11yRepairAttempts,
+      skipLighthouse: cliArgs.skipLighthouse,
     });
 
     logReportToConsole(runInfo);
diff --git a/runner/index.ts b/runner/index.ts
@@ -17,6 +17,11 @@ export {
   BuildResultStatus,
   type BuildResult,
 } from './workers/builder/builder-types.js';
+export {
+  type LighthouseResult,
+  type LighthouseCategory,
+  type LighthouseAudit,
+} from './workers/serve-testing/worker-types.js';
 export {type UserJourneysResult} from './orchestration/user-journeys.js';
 export {type AutoRateResult} from './ratings/autoraters/auto-rate-shared.js';
 export {DEFAULT_MODEL_NAME, REPORT_VERSION} from './configuration/constants.js';
diff --git a/runner/orchestration/build-serve-loop.ts b/runner/orchestration/build-serve-loop.ts
@@ -46,6 +46,7 @@ export async function attemptBuild(
   skipScreenshots: boolean,
   skipAxeTesting: boolean,
   enableAutoCsp: boolean,
+  skipLighthouse: boolean,
   userJourneyAgentTaskInput: BrowserAgentTaskInput | undefined,
   maxAxeRepairAttempts: number,
 ) {
@@ -125,6 +126,7 @@ export async function attemptBuild(
       skipScreenshots,
       skipAxeTesting,
       enableAutoCsp,
+      skipLighthouse,
       userJourneyAgentTaskInput,
     );
   }
@@ -194,6 +196,7 @@ export async function attemptBuild(
       skipScreenshots,
       skipAxeTesting,
       enableAutoCsp,
+      skipLighthouse,
       userJourneyAgentTaskInput,
     );
 
diff --git a/runner/orchestration/generate.ts b/runner/orchestration/generate.ts
@@ -86,6 +86,7 @@ export async function generateCodeAndAssess(options: {
   logging?: 'text-only' | 'dynamic';
   autoraterModel?: string;
   a11yRepairAttempts?: number;
+  skipLighthouse?: boolean;
 }): Promise<RunInfo> {
   const env = await getEnvironmentByPath(options.environmentConfigPath, options.runner);
   const ratingLlm = await getRunnerByName('genkit');
@@ -181,6 +182,7 @@ export async function generateCodeAndAssess(options: {
                     progress,
                     options.autoraterModel || DEFAULT_AUTORATER_MODEL_NAME,
                     options.a11yRepairAttempts ?? 0,
+                    !!options.skipLighthouse,
                   ),
                 // 10min max per app evaluation.  We just want to make sure it never gets stuck.
                 10,
@@ -311,6 +313,7 @@ async function startEvaluationTask(
   progress: ProgressLogger,
   autoraterModel: string,
   a11yRepairAttempts: number,
+  skipLighthouse: boolean,
 ): Promise<AssessmentResult[]> {
   // Set up the project structure once for the root project.
   const {directory, cleanup} = await setupProjectStructure(
@@ -434,6 +437,7 @@ async function startEvaluationTask(
       skipScreenshots,
       skipAxeTesting,
       enableAutoCsp,
+      skipLighthouse,
       userJourneyAgentTaskInput,
       a11yRepairAttempts,
     );
diff --git a/runner/orchestration/serve-testing-worker.ts b/runner/orchestration/serve-testing-worker.ts
@@ -26,6 +26,7 @@ export async function serveAndTestApp(
   skipScreenshots: boolean,
   skipAxeTesting: boolean,
   enableAutoCsp: boolean,
+  skipLighthouse: boolean,
   userJourneyAgentTaskInput?: BrowserAgentTaskInput,
 ): Promise<ServeTestingResult> {
   progress.log(rootPromptDef, 'serve-testing', `Testing the app`);
@@ -43,6 +44,7 @@ export async function serveAndTestApp(
         enableAutoCsp,
         includeAxeTesting: skipAxeTesting === false,
         takeScreenshots: skipScreenshots === false,
+        includeLighthouseData: skipLighthouse === false,
         userJourneyAgentTaskInput,
       };
 
diff --git a/runner/workers/serve-testing/puppeteer.ts b/runner/workers/serve-testing/puppeteer.ts
@@ -1,10 +1,11 @@
 import {AxePuppeteer} from '@axe-core/puppeteer';
-import {Result} from 'axe-core';
+import {Result as AxeResult} from 'axe-core';
 import puppeteer from 'puppeteer';
+import lighthouse, {RunnerResult as LighthouseRunnerResult} from 'lighthouse';
 import {callWithTimeout} from '../../utils/timeout.js';
 import {AutoCsp} from './auto-csp.js';
 import {CspViolation} from './auto-csp-types.js';
-import {ServeTestingProgressLogFn} from './worker-types.js';
+import {LighthouseAudit, LighthouseResult, ServeTestingProgressLogFn} from './worker-types.js';
 
 /**
  * Uses Puppeteer to take a screenshot of the main page, perform Axe testing,
@@ -18,13 +19,15 @@ export async function runAppInPuppeteer(
   includeAxeTesting: boolean,
   progressLog: ServeTestingProgressLogFn,
   enableAutoCsp: boolean,
+  includeLighthouseData: boolean,
 ) {
   const runtimeErrors: string[] = [];
 
   // Undefined by default so it gets flagged correctly as `skipped` if there's no data.
   let cspViolations: CspViolation[] | undefined;
   let screenshotBase64Data: string | undefined;
-  let axeViolations: Result[] | undefined;
+  let axeViolations: AxeResult[] | undefined;
+  let lighthouseResult: LighthouseResult | undefined;
 
   try {
     const browser = await puppeteer.launch({
@@ -139,6 +142,35 @@ export async function runAppInPuppeteer(
       );
       progressLog('success', 'Screenshot captured and encoded');
     }
+
+    if (includeLighthouseData) {
+      try {
+        progressLog('eval', `Gathering Lighthouse data from ${hostUrl}`);
+        const lighthouseData = await lighthouse(
+          hostUrl,
+          undefined,
+          {
+            extends: 'lighthouse:default',
+            settings: {
+              // Exclude accessibility since it's already covered by Axe above.
+              onlyCategories: ['performance', 'best-practices'],
+            },
+          },
+          page,
+        );
+
+        lighthouseResult = lighthouseData ? processLighthouseData(lighthouseData) : undefined;
+
+        if (lighthouseResult) {
+          progressLog('success', 'Lighthouse data has been collected');
+        } else {
+          progressLog('error', 'Lighthouse did not produce usable data');
+        }
+      } catch (lighthouseError: any) {
+        progressLog('error', 'Could not gather Lighthouse data', lighthouseError.message);
+      }
+    }
+
     await browser.close();
   } catch (screenshotError: any) {
     let details: string = screenshotError.message;
@@ -150,5 +182,52 @@ export async function runAppInPuppeteer(
     progressLog('error', 'Could not take screenshot', details);
   }
 
-  return {screenshotBase64Data, runtimeErrors, axeViolations, cspViolations};
+  return {screenshotBase64Data, runtimeErrors, axeViolations, cspViolations, lighthouseResult};
+}
+
+function processLighthouseData(data: LighthouseRunnerResult): LighthouseResult | undefined { // Condenses a raw Lighthouse run into categories of filtered audits plus any leftover audits.
+  const availableAudits = new Map<string, LighthouseAudit>(); // Audits that pass the filter below, keyed by audit id.
+  const result: LighthouseResult = {categories: [], uncategorized: []};
+
+  for (const audit of Object.values(data.lhr.audits)) { // Pass 1: decide which audits are kept.
+    const type = audit.details?.type;
+    const displayMode = audit.scoreDisplayMode;
+    const isAllowedType = // No details at all, a list or opportunity, or a checklist/table that actually has items.
+      !type ||
+      type === 'list' ||
+      type === 'opportunity' ||
+      (type === 'checklist' && Object.keys(audit.details?.items || {}).length > 0) ||
+      (type === 'table' && audit.details?.items.length);
+    const isAllowedDisplayMode = displayMode === 'binary' || displayMode === 'numeric'; // Every other display mode is dropped.
+
+    if (audit.score != null && isAllowedType && isAllowedDisplayMode) { // Loose != null rejects both null and undefined scores.
+      availableAudits.set(audit.id, audit);
+    }
+  }
+
+  for (const category of Object.values(data.lhr.categories)) { // Pass 2: attach kept audits to the categories that reference them.
+    const auditsForCategory: LighthouseAudit[] = [];
+
+    for (const ref of category.auditRefs) {
+      const audit = availableAudits.get(ref.id);
+
+      if (audit) {
+        auditsForCategory.push(audit);
+        availableAudits.delete(ref.id); // Claimed here, so it is not repeated in a later category or in the uncategorized list.
+      }
+    }
+
+    result.categories.push({ // Pushed unconditionally, even when auditsForCategory is empty.
+      id: category.id,
+      displayName: category.title,
+      description: category.description || '', // Any falsy description is reported as an empty string.
+      score: category.score || 0, // Any falsy score is reported as 0.
+      audits: auditsForCategory,
+    });
+  }
+
+  // Track all remaining audits as uncategorized.
+  result.uncategorized.push(...availableAudits.values());
+
+  return result.categories.length === 0 && result.uncategorized.length === 0 ? undefined : result; // undefined only when there are no categories and no leftover audits.
 }
diff --git a/runner/workers/serve-testing/worker-types.ts b/runner/workers/serve-testing/worker-types.ts
@@ -1,6 +1,7 @@
+import {Result as AxeResult} from 'axe-core';
+import {RunnerResult as LighthouseRunnerResult} from 'lighthouse';
 import {ProgressType} from '../../progress/progress-logger.js';
 import {AgentOutput, BrowserAgentTaskInput} from '../../testing/browser-agent/models.js';
-import {Result} from 'axe-core';
 import {CspViolation} from './auto-csp-types.js';
 
 /**
@@ -24,8 +25,11 @@ export interface ServeTestingWorkerMessage {
   /** Whether to enable the auto CSP checks. */
   enableAutoCsp: boolean;
 
-  /** User journey browser agent task input */
+  /** User journey browser agent task input. */
   userJourneyAgentTaskInput: BrowserAgentTaskInput | undefined;
+
+  /** Whether to capture Lighthouse data for the run. */
+  includeLighthouseData: boolean;
 }
 
 export interface ServeTestingResult {
@@ -34,7 +38,8 @@ export interface ServeTestingResult {
   runtimeErrors?: string;
   userJourneyAgentOutput: AgentOutput | null;
   cspViolations?: CspViolation[];
-  axeViolations?: Result[];
+  axeViolations?: AxeResult[];
+  lighthouseResult?: LighthouseResult;
 }
 
 export interface ServeTestingResultMessage {
@@ -60,3 +65,18 @@ export type ServeTestingProgressLogFn = (
 export type ServeTestingWorkerResponseMessage =
   | ServeTestingProgressLogMessage
   | ServeTestingResultMessage;
+
+export type LighthouseAudit = LighthouseRunnerResult['lhr']['audits']['x']; // Lighthouse doesn't export this so we need to dig for it. The x key is only a placeholder used to reach the per-audit value type (assumes audits is string-indexed; TODO confirm).
+
+export interface LighthouseCategory { // One Lighthouse category together with the audits kept for it.
+  id: string;
+  displayName: string; // Filled from the Lighthouse category title.
+  description: string; // Empty string when the category description is falsy.
+  score: number; // Falls back to 0 when the category score is falsy.
+  audits: LighthouseAudit[]; // Audits referenced by this category that passed filtering.
+}
+
+export interface LighthouseResult { // Processed Lighthouse data carried on the serve-testing result.
+  categories: LighthouseCategory[];
+  uncategorized: LighthouseAudit[]; // Kept audits that no category referenced.
+}
diff --git a/runner/workers/serve-testing/worker.ts b/runner/workers/serve-testing/worker.ts
@@ -5,6 +5,7 @@ import {CspViolation} from '../serve-testing/auto-csp-types.js';
 import {runBrowserAgentUserJourneyTests} from '../serve-testing/browser-agent.js';
 import {runAppInPuppeteer} from '../serve-testing/puppeteer.js';
 import {
+  LighthouseResult,
   ServeTestingProgressLogMessage,
   ServeTestingResult,
   ServeTestingWorkerMessage,
@@ -19,6 +20,7 @@ process.on('message', async (message: ServeTestingWorkerMessage) => {
     includeAxeTesting,
     takeScreenshots,
     userJourneyAgentTaskInput,
+    includeLighthouseData,
   } = message;
   const runtimeErrors: string[] = [];
   const progressLog = (state: ProgressType, message: string, details?: string) => {
@@ -29,10 +31,11 @@ process.on('message', async (message: ServeTestingWorkerMessage) => {
   };
 
   let result: ServeTestingResult;
-  let screenshotBase64Data: string | undefined = undefined;
+  let screenshotBase64Data: string | undefined;
   let axeViolations: any[] | undefined = [];
   let userJourneyAgentOutput: AgentOutput | null = null;
   let cspViolations: CspViolation[] | undefined = [];
+  let lighthouseResult: LighthouseResult | undefined;
 
   try {
     const puppeteerResult = await callWithTimeout(
@@ -45,13 +48,15 @@ process.on('message', async (message: ServeTestingWorkerMessage) => {
           !!includeAxeTesting,
           progressLog,
           !!enableAutoCsp,
+          includeLighthouseData,
         ),
       4, // 4min
     );
 
     screenshotBase64Data = puppeteerResult.screenshotBase64Data;
     axeViolations = puppeteerResult.axeViolations;
     cspViolations = puppeteerResult.cspViolations;
+    lighthouseResult = puppeteerResult.lighthouseResult;
 
     runtimeErrors.push(...puppeteerResult.runtimeErrors);
 
@@ -72,6 +77,7 @@ process.on('message', async (message: ServeTestingWorkerMessage) => {
       axeViolations,
       userJourneyAgentOutput: userJourneyAgentOutput,
       cspViolations,
+      lighthouseResult,
     };
   } catch (error: any) {
     const cleanErrorMessage = cleanupBuildMessage(error.message);
@@ -80,6 +86,7 @@ process.on('message', async (message: ServeTestingWorkerMessage) => {
       runtimeErrors: runtimeErrors.join('\n'),
       userJourneyAgentOutput: userJourneyAgentOutput,
       cspViolations,
+      lighthouseResult,
     };
   }